good enough
This commit is contained in:
parent
9b54a3f3e8
commit
e61a272271
5
Makefile
5
Makefile
@ -45,6 +45,9 @@ clean:
|
||||
|
||||
test: chad_test
|
||||
|
||||
.PHONY: test clean install
|
||||
run:
|
||||
hl < source/main.c
|
||||
|
||||
.PHONY: test clean install run
|
||||
|
||||
.DEFAULT_GOAL:=${TARGET}
|
||||
|
145
source/jeger.c
145
source/jeger.c
@ -121,7 +121,7 @@ typedef struct {
|
||||
int flags;
|
||||
int state;
|
||||
int width;
|
||||
int width2;
|
||||
int match_width;
|
||||
char * whitelist;
|
||||
char * blacklist;
|
||||
} compiler_state;
|
||||
@ -131,7 +131,11 @@ typedef struct {
|
||||
// ----------------------------------
|
||||
// ### Regex creation/destruction ###
|
||||
// ----------------------------------
|
||||
static const int HALT_AND_CATCH_FIRE = INT_MIN;
|
||||
enum {
|
||||
ASSERTION_FAILURE = 0,
|
||||
ASSERTION_SUCCESS = 1,
|
||||
HALT_AND_CATCH_FIRE = INT_MIN,
|
||||
};
|
||||
|
||||
/* Resolve a transition target for the state currently being compiled.
 * If `a` is the HALT_AND_CATCH_FIRE sentinel it is passed through unchanged;
 * otherwise `a` is treated as an offset and added to cs->state.
 * Expects a pointer named `cs` with a `state` member to be in scope at the
 * expansion site. The argument is parenthesized so that expression arguments
 * (e.g. a conditional expression) are not re-associated by `==` or `+`.
 * Note: `a` is evaluated up to twice, so it must be free of side effects.
 */
#define ASSERT_HALT(a) (((a) == HALT_AND_CATCH_FIRE) ? HALT_AND_CATCH_FIRE : (cs->state + (a)))
|
||||
|
||||
@ -148,7 +152,7 @@ void HOOK_ALL(const int from,
|
||||
.input = *s,
|
||||
.to = ASSERT_HALT(to),
|
||||
.pattern_width = cs->width,
|
||||
.match_width = cs->width2,
|
||||
.match_width = cs->match_width,
|
||||
};
|
||||
vector_push(&regex->delta_table,
|
||||
&delta);
|
||||
@ -490,6 +494,11 @@ regex_t * regex_compile(const char * const pattern) {
|
||||
char whitelist[64];
|
||||
char blacklist[64];
|
||||
|
||||
static const int REGEX_PREVERSABLE_FLAGS = IS_AT_THE_BEGINNING
|
||||
| FORCE_START_OF_STRING
|
||||
| DO_FORBID_START_OF_STRING
|
||||
;
|
||||
|
||||
compiler_state cs = {
|
||||
.flags = IS_AT_THE_BEGINNING,
|
||||
.state = JEGER_INIT_STATE,
|
||||
@ -500,11 +509,11 @@ regex_t * regex_compile(const char * const pattern) {
|
||||
for (const char * s = pattern; *s != '\00';) {
|
||||
assert(!is_quantifier(*s) && "Pattern starts with quantifier.");
|
||||
// Reset the compiler
|
||||
whitelist[0] = '\0';
|
||||
blacklist[0] = '\0';
|
||||
cs.flags &= (IS_AT_THE_BEGINNING | FORCE_START_OF_STRING);
|
||||
cs.width = 1;
|
||||
cs.width2 = 1;
|
||||
whitelist[0] = '\0';
|
||||
blacklist[0] = '\0';
|
||||
cs.flags &= REGEX_PREVERSABLE_FLAGS;
|
||||
cs.width = 1;
|
||||
cs.match_width = 1;
|
||||
|
||||
// Translate char
|
||||
switch (*s) {
|
||||
@ -535,12 +544,6 @@ regex_t * regex_compile(const char * const pattern) {
|
||||
} break;
|
||||
}
|
||||
|
||||
/* Ew */
|
||||
if (*s == '\\'
|
||||
&& is_hologram_escape(*(s+1))) {
|
||||
++s;
|
||||
}
|
||||
|
||||
// Compile char
|
||||
switch (*s) {
|
||||
// holograms
|
||||
@ -555,49 +558,58 @@ regex_t * regex_compile(const char * const pattern) {
|
||||
}
|
||||
s += 1;
|
||||
} break;
|
||||
case '<': {
|
||||
// XXX: make this legible
|
||||
if (cs.flags & IS_AT_THE_BEGINNING
|
||||
&& !(cs.flags & DO_CATCH)
|
||||
&& !(cs.flags & IS_NEGATIVE)
|
||||
&& whitelist[0] == '\0') {
|
||||
// ---
|
||||
cs.flags |= INCREMENT_STATE;
|
||||
cs.flags |= DO_FORBID_START_OF_STRING;
|
||||
strcat(whitelist, JEGER_CHAR_symbol_chars);
|
||||
// ---
|
||||
ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
|
||||
ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
|
||||
HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
|
||||
// ---
|
||||
++cs.state;
|
||||
cs.width = 0;
|
||||
cs.width2 = 0;
|
||||
HOOK_ALL(0, whitelist, +1, &cs, regex);
|
||||
cs.width = 1;
|
||||
OFFSHOOT(0, +1, 1, 0, &cs, regex);
|
||||
// ---
|
||||
case '\\': {
|
||||
if(is_hologram_escape(*(s+1))) {
|
||||
++s;
|
||||
} else {
|
||||
HOOK_ALL(0, whitelist, +1, &cs, regex);
|
||||
if ((cs.flags & DO_CATCH)
|
||||
|| (cs.flags & IS_NEGATIVE)) {
|
||||
OFFSHOOT(+1, +2, 1, 1, &cs, regex);
|
||||
} else {
|
||||
cs.flags |= INCREMENT_STATE;
|
||||
}
|
||||
OFFSHOOT(0, +1, 1, 0, &cs, regex);
|
||||
goto DEFAULT;
|
||||
}
|
||||
switch(*s){
|
||||
case '<': {
|
||||
// XXX: make this legible
|
||||
if (cs.flags & IS_AT_THE_BEGINNING
|
||||
&& !(cs.flags & DO_CATCH)
|
||||
&& !(cs.flags & IS_NEGATIVE)
|
||||
&& whitelist[0] == '\0') {
|
||||
// ---
|
||||
cs.flags |= INCREMENT_STATE;
|
||||
cs.flags |= DO_FORBID_START_OF_STRING;
|
||||
strcat(whitelist, JEGER_CHAR_symbol_chars);
|
||||
// ---
|
||||
ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
|
||||
ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
|
||||
HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
|
||||
// ---
|
||||
++cs.state;
|
||||
cs.width = 0;
|
||||
cs.match_width = 0;
|
||||
HOOK_ALL(0, whitelist, +1, &cs, regex);
|
||||
cs.width = 1;
|
||||
OFFSHOOT(0, +1, 1, 0, &cs, regex);
|
||||
// ---
|
||||
} else {
|
||||
HOOK_ALL(0, whitelist, +1, &cs, regex);
|
||||
if ((cs.flags & DO_CATCH)
|
||||
|| (cs.flags & IS_NEGATIVE)) {
|
||||
OFFSHOOT(+1, +2, 1, 1, &cs, regex);
|
||||
} else {
|
||||
cs.flags |= INCREMENT_STATE;
|
||||
}
|
||||
OFFSHOOT(0, +1, 1, 0, &cs, regex);
|
||||
}
|
||||
cs.flags |= IS_NEGATIVE;
|
||||
strcat(blacklist, JEGER_CHAR_symbol_chars);
|
||||
s += 1;
|
||||
} break;
|
||||
case '>': {
|
||||
HOOK_ALL(0, whitelist, +1, &cs, regex);
|
||||
cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
|
||||
strcat(blacklist, JEGER_CHAR_symbol_chars);
|
||||
OFFSHOOT(+1, +2, 0, 0, &cs, regex);
|
||||
++cs.state;
|
||||
s += 1;
|
||||
} break;
|
||||
}
|
||||
cs.flags |= IS_NEGATIVE;
|
||||
strcat(blacklist, JEGER_CHAR_symbol_chars);
|
||||
s += 1;
|
||||
} break;
|
||||
case '>': {
|
||||
HOOK_ALL(0, whitelist, +1, &cs, regex);
|
||||
cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
|
||||
strcat(blacklist, JEGER_CHAR_symbol_chars);
|
||||
OFFSHOOT(+1, +2, 0, 0, &cs, regex);
|
||||
++cs.state;
|
||||
s += 1;
|
||||
} break;
|
||||
// quantifiers
|
||||
case '=':
|
||||
@ -631,6 +643,7 @@ regex_t * regex_compile(const char * const pattern) {
|
||||
}
|
||||
s += 1;
|
||||
} break;
|
||||
DEFAULT:
|
||||
default: { // Literal
|
||||
cs.flags |= INCREMENT_STATE;
|
||||
HOOK_ALL(0, whitelist, +1, &cs, regex);
|
||||
@ -653,6 +666,7 @@ regex_t * regex_compile(const char * const pattern) {
|
||||
++cs.state;
|
||||
}
|
||||
|
||||
// Purge SOS flag
|
||||
cs.flags &= (~IS_AT_THE_BEGINNING);
|
||||
}
|
||||
|
||||
@ -697,12 +711,12 @@ const offshoot_t * catch_table_lookup(const regex_t * const regex,
|
||||
}
|
||||
|
||||
static
|
||||
bool regex_assert(const regex_t * const regex,
|
||||
int regex_assert(const regex_t * const regex,
|
||||
const char * const string,
|
||||
int state,
|
||||
match_t * const match) {
|
||||
if (state == HALT_AND_CATCH_FIRE) {
|
||||
return false;
|
||||
return HALT_AND_CATCH_FIRE;
|
||||
}
|
||||
|
||||
bool last_stand = false;
|
||||
@ -743,11 +757,16 @@ bool regex_assert(const regex_t * const regex,
|
||||
do_reset = true;
|
||||
}
|
||||
const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
|
||||
if(r){
|
||||
if(r == ASSERTION_SUCCESS){
|
||||
match->width += delta->match_width;
|
||||
return r;
|
||||
} else if (do_reset) {
|
||||
match->_pos_ptr = NULL;
|
||||
} else {
|
||||
if (r == ASSERTION_FAILURE) {
|
||||
was_found = false;
|
||||
}
|
||||
if (do_reset) {
|
||||
match->_pos_ptr = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -765,7 +784,7 @@ bool regex_assert(const regex_t * const regex,
|
||||
}
|
||||
}
|
||||
|
||||
return (state == regex->accepting_state);
|
||||
return ((state == regex->accepting_state) ? ASSERTION_SUCCESS : ASSERTION_FAILURE);
|
||||
}
|
||||
|
||||
match_t * regex_match(const regex_t * const regex,
|
||||
@ -796,7 +815,8 @@ match_t * regex_match(const regex_t * const regex,
|
||||
.width = 0,
|
||||
};
|
||||
|
||||
if (regex_assert(regex, s, initial_state, match)) {
|
||||
if (regex_assert(regex, s, initial_state, match) == 1) {
|
||||
//printf("true: %s\n", s);
|
||||
if (match->_pos_ptr) {
|
||||
match->position = (match->_pos_ptr - string);
|
||||
} else {
|
||||
@ -808,6 +828,7 @@ match_t * regex_match(const regex_t * const regex,
|
||||
s += ((match->width > 0) ? match->width : 1);
|
||||
match = (match_t *)malloc(sizeof(match_t));
|
||||
} else {
|
||||
//printf("false: %s\n", s);
|
||||
++s;
|
||||
}
|
||||
} while (*s != '\0');
|
||||
|
Loading…
Reference in New Issue
Block a user