good enough

This commit is contained in:
anon 2023-09-26 16:42:25 +02:00
parent 9b54a3f3e8
commit e61a272271
2 changed files with 87 additions and 63 deletions

View File

@ -45,6 +45,9 @@ clean:
test: chad_test
.PHONY: test clean install
run:
hl < source/main.c
.PHONY: test clean install run
.DEFAULT_GOAL:=${TARGET}

View File

@ -121,7 +121,7 @@ typedef struct {
int flags;
int state;
int width;
int width2;
int match_width;
char * whitelist;
char * blacklist;
} compiler_state;
@ -131,7 +131,11 @@ typedef struct {
// ----------------------------------
// ### Regex creation/destruction ###
// ----------------------------------
static const int HALT_AND_CATCH_FIRE = INT_MIN;
/* Result codes shared by the regex compiler and matcher.
 * HALT_AND_CATCH_FIRE doubles as a sentinel state value: regex_assert()
 * returns it unchanged when asked to continue from that state.
 */
enum {
	HALT_AND_CATCH_FIRE = INT_MIN,	/* sentinel, distinct from both results below */
	ASSERTION_FAILURE   = 0,	/* assertion did not end in the accepting state */
	ASSERTION_SUCCESS   = 1		/* assertion ended in the accepting state */
};
/* Resolve a relative state offset `a` against the current compiler state.
 * If `a` is the HALT_AND_CATCH_FIRE sentinel it is passed through unchanged;
 * otherwise the result is `cs->state + a`.
 * Expects a pointer named `cs` (with a `state` member) in the caller's scope.
 * NOTE: `a` is evaluated twice, so do not pass expressions with side effects.
 * The argument is parenthesized so that operands such as `x ? 1 : 2` or
 * `m | n` keep their intended grouping inside the comparison and the sum.
 */
#define ASSERT_HALT(a) (((a) == HALT_AND_CATCH_FIRE) ? HALT_AND_CATCH_FIRE : (cs->state + (a)))
@ -148,7 +152,7 @@ void HOOK_ALL(const int from,
.input = *s,
.to = ASSERT_HALT(to),
.pattern_width = cs->width,
.match_width = cs->width2,
.match_width = cs->match_width,
};
vector_push(&regex->delta_table,
&delta);
@ -490,6 +494,11 @@ regex_t * regex_compile(const char * const pattern) {
char whitelist[64];
char blacklist[64];
static const int REGEX_PREVERSABLE_FLAGS = IS_AT_THE_BEGINNING
| FORCE_START_OF_STRING
| DO_FORBID_START_OF_STRING
;
compiler_state cs = {
.flags = IS_AT_THE_BEGINNING,
.state = JEGER_INIT_STATE,
@ -500,11 +509,11 @@ regex_t * regex_compile(const char * const pattern) {
for (const char * s = pattern; *s != '\00';) {
assert(!is_quantifier(*s) && "Pattern starts with quantifier.");
// Reset the compiler
whitelist[0] = '\0';
blacklist[0] = '\0';
cs.flags &= (IS_AT_THE_BEGINNING | FORCE_START_OF_STRING);
cs.width = 1;
cs.width2 = 1;
whitelist[0] = '\0';
blacklist[0] = '\0';
cs.flags &= REGEX_PREVERSABLE_FLAGS;
cs.width = 1;
cs.match_width = 1;
// Translate char
switch (*s) {
@ -535,12 +544,6 @@ regex_t * regex_compile(const char * const pattern) {
} break;
}
/* Ew */
if (*s == '\\'
&& is_hologram_escape(*(s+1))) {
++s;
}
// Compile char
switch (*s) {
// holograms
@ -555,49 +558,58 @@ regex_t * regex_compile(const char * const pattern) {
}
s += 1;
} break;
case '<': {
// XXX: make this legible
if (cs.flags & IS_AT_THE_BEGINNING
&& !(cs.flags & DO_CATCH)
&& !(cs.flags & IS_NEGATIVE)
&& whitelist[0] == '\0') {
// ---
cs.flags |= INCREMENT_STATE;
cs.flags |= DO_FORBID_START_OF_STRING;
strcat(whitelist, JEGER_CHAR_symbol_chars);
// ---
ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
// ---
++cs.state;
cs.width = 0;
cs.width2 = 0;
HOOK_ALL(0, whitelist, +1, &cs, regex);
cs.width = 1;
OFFSHOOT(0, +1, 1, 0, &cs, regex);
// ---
case '\\': {
if(is_hologram_escape(*(s+1))) {
++s;
} else {
HOOK_ALL(0, whitelist, +1, &cs, regex);
if ((cs.flags & DO_CATCH)
|| (cs.flags & IS_NEGATIVE)) {
OFFSHOOT(+1, +2, 1, 1, &cs, regex);
} else {
cs.flags |= INCREMENT_STATE;
}
OFFSHOOT(0, +1, 1, 0, &cs, regex);
goto DEFAULT;
}
switch(*s){
case '<': {
// XXX: make this legible
if (cs.flags & IS_AT_THE_BEGINNING
&& !(cs.flags & DO_CATCH)
&& !(cs.flags & IS_NEGATIVE)
&& whitelist[0] == '\0') {
// ---
cs.flags |= INCREMENT_STATE;
cs.flags |= DO_FORBID_START_OF_STRING;
strcat(whitelist, JEGER_CHAR_symbol_chars);
// ---
ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
// ---
++cs.state;
cs.width = 0;
cs.match_width = 0;
HOOK_ALL(0, whitelist, +1, &cs, regex);
cs.width = 1;
OFFSHOOT(0, +1, 1, 0, &cs, regex);
// ---
} else {
HOOK_ALL(0, whitelist, +1, &cs, regex);
if ((cs.flags & DO_CATCH)
|| (cs.flags & IS_NEGATIVE)) {
OFFSHOOT(+1, +2, 1, 1, &cs, regex);
} else {
cs.flags |= INCREMENT_STATE;
}
OFFSHOOT(0, +1, 1, 0, &cs, regex);
}
cs.flags |= IS_NEGATIVE;
strcat(blacklist, JEGER_CHAR_symbol_chars);
s += 1;
} break;
case '>': {
HOOK_ALL(0, whitelist, +1, &cs, regex);
cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
strcat(blacklist, JEGER_CHAR_symbol_chars);
OFFSHOOT(+1, +2, 0, 0, &cs, regex);
++cs.state;
s += 1;
} break;
}
cs.flags |= IS_NEGATIVE;
strcat(blacklist, JEGER_CHAR_symbol_chars);
s += 1;
} break;
case '>': {
HOOK_ALL(0, whitelist, +1, &cs, regex);
cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
strcat(blacklist, JEGER_CHAR_symbol_chars);
OFFSHOOT(+1, +2, 0, 0, &cs, regex);
++cs.state;
s += 1;
} break;
// quantifiers
case '=':
@ -631,6 +643,7 @@ regex_t * regex_compile(const char * const pattern) {
}
s += 1;
} break;
DEFAULT:
default: { // Literal
cs.flags |= INCREMENT_STATE;
HOOK_ALL(0, whitelist, +1, &cs, regex);
@ -653,6 +666,7 @@ regex_t * regex_compile(const char * const pattern) {
++cs.state;
}
// Purge SOS flag
cs.flags &= (~IS_AT_THE_BEGINNING);
}
@ -697,12 +711,12 @@ const offshoot_t * catch_table_lookup(const regex_t * const regex,
}
static
bool regex_assert(const regex_t * const regex,
int regex_assert(const regex_t * const regex,
const char * const string,
int state,
match_t * const match) {
if (state == HALT_AND_CATCH_FIRE) {
return false;
return HALT_AND_CATCH_FIRE;
}
bool last_stand = false;
@ -743,11 +757,16 @@ bool regex_assert(const regex_t * const regex,
do_reset = true;
}
const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
if(r){
if(r == ASSERTION_SUCCESS){
match->width += delta->match_width;
return r;
} else if (do_reset) {
match->_pos_ptr = NULL;
} else {
if (r == ASSERTION_FAILURE) {
was_found = false;
}
if (do_reset) {
match->_pos_ptr = NULL;
}
}
}
}
@ -765,7 +784,7 @@ bool regex_assert(const regex_t * const regex,
}
}
return (state == regex->accepting_state);
return ((state == regex->accepting_state) ? ASSERTION_SUCCESS : ASSERTION_FAILURE);
}
match_t * regex_match(const regex_t * const regex,
@ -796,7 +815,8 @@ match_t * regex_match(const regex_t * const regex,
.width = 0,
};
if (regex_assert(regex, s, initial_state, match)) {
if (regex_assert(regex, s, initial_state, match) == 1) {
//printf("true: %s\n", s);
if (match->_pos_ptr) {
match->position = (match->_pos_ptr - string);
} else {
@ -808,6 +828,7 @@ match_t * regex_match(const regex_t * const regex,
s += ((match->width > 0) ? match->width : 1);
match = (match_t *)malloc(sizeof(match_t));
} else {
//printf("false: %s\n", s);
++s;
}
} while (*s != '\0');