good enough

This commit is contained in:
anon 2023-09-26 16:42:25 +02:00
parent 9b54a3f3e8
commit e61a272271
2 changed files with 87 additions and 63 deletions

View File

@ -45,6 +45,9 @@ clean:
test: chad_test test: chad_test
.PHONY: test clean install run:
hl < source/main.c
.PHONY: test clean install run
.DEFAULT_GOAL:=${TARGET} .DEFAULT_GOAL:=${TARGET}

View File

@ -121,7 +121,7 @@ typedef struct {
int flags; int flags;
int state; int state;
int width; int width;
int width2; int match_width;
char * whitelist; char * whitelist;
char * blacklist; char * blacklist;
} compiler_state; } compiler_state;
@ -131,7 +131,11 @@ typedef struct {
// ---------------------------------- // ----------------------------------
// ### Regex creation/destruction ### // ### Regex creation/destruction ###
// ---------------------------------- // ----------------------------------
static const int HALT_AND_CATCH_FIRE = INT_MIN; enum {
ASSERTION_FAILURE = 0,
ASSERTION_SUCCESS = 1,
HALT_AND_CATCH_FIRE = INT_MIN,
};
#define ASSERT_HALT(a) ((a == HALT_AND_CATCH_FIRE) ? HALT_AND_CATCH_FIRE : (cs->state + a)) #define ASSERT_HALT(a) ((a == HALT_AND_CATCH_FIRE) ? HALT_AND_CATCH_FIRE : (cs->state + a))
@ -148,7 +152,7 @@ void HOOK_ALL(const int from,
.input = *s, .input = *s,
.to = ASSERT_HALT(to), .to = ASSERT_HALT(to),
.pattern_width = cs->width, .pattern_width = cs->width,
.match_width = cs->width2, .match_width = cs->match_width,
}; };
vector_push(&regex->delta_table, vector_push(&regex->delta_table,
&delta); &delta);
@ -490,6 +494,11 @@ regex_t * regex_compile(const char * const pattern) {
char whitelist[64]; char whitelist[64];
char blacklist[64]; char blacklist[64];
static const int REGEX_PREVERSABLE_FLAGS = IS_AT_THE_BEGINNING
| FORCE_START_OF_STRING
| DO_FORBID_START_OF_STRING
;
compiler_state cs = { compiler_state cs = {
.flags = IS_AT_THE_BEGINNING, .flags = IS_AT_THE_BEGINNING,
.state = JEGER_INIT_STATE, .state = JEGER_INIT_STATE,
@ -502,9 +511,9 @@ regex_t * regex_compile(const char * const pattern) {
// Reset the compiler // Reset the compiler
whitelist[0] = '\0'; whitelist[0] = '\0';
blacklist[0] = '\0'; blacklist[0] = '\0';
cs.flags &= (IS_AT_THE_BEGINNING | FORCE_START_OF_STRING); cs.flags &= REGEX_PREVERSABLE_FLAGS;
cs.width = 1; cs.width = 1;
cs.width2 = 1; cs.match_width = 1;
// Translate char // Translate char
switch (*s) { switch (*s) {
@ -535,12 +544,6 @@ regex_t * regex_compile(const char * const pattern) {
} break; } break;
} }
/* Ew */
if (*s == '\\'
&& is_hologram_escape(*(s+1))) {
++s;
}
// Compile char // Compile char
switch (*s) { switch (*s) {
// holograms // holograms
@ -555,6 +558,13 @@ regex_t * regex_compile(const char * const pattern) {
} }
s += 1; s += 1;
} break; } break;
case '\\': {
if(is_hologram_escape(*(s+1))) {
++s;
} else {
goto DEFAULT;
}
switch(*s){
case '<': { case '<': {
// XXX: make this legible // XXX: make this legible
if (cs.flags & IS_AT_THE_BEGINNING if (cs.flags & IS_AT_THE_BEGINNING
@ -572,7 +582,7 @@ regex_t * regex_compile(const char * const pattern) {
// --- // ---
++cs.state; ++cs.state;
cs.width = 0; cs.width = 0;
cs.width2 = 0; cs.match_width = 0;
HOOK_ALL(0, whitelist, +1, &cs, regex); HOOK_ALL(0, whitelist, +1, &cs, regex);
cs.width = 1; cs.width = 1;
OFFSHOOT(0, +1, 1, 0, &cs, regex); OFFSHOOT(0, +1, 1, 0, &cs, regex);
@ -599,6 +609,8 @@ regex_t * regex_compile(const char * const pattern) {
++cs.state; ++cs.state;
s += 1; s += 1;
} break; } break;
}
} break;
// quantifiers // quantifiers
case '=': case '=':
case '?': { case '?': {
@ -631,6 +643,7 @@ regex_t * regex_compile(const char * const pattern) {
} }
s += 1; s += 1;
} break; } break;
DEFAULT:
default: { // Literal default: { // Literal
cs.flags |= INCREMENT_STATE; cs.flags |= INCREMENT_STATE;
HOOK_ALL(0, whitelist, +1, &cs, regex); HOOK_ALL(0, whitelist, +1, &cs, regex);
@ -653,6 +666,7 @@ regex_t * regex_compile(const char * const pattern) {
++cs.state; ++cs.state;
} }
// Purge SOS flag
cs.flags &= (~IS_AT_THE_BEGINNING); cs.flags &= (~IS_AT_THE_BEGINNING);
} }
@ -697,12 +711,12 @@ const offshoot_t * catch_table_lookup(const regex_t * const regex,
} }
static static
bool regex_assert(const regex_t * const regex, int regex_assert(const regex_t * const regex,
const char * const string, const char * const string,
int state, int state,
match_t * const match) { match_t * const match) {
if (state == HALT_AND_CATCH_FIRE) { if (state == HALT_AND_CATCH_FIRE) {
return false; return HALT_AND_CATCH_FIRE;
} }
bool last_stand = false; bool last_stand = false;
@ -743,15 +757,20 @@ bool regex_assert(const regex_t * const regex,
do_reset = true; do_reset = true;
} }
const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match); const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
if(r){ if(r == ASSERTION_SUCCESS){
match->width += delta->match_width; match->width += delta->match_width;
return r; return r;
} else if (do_reset) { } else {
if (r == ASSERTION_FAILURE) {
was_found = false;
}
if (do_reset) {
match->_pos_ptr = NULL; match->_pos_ptr = NULL;
} }
} }
} }
} }
}
PERFORM_CATCH_LOOKUP: { PERFORM_CATCH_LOOKUP: {
if (!was_found) { if (!was_found) {
@ -765,7 +784,7 @@ bool regex_assert(const regex_t * const regex,
} }
} }
return (state == regex->accepting_state); return ((state == regex->accepting_state) ? ASSERTION_SUCCESS : ASSERTION_FAILURE);
} }
match_t * regex_match(const regex_t * const regex, match_t * regex_match(const regex_t * const regex,
@ -796,7 +815,8 @@ match_t * regex_match(const regex_t * const regex,
.width = 0, .width = 0,
}; };
if (regex_assert(regex, s, initial_state, match)) { if (regex_assert(regex, s, initial_state, match) == 1) {
//printf("true: %s\n", s);
if (match->_pos_ptr) { if (match->_pos_ptr) {
match->position = (match->_pos_ptr - string); match->position = (match->_pos_ptr - string);
} else { } else {
@ -808,6 +828,7 @@ match_t * regex_match(const regex_t * const regex,
s += ((match->width > 0) ? match->width : 1); s += ((match->width > 0) ? match->width : 1);
match = (match_t *)malloc(sizeof(match_t)); match = (match_t *)malloc(sizeof(match_t));
} else { } else {
//printf("false: %s\n", s);
++s; ++s;
} }
} while (*s != '\0'); } while (*s != '\0');