diff --git a/Makefile b/Makefile index 60d8000..2641e1c 100644 --- a/Makefile +++ b/Makefile @@ -45,6 +45,9 @@ clean: test: chad_test -.PHONY: test clean install +run: + hl < source/main.c + +.PHONY: test clean install run .DEFAULT_GOAL:=${TARGET} diff --git a/source/jeger.c b/source/jeger.c index 343f639..2a844af 100644 --- a/source/jeger.c +++ b/source/jeger.c @@ -121,7 +121,7 @@ typedef struct { int flags; int state; int width; - int width2; + int match_width; char * whitelist; char * blacklist; } compiler_state; @@ -131,7 +131,11 @@ typedef struct { // ---------------------------------- // ### Regex creation/destruction ### // ---------------------------------- -static const int HALT_AND_CATCH_FIRE = INT_MIN; +enum { + ASSERTION_FAILURE = 0, + ASSERTION_SUCCESS = 1, + HALT_AND_CATCH_FIRE = INT_MIN, +}; #define ASSERT_HALT(a) ((a == HALT_AND_CATCH_FIRE) ? HALT_AND_CATCH_FIRE : (cs->state + a)) @@ -148,7 +152,7 @@ void HOOK_ALL(const int from, .input = *s, .to = ASSERT_HALT(to), .pattern_width = cs->width, - .match_width = cs->width2, + .match_width = cs->match_width, }; vector_push(&regex->delta_table, &delta); @@ -490,6 +494,11 @@ regex_t * regex_compile(const char * const pattern) { char whitelist[64]; char blacklist[64]; + static const int REGEX_PREVERSABLE_FLAGS = IS_AT_THE_BEGINNING + | FORCE_START_OF_STRING + | DO_FORBID_START_OF_STRING + ; + compiler_state cs = { .flags = IS_AT_THE_BEGINNING, .state = JEGER_INIT_STATE, @@ -500,11 +509,11 @@ regex_t * regex_compile(const char * const pattern) { for (const char * s = pattern; *s != '\00';) { assert(!is_quantifier(*s) && "Pattern starts with quantifier."); // Reset the compiler - whitelist[0] = '\0'; - blacklist[0] = '\0'; - cs.flags &= (IS_AT_THE_BEGINNING | FORCE_START_OF_STRING); - cs.width = 1; - cs.width2 = 1; + whitelist[0] = '\0'; + blacklist[0] = '\0'; + cs.flags &= REGEX_PREVERSABLE_FLAGS; + cs.width = 1; + cs.match_width = 1; // Translate char switch (*s) { @@ -535,12 +544,6 @@ regex_t * 
regex_compile(const char * const pattern) { } break; } - /* Ew */ - if (*s == '\\' - && is_hologram_escape(*(s+1))) { - ++s; - } - // Compile char switch (*s) { // holograms @@ -555,49 +558,58 @@ regex_t * regex_compile(const char * const pattern) { } s += 1; } break; - case '<': { - // XXX: make this legible - if (cs.flags & IS_AT_THE_BEGINNING - && !(cs.flags & DO_CATCH) - && !(cs.flags & IS_NEGATIVE) - && whitelist[0] == '\0') { - // --- - cs.flags |= INCREMENT_STATE; - cs.flags |= DO_FORBID_START_OF_STRING; - strcat(whitelist, JEGER_CHAR_symbol_chars); - // --- - ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex); - ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex); - HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex); - // --- - ++cs.state; - cs.width = 0; - cs.width2 = 0; - HOOK_ALL(0, whitelist, +1, &cs, regex); - cs.width = 1; - OFFSHOOT(0, +1, 1, 0, &cs, regex); - // --- + case '\\': { + if(is_hologram_escape(*(s+1))) { + ++s; } else { - HOOK_ALL(0, whitelist, +1, &cs, regex); - if ((cs.flags & DO_CATCH) - || (cs.flags & IS_NEGATIVE)) { - OFFSHOOT(+1, +2, 1, 1, &cs, regex); - } else { - cs.flags |= INCREMENT_STATE; - } - OFFSHOOT(0, +1, 1, 0, &cs, regex); + goto DEFAULT; + } + switch(*s){ + case '<': { + // XXX: make this legible + if (cs.flags & IS_AT_THE_BEGINNING + && !(cs.flags & DO_CATCH) + && !(cs.flags & IS_NEGATIVE) + && whitelist[0] == '\0') { + // --- + cs.flags |= INCREMENT_STATE; + cs.flags |= DO_FORBID_START_OF_STRING; + strcat(whitelist, JEGER_CHAR_symbol_chars); + // --- + ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex); + ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex); + HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex); + // --- + ++cs.state; + cs.width = 0; + cs.match_width = 0; + HOOK_ALL(0, whitelist, +1, &cs, regex); + cs.width = 1; + OFFSHOOT(0, +1, 1, 0, &cs, regex); + // --- + } else { + HOOK_ALL(0, whitelist, +1, &cs, regex); + if ((cs.flags 
& DO_CATCH) + || (cs.flags & IS_NEGATIVE)) { + OFFSHOOT(+1, +2, 1, 1, &cs, regex); + } else { + cs.flags |= INCREMENT_STATE; + } + OFFSHOOT(0, +1, 1, 0, &cs, regex); + } + cs.flags |= IS_NEGATIVE; + strcat(blacklist, JEGER_CHAR_symbol_chars); + s += 1; + } break; + case '>': { + HOOK_ALL(0, whitelist, +1, &cs, regex); + cs.flags |= IS_NEGATIVE | INCREMENT_STATE; + strcat(blacklist, JEGER_CHAR_symbol_chars); + OFFSHOOT(+1, +2, 0, 0, &cs, regex); + ++cs.state; + s += 1; + } break; } - cs.flags |= IS_NEGATIVE; - strcat(blacklist, JEGER_CHAR_symbol_chars); - s += 1; - } break; - case '>': { - HOOK_ALL(0, whitelist, +1, &cs, regex); - cs.flags |= IS_NEGATIVE | INCREMENT_STATE; - strcat(blacklist, JEGER_CHAR_symbol_chars); - OFFSHOOT(+1, +2, 0, 0, &cs, regex); - ++cs.state; - s += 1; } break; // quantifiers case '=': @@ -631,6 +643,7 @@ regex_t * regex_compile(const char * const pattern) { } s += 1; } break; + DEFAULT: default: { // Literal cs.flags |= INCREMENT_STATE; HOOK_ALL(0, whitelist, +1, &cs, regex); @@ -653,6 +666,7 @@ regex_t * regex_compile(const char * const pattern) { ++cs.state; } + // Purge SOS flag cs.flags &= (~IS_AT_THE_BEGINNING); } @@ -697,12 +711,12 @@ const offshoot_t * catch_table_lookup(const regex_t * const regex, } static -bool regex_assert(const regex_t * const regex, +int regex_assert(const regex_t * const regex, const char * const string, int state, match_t * const match) { if (state == HALT_AND_CATCH_FIRE) { - return false; + return HALT_AND_CATCH_FIRE; } bool last_stand = false; @@ -743,11 +757,16 @@ bool regex_assert(const regex_t * const regex, do_reset = true; } const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match); - if(r){ + if(r == ASSERTION_SUCCESS){ match->width += delta->match_width; return r; - } else if (do_reset) { - match->_pos_ptr = NULL; + } else { + if (r == ASSERTION_FAILURE) { + was_found = false; + } + if (do_reset) { + match->_pos_ptr = NULL; + } } } } @@ -765,7 +784,7 @@ bool regex_assert(const 
regex_t * const regex, } } - return (state == regex->accepting_state); + return ((state == regex->accepting_state) ? ASSERTION_SUCCESS : ASSERTION_FAILURE); } match_t * regex_match(const regex_t * const regex, @@ -796,7 +815,8 @@ match_t * regex_match(const regex_t * const regex, .width = 0, }; - if (regex_assert(regex, s, initial_state, match)) { + if (regex_assert(regex, s, initial_state, match) == 1) { + //printf("true: %s\n", s); if (match->_pos_ptr) { match->position = (match->_pos_ptr - string); } else { @@ -808,6 +828,7 @@ match_t * regex_match(const regex_t * const regex, s += ((match->width > 0) ? match->width : 1); match = (match_t *)malloc(sizeof(match_t)); } else { + //printf("false: %s\n", s); ++s; } } while (*s != '\0');