|
|
@@ -24,7 +24,7 @@ bool is_magic(const char c) { |
|
|
|
if (is_quantifier(c)) { |
|
|
|
return true; |
|
|
|
} |
|
|
|
for (const char * s = "\\[]."; *s != '\00'; s++) { |
|
|
|
for (const char * s = "\\[].^"; *s != '\00'; s++) { |
|
|
|
if (*s == c) { |
|
|
|
return true; |
|
|
|
} |
|
|
@@ -47,11 +47,18 @@ typedef struct { |
|
|
|
typedef struct { |
|
|
|
int in; |
|
|
|
int to; |
|
|
|
int width; |
|
|
|
} offshoot_t; |
|
|
|
|
|
|
|
typedef struct { |
|
|
|
bool * do_catch; |
|
|
|
bool * is_negative; |
|
|
|
// these might be obsolete, but I'm leaving them for now |
|
|
|
bool * do_loop_hook; |
|
|
|
bool * do_follow_hook; |
|
|
|
bool * do_loop_shoot; |
|
|
|
bool * do_follow_shoot; |
|
|
|
// --- |
|
|
|
int * state; |
|
|
|
int * width; |
|
|
|
char * whitelist; |
|
|
@@ -88,6 +95,9 @@ static int escape_1_to_1(const char c, compiler_state * cs) { |
|
|
|
case '.': { |
|
|
|
strcat(target_list, "."); |
|
|
|
} return 1; |
|
|
|
case '^': { |
|
|
|
strcat(target_list, "^"); |
|
|
|
} return 1; |
|
|
|
case '=': { |
|
|
|
strcat(target_list, "="); |
|
|
|
} return 1; |
|
|
@@ -365,7 +375,7 @@ void HOOK_ALL( int from, |
|
|
|
int to, |
|
|
|
compiler_state * cs) { |
|
|
|
|
|
|
|
int hook_to = (to == HALT_AND_CATCH_FIRE) ? -1 : ((*cs->state) + to); |
|
|
|
int hook_to = (to == HALT_AND_CATCH_FIRE) ? HALT_AND_CATCH_FIRE : ((*cs->state) + to); |
|
|
|
|
|
|
|
|
|
|
|
for (const char * s = str; *s != '\0'; s++) { |
|
|
@@ -379,26 +389,39 @@ void HOOK_ALL( int from, |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
void OFFSHOOT(int from, |
|
|
|
int to, |
|
|
|
compiler_state * cs) { |
|
|
|
void ABSOLUTE_OFFSHOOT(int from, |
|
|
|
int to, |
|
|
|
int width, |
|
|
|
compiler_state * cs) { |
|
|
|
offshoot_t * offshoot = malloc(sizeof(offshoot_t)); |
|
|
|
offshoot->in = *cs->state + from; |
|
|
|
offshoot->to = *cs->state + to; |
|
|
|
offshoot->in = from; |
|
|
|
offshoot->to = to; |
|
|
|
offshoot->width = width; |
|
|
|
vector_push(&cs->regex->catch_table, |
|
|
|
&offshoot); |
|
|
|
} |
|
|
|
|
|
|
|
void OFFSHOOT(int from, |
|
|
|
int to, |
|
|
|
int width, |
|
|
|
compiler_state * cs) { |
|
|
|
ABSOLUTE_OFFSHOOT(*cs->state + from, *cs->state + to, width, cs); |
|
|
|
} |
|
|
|
|
|
|
|
regex_t * regex_compile(const char * const pattern) { |
|
|
|
regex_t * regex = (regex_t *)malloc(sizeof(regex_t)); |
|
|
|
regex->str = strdup(pattern); |
|
|
|
vector_init(®ex->delta_table, sizeof(delta_t*), 0UL); |
|
|
|
vector_init(®ex->catch_table, sizeof(offshoot_t*), 0UL); |
|
|
|
|
|
|
|
int state = 0; |
|
|
|
int state = 2; |
|
|
|
|
|
|
|
bool do_catch; |
|
|
|
bool is_negative; |
|
|
|
bool do_loop_hook; |
|
|
|
bool do_follow_hook; |
|
|
|
bool do_loop_shoot; |
|
|
|
bool do_follow_shoot; |
|
|
|
int width; |
|
|
|
char whitelist[64]; |
|
|
|
char blacklist[64]; |
|
|
@@ -416,14 +439,32 @@ regex_t * regex_compile(const char * const pattern) { |
|
|
|
for (const char * s = pattern; *s != '\00';) { |
|
|
|
// Reset the compiler |
|
|
|
assert(!is_quantifier(*pattern) && "Pattern starts with quantifier."); |
|
|
|
whitelist[0] = '\00'; |
|
|
|
blacklist[0] = '\00'; |
|
|
|
do_catch = false; |
|
|
|
is_negative = false; |
|
|
|
whitelist[0] = '\0'; |
|
|
|
blacklist[0] = '\0'; |
|
|
|
do_catch = false; |
|
|
|
is_negative = false; |
|
|
|
do_loop_hook = false; |
|
|
|
do_follow_hook = false; |
|
|
|
do_loop_shoot = false; |
|
|
|
do_follow_shoot = false; |
|
|
|
width = 1; |
|
|
|
|
|
|
|
// Translate char |
|
|
|
switch (*s) { |
|
|
|
case '^': { |
|
|
|
if (s == pattern) { |
|
|
|
ABSOLUTE_OFFSHOOT(0, 2, 0, &cs); |
|
|
|
ABSOLUTE_OFFSHOOT(1, HALT_AND_CATCH_FIRE, 0, &cs); |
|
|
|
} |
|
|
|
whitelist[0] = '\n'; |
|
|
|
whitelist[1] = '\0'; |
|
|
|
HOOK_ALL(0, whitelist, 0, &cs); |
|
|
|
if (s != pattern) { |
|
|
|
state += 1; |
|
|
|
} |
|
|
|
s += 1; |
|
|
|
goto long_continue; |
|
|
|
} break; |
|
|
|
case '.': { |
|
|
|
compile_dot(&cs); |
|
|
|
} break; |
|
|
@@ -435,8 +476,8 @@ regex_t * regex_compile(const char * const pattern) { |
|
|
|
s += compile_range(s, &cs) - 1; |
|
|
|
} break; |
|
|
|
default: { |
|
|
|
whitelist[0] = *s; |
|
|
|
whitelist[1] = '\00'; |
|
|
|
whitelist[0] = *s; |
|
|
|
whitelist[1] = '\0'; |
|
|
|
} break; |
|
|
|
} |
|
|
|
|
|
|
@@ -446,37 +487,38 @@ regex_t * regex_compile(const char * const pattern) { |
|
|
|
switch (*s) { |
|
|
|
case '=': |
|
|
|
case '?': { |
|
|
|
do_loop_hook = true; |
|
|
|
HOOK_ALL(0, whitelist, +1, &cs); |
|
|
|
if (do_catch || is_negative) { |
|
|
|
OFFSHOOT(0, +1, &cs); |
|
|
|
OFFSHOOT(0, +1, 1, &cs); |
|
|
|
} |
|
|
|
s += 1; |
|
|
|
} break; |
|
|
|
case '*': { |
|
|
|
HOOK_ALL(0, whitelist, 0, &cs); |
|
|
|
if (do_catch) { |
|
|
|
OFFSHOOT(0, +1, &cs); |
|
|
|
OFFSHOOT(0, +1, 1, &cs); |
|
|
|
} else if (is_negative) { |
|
|
|
OFFSHOOT(0, 0, &cs); |
|
|
|
OFFSHOOT(0, 0, 1, &cs); |
|
|
|
} |
|
|
|
s += 1; |
|
|
|
} break; |
|
|
|
case '+': { |
|
|
|
HOOK_ALL(0, whitelist, +1, &cs); |
|
|
|
if (do_catch || is_negative) { |
|
|
|
OFFSHOOT(0, +1, &cs); |
|
|
|
OFFSHOOT(0, +1, 1, &cs); |
|
|
|
} |
|
|
|
state += 1; |
|
|
|
HOOK_ALL(0, whitelist, 0, &cs); |
|
|
|
if (do_catch || is_negative) { |
|
|
|
OFFSHOOT(0, 0, &cs); |
|
|
|
OFFSHOOT(0, 0, 1, &cs); |
|
|
|
} |
|
|
|
s += 1; |
|
|
|
} break; |
|
|
|
default: { // Literal |
|
|
|
HOOK_ALL(0, whitelist, +1, &cs); |
|
|
|
if (do_catch || is_negative) { |
|
|
|
OFFSHOOT(0, +1, &cs); |
|
|
|
OFFSHOOT(0, +1, 1, &cs); |
|
|
|
} |
|
|
|
state += 1; |
|
|
|
} break; |
|
|
@@ -489,6 +531,7 @@ regex_t * regex_compile(const char * const pattern) { |
|
|
|
filter_blacklist(whitelist, blacklist, filtered_blacklist); |
|
|
|
HOOK_ALL(0, filtered_blacklist, HALT_AND_CATCH_FIRE, &cs); |
|
|
|
} |
|
|
|
long_continue: |
|
|
|
} |
|
|
|
|
|
|
|
regex->accepting_state = state; |
|
|
@@ -509,37 +552,40 @@ int regex_free(regex_t * const regex) { |
|
|
|
// ----------------- |
|
|
|
// ### Searching ### |
|
|
|
// ----------------- |
|
|
|
static bool catch_(const regex_t * const regex, |
|
|
|
static int catch_(const regex_t * const regex, |
|
|
|
int * const state) { |
|
|
|
for (size_t i = 0; i < regex->catch_table.element_count; i++){ |
|
|
|
const offshoot_t * const offshoot = *(offshoot_t**)vector_get(®ex->catch_table, i); |
|
|
|
if (offshoot->in == *state) { |
|
|
|
*state = offshoot->to; |
|
|
|
return true; |
|
|
|
return offshoot->width; |
|
|
|
} |
|
|
|
} |
|
|
|
return false; |
|
|
|
return HALT_AND_CATCH_FIRE; |
|
|
|
} |
|
|
|
|
|
|
|
static int regex_assert(const regex_t * const regex, |
|
|
|
const char * const string, |
|
|
|
int state, |
|
|
|
int width) { |
|
|
|
for (const char * s = string; *s != '\00'; s++) { |
|
|
|
static int regex_assert(const regex_t * const regex, |
|
|
|
const char * const string, |
|
|
|
const int string_offset, |
|
|
|
int state, |
|
|
|
int width) { // XXX: I'm pretty sure this is actually redundant and the width should be calculated from s - string |
|
|
|
for (const char * s = (string + string_offset); *s != '\00';) { |
|
|
|
// delta |
|
|
|
for (size_t i = 0; i < regex->delta_table.element_count; i++) { |
|
|
|
const delta_t * const delta = *(delta_t**)vector_get(®ex->delta_table, i); |
|
|
|
if ((delta->in == state) |
|
|
|
&& (delta->input == *s)) { |
|
|
|
int r = regex_assert(regex, s + delta->width, delta->to, width + 1); |
|
|
|
int r = regex_assert(regex, string, (s - string) + delta->width, delta->to, width + 1); |
|
|
|
if(r){ |
|
|
|
return r; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
if (catch_(regex, &state)) { |
|
|
|
width += 1; |
|
|
|
const int catch_width = catch_(regex, &state); |
|
|
|
if ((catch_width != HALT_AND_CATCH_FIRE) |
|
|
|
&& (state != HALT_AND_CATCH_FIRE)) { |
|
|
|
s += catch_width; |
|
|
|
continue; |
|
|
|
} |
|
|
|
|
|
|
@@ -549,8 +595,10 @@ static int regex_assert(const regex_t * const regex, |
|
|
|
return false; |
|
|
|
} |
|
|
|
|
|
|
|
int regex_match( regex_t * regex, |
|
|
|
const char * const string) { |
|
|
|
int regex_match( regex_t * regex, |
|
|
|
const char * const string, |
|
|
|
const bool is_start_of_string, |
|
|
|
const int string_offset) { // XXX: this parameter is redundant and should be removed |
|
|
|
if (regex == NULL) { |
|
|
|
return false; |
|
|
|
} |
|
|
@@ -558,11 +606,13 @@ int regex_match( regex_t * regex, |
|
|
|
return true; |
|
|
|
} |
|
|
|
|
|
|
|
return regex_assert(regex, string, 0, 0); |
|
|
|
const int initial_state = (int)(!is_start_of_string); |
|
|
|
|
|
|
|
return regex_assert(regex, string, string_offset, initial_state, 0); |
|
|
|
} |
|
|
|
|
|
|
|
bool regex_search( regex_t * regex, |
|
|
|
const char * const string) { |
|
|
|
|
|
|
|
return (bool)regex_match(regex, string); |
|
|
|
return (bool)regex_match(regex, string, true, 0); |
|
|
|
} |