diff --git a/source/hl.h b/source/hl.h index 8cae9f6..89c8fc4 100644 --- a/source/hl.h +++ b/source/hl.h @@ -130,12 +130,13 @@ int new_char_tokens(const char * characters, token_t * new_keyword_token(const char * const word, hl_group_t * const g) { - size_t word_length = strlen(word); - char * new_word = (char*)malloc(word_length + 4 + 1); + char * new_word = strdup(word); + //size_t word_length = strlen(word); + //char * new_word = (char*)malloc(word_length + 4 + 1); - memcpy(new_word, "\\<", 2); - memcpy(new_word + 2, word, word_length); - strcpy(new_word + 2 + word_length, "\\>"); + //memcpy(new_word, "\\<", 2); + //memcpy(new_word + 2, word, word_length); + //strcpy(new_word + 2 + word_length, "\\>"); token_t * mt = (token_t*)malloc(sizeof(token_t)); diff --git a/source/regex.c b/source/regex.c index 2adcfa1..1c1f7ea 100644 --- a/source/regex.c +++ b/source/regex.c @@ -11,7 +11,7 @@ // ### Char tests ### // ------------------ static bool is_quantifier(const char c) { - for (const char * s = "+*?"; *s != '\00'; s++) { + for (const char * s = "+*?="; *s != '\00'; s++) { if (*s == c) { return true; } @@ -40,6 +40,7 @@ typedef struct { int in; char input; int to; + int width; } delta_t; typedef struct { @@ -75,6 +76,9 @@ static int escape_1_to_1(const char c, char * whitelist) { case '.': { strcat(whitelist, "."); } return 1; + case '=': { + strcat(whitelist, "="); + } return 1; case '?': { strcat(whitelist, "?"); } return 1; @@ -248,12 +252,26 @@ static int escape_1_to_N(const char c, char * whitelist) { return 0; } +//static int compile_hologram(char * hologram, char * whitelist) { +// if (hologram[0] == '\\') { +// switch (hologram[1]) { +// case '<': { +// const char very_word_chars[] = "abcdefghijklmnopqrstuwxyz" +// "ABCDEFGHIJKLMNOPQRSTUWXYZ" +// "_"; +// strcat(whitelist, very_word_chars); +// is_negative = true; +// HOOK_ALL(0, whitelist, 0) +// } break; +// } +// } +//} + static int compile_range(const char * const range, char * whitelist, bool * 
is_negative) { - assert(range[0] == '[' && "Not a range."); + assert((range[0] == '[') && "Not a range."); - int r = 0; const char * s; if (range[1] == '^') { *is_negative = true; @@ -262,21 +280,20 @@ static int compile_range(const char * const range, s = range + 1; } for (; *s != ']'; s++) { - assert(*s != '\00' && "Unclosed range."); + assert((*s != '\0') && "Unclosed range."); char c = *s; if (escape_1_to_1(c, whitelist) || escape_1_to_N(c, whitelist)) { ; } else if (*(s+1) == '-') { char end = *(s+2); - assert(c < end && "Endless range."); + assert((c < end) && "Endless range."); for (char cc = c; cc < end+1; cc++) { strncat(whitelist, &cc, 1); - strncat(whitelist, "\00", 1); + strncat(whitelist, "\0", 1); } s += 2; } else { - ++r; strncat(whitelist, &c, 1); strncat(whitelist, "\00", 1); } @@ -288,7 +305,7 @@ static int compile_range(const char * const range, static bool catch_(const regex_t * const regex, int * const state) { - for (int i = 0; i < regex->catch_table.element_size; i++){ + for (size_t i = 0; i < regex->catch_table.element_size; i++){ const offshoot_t * const offshoot = (vector_get(&regex->catch_table, i)); if (offshoot->in == *state) { *state = offshoot->to; @@ -300,18 +317,18 @@ static bool catch_(const regex_t * const regex, #define HALT_AND_CATCH_FIRE -1 -#define HOOK_ALL(from, str, to) do { \ - int hook_to = (is_negative) ? -1 : state + to; \ - for (char * s = str; *s != '\00'; s++) { \ - vector_push(&regex->delta_table, \ - &(delta_t){state + from, *s, hook_to} \ - ); \ - } \ - if (do_catch) { \ - vector_push(&regex->catch_table, \ - &(offshoot_t){state + from, hook_to} \ - ); \ - } \ +#define HOOK_ALL(from, str, to) do { \ + int hook_to = (is_negative) ? 
-1 : state + to; \ + for (char * s = str; *s != '\0'; s++) { \ + vector_push(&regex->delta_table, \ + &(delta_t){state + from, *s, hook_to, width} \ + ); \ + } \ + if (do_catch || is_negative) { \ + vector_push(&regex->catch_table, \ + &(offshoot_t){state + from, hook_to} \ + ); \ + } \ } while (0) #define EAT(n) do { \ @@ -321,25 +338,30 @@ static bool catch_(const regex_t * const regex, regex_t * regex_compile(const char * const pattern) { regex_t * regex = (regex_t *)malloc(sizeof(regex_t)); regex->str = strdup(pattern); - vector_init(&regex->delta_table, sizeof(delta_t), 32); - vector_init(&regex->catch_table, sizeof(offshoot_t), 16); + vector_init(&regex->delta_table, sizeof(delta_t), 0); + vector_init(&regex->catch_table, sizeof(offshoot_t), 0); int state = 0; char whitelist[64]; bool do_catch; bool is_negative; + int width; for (const char * s = pattern; *s != '\00';) { // Get token assert(!is_quantifier(*pattern) && "Pattern starts with quantifier."); whitelist[0] = '\00'; do_catch = false; + width = 1; switch (*s) { case '.': { do_catch = true; } break; case '\\': { + //if (compile_hologram(*s, whitelist)) { + // break; + //} EAT(1); if(escape_1_to_1(*s, whitelist) || escape_1_to_N(*s, whitelist)){ @@ -361,6 +383,7 @@ regex_t * regex_compile(const char * const pattern) { // Get quantifier switch (*s) { + case '=': case '?': { HOOK_ALL(0, whitelist, +1); EAT(1); @@ -406,11 +429,11 @@ static bool regex_assert(const regex_t * const regex, for (const char * s = string; *s != '\00'; s++) { // delta - for (int i = 0; i < regex->delta_table.element_count; i++) { + for (size_t i = 0; i < regex->delta_table.element_count; i++) { const delta_t * const delta = (delta_t *)(vector_get(&regex->delta_table, i)); if ((delta->in == state) && (delta->input == *s)) { - if(regex_assert(regex, s + 1, delta->to)){ + if(regex_assert(regex, s + delta->width, delta->to)){ return true; } }