From af13a1bf7f9c3c18c0eedb00b509b87d4a8ef88e Mon Sep 17 00:00:00 2001 From: anon Date: Thu, 24 Aug 2023 14:32:36 +0200 Subject: [PATCH] negated ranges should work (not tested) --- source/regex.c | 54 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/source/regex.c b/source/regex.c index 2582d99..f0a5d00 100644 --- a/source/regex.c +++ b/source/regex.c @@ -18,25 +18,6 @@ typedef struct { int to; } offshoot_t; -#define HALT_AND_CATCH_FIRE -1 - -#define HOOK_ALL(from, str, to) do { \ - for (char * s = str; *s != '\00'; s++) { \ - vector_push(&regex->delta_table, \ - &(delta_t){state + from, *s, state + to} \ - ); \ - } \ - if (do_catch) { \ - vector_push(&regex->catch_table, \ - &(offshoot_t){state + from, state + to} \ - ); \ - } \ -} while (0) - -#define EAT(n) do { \ - s += n; \ -} while (0) - static bool is_quantifier(const char c) { for (const char * s = "+*?"; *s != '\00'; s++) { if (*s == c) { @@ -192,12 +173,19 @@ static int escape_1_to_N(const char c, char * whitelist) { } static int compile_range(const char * const range, - char * whitelist) { + char * whitelist, + bool * is_negative) { assert(range[0] == '[' && "Not a range."); int r = 0; const char * s; - for (s = range+1; *s != ']'; s++) { + if (range[1] == '^') { + *is_negative = true; + s = range + 2; + } else { + s = range + 1; + } + for (; *s != ']'; s++) { assert(*s != '\00' && "Unclosed range."); char c = *s; if (escape_1_to_1(c, whitelist) @@ -221,6 +209,26 @@ static int compile_range(const char * const range, return ((s - range) + 1); } +#define HALT_AND_CATCH_FIRE -1 + +#define HOOK_ALL(from, str, to) do { \ + int hook_to = (is_negative) ? 
-1 : state + to; \ + for (char * s = str; *s != '\00'; s++) { \ + vector_push(&regex->delta_table, \ + &(delta_t){state + from, *s, hook_to} \ + ); \ + } \ + if (do_catch) { \ + vector_push(&regex->catch_table, \ + &(offshoot_t){state + from, hook_to} \ + ); \ + } \ +} while (0) + +#define EAT(n) do { \ + s += n; \ +} while (0) + regex_t * regex_compile(const char * const pattern) { regex_t * regex = (regex_t *)malloc(sizeof(regex_t)); regex->str = strdup(pattern); @@ -231,11 +239,13 @@ regex_t * regex_compile(const char * const pattern) { char whitelist[64]; bool do_catch; + bool is_negative; for (const char * s = pattern; *s != '\00';) { // Get token assert(!is_quantifier(*pattern) && "Pattern starts with quantifier."); whitelist[0] = '\00'; do_catch = false; + switch (*s) { case '.': { do_catch = true; @@ -250,7 +260,7 @@ regex_t * regex_compile(const char * const pattern) { } } break; case '[': { - EAT(compile_range(s, whitelist)-1); + EAT(compile_range(s, whitelist, &is_negative)-1); } break; default: { whitelist[0] = *s;