negated ranges should work (not tested)

This commit is contained in:
anon 2023-08-24 14:32:36 +02:00
parent 4011a10a32
commit af13a1bf7f

View File

@ -18,25 +18,6 @@ typedef struct {
int to; int to;
} offshoot_t; } offshoot_t;
/* Sentinel state value (-1). Parenthesized so the macro stays a single
 * operand wherever it is expanded. Its consumers are outside this view. */
#define HALT_AND_CATCH_FIRE (-1)

/*
 * HOOK_ALL(from, str, to)
 *
 * For every character of the NUL-terminated string `str`, pushes a
 * delta_t {state + from, character, state + to} onto regex->delta_table.
 * If `do_catch` is true, additionally pushes one
 * offshoot_t {state + from, state + to} onto regex->catch_table.
 *
 * Relies on `regex`, `state` and `do_catch` being in scope at the
 * expansion site.
 *
 * Hygiene: every argument is parenthesized and evaluated once, and the
 * internal names carry a trailing underscore so that they cannot shadow
 * a caller variable (e.g. a pattern cursor called `s`) that is passed in
 * as an argument.
 */
#define HOOK_ALL(from, str, to) do { \
    const int hook_from_ = state + (from); \
    const int hook_to_ = state + (to); \
    for (const char *hook_c_ = (str); *hook_c_ != '\0'; hook_c_++) { \
        vector_push(&regex->delta_table, \
                    &(delta_t){hook_from_, *hook_c_, hook_to_}); \
    } \
    if (do_catch) { \
        vector_push(&regex->catch_table, \
                    &(offshoot_t){hook_from_, hook_to_}); \
    } \
} while (0)

/*
 * EAT(n)
 *
 * Advances the caller's cursor variable `s` by `n`. The variable `s`
 * is taken from the expansion site on purpose.
 */
#define EAT(n) do { \
    s += (n); \
} while (0)
static bool is_quantifier(const char c) { static bool is_quantifier(const char c) {
for (const char * s = "+*?"; *s != '\00'; s++) { for (const char * s = "+*?"; *s != '\00'; s++) {
if (*s == c) { if (*s == c) {
@ -192,12 +173,19 @@ static int escape_1_to_N(const char c, char * whitelist) {
} }
static int compile_range(const char * const range, static int compile_range(const char * const range,
char * whitelist) { char * whitelist,
bool * is_negative) {
assert(range[0] == '[' && "Not a range."); assert(range[0] == '[' && "Not a range.");
int r = 0; int r = 0;
const char * s; const char * s;
for (s = range+1; *s != ']'; s++) { if (range[1] == '^') {
*is_negative = true;
s = range + 2;
} else {
s = range + 1;
}
for (; *s != ']'; s++) {
assert(*s != '\00' && "Unclosed range."); assert(*s != '\00' && "Unclosed range.");
char c = *s; char c = *s;
if (escape_1_to_1(c, whitelist) if (escape_1_to_1(c, whitelist)
@ -221,6 +209,26 @@ static int compile_range(const char * const range,
return ((s - range) + 1); return ((s - range) + 1);
} }
/* Sentinel state value (-1). HOOK_ALL uses it as the transition target
 * when `is_negative` is set. Parenthesized so the macro stays a single
 * operand wherever it is expanded. */
#define HALT_AND_CATCH_FIRE (-1)

/*
 * HOOK_ALL(from, str, to)
 *
 * For every character of the NUL-terminated string `str`, pushes a
 * delta_t {state + from, character, target} onto regex->delta_table,
 * where target is HALT_AND_CATCH_FIRE if `is_negative` is true and
 * state + to otherwise. If `do_catch` is true, additionally pushes one
 * offshoot_t {state + from, target} onto regex->catch_table.
 *
 * Relies on `regex`, `state`, `do_catch` and `is_negative` being in
 * scope at the expansion site.
 *
 * NOTE(review): in the visible hunks `is_negative` is declared without an
 * initializer and is only ever assigned `true` (inside compile_range);
 * confirm it is reset to false for every token before this macro reads it.
 * NOTE(review): when `is_negative` is set, the catch_table entry is also
 * pointed at HALT_AND_CATCH_FIRE, so this macro emits no transition to
 * state + to at all -- confirm that is what negated ranges need.
 *
 * Hygiene: every argument is parenthesized and `from` is evaluated once,
 * and the internal names carry a trailing underscore so that they cannot
 * shadow a caller variable (e.g. a pattern cursor called `s`) that is
 * passed in as an argument.
 */
#define HOOK_ALL(from, str, to) do { \
    const int hook_from_ = state + (from); \
    const int hook_to_ = (is_negative) ? HALT_AND_CATCH_FIRE : state + (to); \
    for (const char *hook_c_ = (str); *hook_c_ != '\0'; hook_c_++) { \
        vector_push(&regex->delta_table, \
                    &(delta_t){hook_from_, *hook_c_, hook_to_}); \
    } \
    if (do_catch) { \
        vector_push(&regex->catch_table, \
                    &(offshoot_t){hook_from_, hook_to_}); \
    } \
} while (0)

/*
 * EAT(n)
 *
 * Advances the caller's cursor variable `s` by `n`. The variable `s`
 * is taken from the expansion site on purpose.
 */
#define EAT(n) do { \
    s += (n); \
} while (0)
regex_t * regex_compile(const char * const pattern) { regex_t * regex_compile(const char * const pattern) {
regex_t * regex = (regex_t *)malloc(sizeof(regex_t)); regex_t * regex = (regex_t *)malloc(sizeof(regex_t));
regex->str = strdup(pattern); regex->str = strdup(pattern);
@ -231,11 +239,13 @@ regex_t * regex_compile(const char * const pattern) {
char whitelist[64]; char whitelist[64];
bool do_catch; bool do_catch;
bool is_negative;
for (const char * s = pattern; *s != '\00';) { for (const char * s = pattern; *s != '\00';) {
// Get token // Get token
assert(!is_quantifier(*pattern) && "Pattern starts with quantifier."); assert(!is_quantifier(*pattern) && "Pattern starts with quantifier.");
whitelist[0] = '\00'; whitelist[0] = '\00';
do_catch = false; do_catch = false;
switch (*s) { switch (*s) {
case '.': { case '.': {
do_catch = true; do_catch = true;
@ -250,7 +260,7 @@ regex_t * regex_compile(const char * const pattern) {
} }
} break; } break;
case '[': { case '[': {
EAT(compile_range(s, whitelist)-1); EAT(compile_range(s, whitelist, &is_negative)-1);
} break; } break;
default: { default: {
whitelist[0] = *s; whitelist[0] = *s;