bugs go on and on, and they don't end

This commit is contained in:
anon 2023-09-23 14:52:40 +02:00
parent 283e1e48e7
commit 9ca9006a9a
3 changed files with 43 additions and 13 deletions

View File

@ -1,4 +1,8 @@
CXXFLAGS := -fuse-ld=mold -ggdb -Wall -Wextra -Wpedantic CXXFLAGS := -fuse-ld=mold -ggdb -Wall -Wextra -Wpedantic
ifeq (${DEBUG}, 1)
CXXFLAGS += -DDEBUG
endif
OUT := regtest OUT := regtest
main: main:

View File

@ -8,6 +8,9 @@
#include <string.h> #include <string.h>
#include <limits.h> #include <limits.h>
#include <stdlib.h> #include <stdlib.h>
#if DEBUG
# include <stdio.h>
#endif
#define JEGER_SOS_STATE 0 #define JEGER_SOS_STATE 0
#define JEGER_NSOS_STATE 1 #define JEGER_NSOS_STATE 1
@ -42,6 +45,16 @@ bool is_magic(const char c) {
; ;
} }
// -------------------
// ### Match tests ###
// -------------------
// Reports whether `match` is a sentinel entry: one whose `position`
// and `width` fields both hold -1.
// Returns true only when both fields equal -1; false otherwise.
static inline
bool is_sentinel(const match_t * const match) {
    // A real position rules the sentinel out immediately.
    if (match->position != -1) {
        return false;
    }
    return match->width == -1;
}
// ----------------- // -----------------
// ### Char sets ### // ### Char sets ###
// ----------------- // -----------------
@ -97,11 +110,12 @@ typedef struct {
} offshoot_t; } offshoot_t;
enum { enum {
DO_CATCH = 0x00000001 << 0, DO_CATCH = 0x00000001 << 0,
IS_NEGATIVE = 0x00000001 << 1, IS_NEGATIVE = 0x00000001 << 1,
IS_AT_THE_BEGINNING = 0x00000001 << 2, IS_AT_THE_BEGINNING = 0x00000001 << 2,
FORCE_START_OF_STRING = 0x00000001 << 3, FORCE_START_OF_STRING = 0x00000001 << 3,
INCREMENT_STATE = 0x00000001 << 4, DO_FORBID_START_OF_STRING = 0x00000001 << 4,
INCREMENT_STATE = 0x00000001 << 5,
}; };
typedef struct { typedef struct {
@ -483,8 +497,6 @@ regex_t * regex_compile(const char * const pattern) {
.blacklist = blacklist, .blacklist = blacklist,
}; };
bool fucku = true;
for (const char * s = pattern; *s != '\00';) { for (const char * s = pattern; *s != '\00';) {
assert(!is_quantifier(*s) && "Pattern starts with quantifier."); assert(!is_quantifier(*s) && "Pattern starts with quantifier.");
// Reset the compiler // Reset the compiler
@ -543,13 +555,14 @@ regex_t * regex_compile(const char * const pattern) {
s += 1; s += 1;
} break; } break;
case '<': { case '<': {
// XXX: make this legible
if (cs.flags & IS_AT_THE_BEGINNING if (cs.flags & IS_AT_THE_BEGINNING
&& !(cs.flags & DO_CATCH) && !(cs.flags & DO_CATCH)
&& !(cs.flags & IS_NEGATIVE) && !(cs.flags & IS_NEGATIVE)
&& whitelist[0] == '\0') { && whitelist[0] == '\0') {
// --- // ---
cs.flags |= INCREMENT_STATE; cs.flags |= INCREMENT_STATE;
fucku = false; cs.flags |= DO_FORBID_START_OF_STRING;
strcat(whitelist, JEGER_CHAR_symbol_chars); strcat(whitelist, JEGER_CHAR_symbol_chars);
// --- // ---
ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex); ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
@ -642,7 +655,7 @@ regex_t * regex_compile(const char * const pattern) {
} }
// Init state hookups // Init state hookups
if (fucku) { if (!(cs.flags & DO_FORBID_START_OF_STRING)) {
ABSOLUTE_OFFSHOOT(JEGER_SOS_STATE, JEGER_INIT_STATE, 0, 0, regex); ABSOLUTE_OFFSHOOT(JEGER_SOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
} }
if (cs.flags & FORCE_START_OF_STRING) { if (cs.flags & FORCE_START_OF_STRING) {
@ -724,9 +737,6 @@ bool regex_assert(const regex_t * const regex,
was_found = true; was_found = true;
const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match); const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
if(r){ if(r){
if (match->position == -1) {
match->position = (s - string);
}
match->width += delta->match_width; match->width += delta->match_width;
return r; return r;
} }
@ -740,6 +750,9 @@ bool regex_assert(const regex_t * const regex,
if (my_catch && (!my_catch->pattern_width || !last_stand)) { if (my_catch && (!my_catch->pattern_width || !last_stand)) {
state = my_catch->to; state = my_catch->to;
s += my_catch->pattern_width; s += my_catch->pattern_width;
if (match->position < 1) {
match->position = my_catch->match_width;
}
match->width += my_catch->match_width; match->width += my_catch->match_width;
goto LOOP; goto LOOP;
} }
@ -778,7 +791,11 @@ match_t * regex_match(const regex_t * const regex,
}; };
if (regex_assert(regex, s, initial_state, match)) { if (regex_assert(regex, s, initial_state, match)) {
if(match->position == -1){
match->position = (s - string); match->position = (s - string);
}else{
match->position += (s - string);
}
vector_push(&matches, match); vector_push(&matches, match);
@ -812,7 +829,7 @@ bool regex_search(const regex_t * const regex,
const char * const string) { const char * const string) {
match_t * m = regex_match(regex, string, true); match_t * m = regex_match(regex, string, true);
const bool r = (m->position != -1); const bool r = !is_sentinel(m);
free(m); free(m);
return r; return r;

View File

@ -151,6 +151,15 @@ signed main() {
TEST2( R"del(a+a)del", " aaa", match_t{ 3, strlen("aaa")}); TEST2( R"del(a+a)del", " aaa", match_t{ 3, strlen("aaa")});
TEST2(R"del(a+\+)del", "aaa+", match_t{ 0, strlen("aaa+")}); TEST2(R"del(a+\+)del", "aaa+", match_t{ 0, strlen("aaa+")});
puts("");
puts("");
TEST2(R"del(\<while\>)del", "while", match_t{0, strlen("while")});
TEST2(R"del(\<while\>)del", " while", match_t{1, strlen("while")});
TEST2(R"del(\<while\>)del", "for while", match_t{4, strlen("while")});
TEST2(R"del(\<while\>)del", "for9while", match_t{4, strlen("while")});
TEST2(R"del(\<while\>)del", "for9while ", match_t{4, strlen("while")});
if(test_counter2 == passed_tests2) { if(test_counter2 == passed_tests2) {
fputs("\033[32m", stdout); fputs("\033[32m", stdout);
} else { } else {