This commit is contained in:
anon 2023-09-23 12:41:15 +02:00
parent 8080b44613
commit 283e1e48e7
4 changed files with 56 additions and 17 deletions

View File

@ -98,5 +98,13 @@ SOW must match:
Not only that, this combination is key, Not only that, this combination is key,
either it has to be the start of the string either it has to be the start of the string
or there has to be at least something which is not a symbol char. or there has to be at least something which is not a symbol char.
With out the last condition "eexample" would match "\\\<exaplme\\\>" Without the last condition "eexample" would match "\\\<example\\\>"
as the iteration of `regex_match()` reaches "example". as the iteration of `regex_match()` reaches "example".
From a more practical perspective:
``` C
\<myword\>
// Must match
"myword"
" myword"
```

View File

@ -2,3 +2,4 @@
[ ] UTF-8 support [ ] UTF-8 support
[ ] arbitrary memory support (this probably covers UTF-8 support) [ ] arbitrary memory support (this probably covers UTF-8 support)
[ ] documentation that's not shit [ ] documentation that's not shit
[ ] HOOK\_ALL / OFFSHOOT width parameter inconsistency

View File

@ -9,7 +9,9 @@
#include <limits.h> #include <limits.h>
#include <stdlib.h> #include <stdlib.h>
#define JEGER_INIT_STATE 2 #define JEGER_SOS_STATE 0
#define JEGER_NSOS_STATE 1
#define JEGER_INIT_STATE 2
// ------------------ // ------------------
// ### Char tests ### // ### Char tests ###
@ -346,7 +348,7 @@ int escape_1_to_N(const char c,
static inline static inline
int escape_to_negative(const char c, int escape_to_negative(const char c,
compiler_state * const cs) { compiler_state * const cs) {
switch (c) { switch (c) {
case 'D': { case 'D': {
const char digit_chars[] = JEGER_CHAR_SET_digits; const char digit_chars[] = JEGER_CHAR_SET_digits;
@ -481,6 +483,8 @@ regex_t * regex_compile(const char * const pattern) {
.blacklist = blacklist, .blacklist = blacklist,
}; };
bool fucku = true;
for (const char * s = pattern; *s != '\00';) { for (const char * s = pattern; *s != '\00';) {
assert(!is_quantifier(*s) && "Pattern starts with quantifier."); assert(!is_quantifier(*s) && "Pattern starts with quantifier.");
// Reset the compiler // Reset the compiler
@ -539,21 +543,37 @@ regex_t * regex_compile(const char * const pattern) {
s += 1; s += 1;
} break; } break;
case '<': { case '<': {
unsigned true_inc = 1; if (cs.flags & IS_AT_THE_BEGINNING
if ((cs.flags & DO_CATCH) && !(cs.flags & DO_CATCH)
|| (cs.flags & IS_NEGATIVE)) { && !(cs.flags & IS_NEGATIVE)
OFFSHOOT(0, +1, 1, 1, &cs, regex); && whitelist[0] == '\0') {
OFFSHOOT(+1, +2, 1, 1, &cs, regex); // ---
++true_inc;
} else {
cs.flags |= INCREMENT_STATE; cs.flags |= INCREMENT_STATE;
fucku = false;
strcat(whitelist, JEGER_CHAR_symbol_chars);
// ---
ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
// ---
++cs.state;
cs.width = 0;
HOOK_ALL(0, whitelist, +1, &cs, regex);
cs.width = 1;
OFFSHOOT(0, +1, 1, 0, &cs, regex);
// ---
} else {
HOOK_ALL(0, whitelist, +1, &cs, regex);
if ((cs.flags & DO_CATCH)
|| (cs.flags & IS_NEGATIVE)) {
OFFSHOOT(+1, +2, 1, 1, &cs, regex);
} else {
cs.flags |= INCREMENT_STATE;
}
OFFSHOOT(0, +1, 1, 0, &cs, regex);
} }
cs.flags |= IS_NEGATIVE; cs.flags |= IS_NEGATIVE;
if (cs.flags & IS_AT_THE_BEGINNING) {
ABSOLUTE_OFFSHOOT(0, JEGER_INIT_STATE + true_inc, 0, 0, regex);
}
strcat(blacklist, JEGER_CHAR_symbol_chars); strcat(blacklist, JEGER_CHAR_symbol_chars);
//OFFSHOOT(0 + (true_inc-1), +true_inc, 1, 0, &cs, regex);
s += 1; s += 1;
} break; } break;
case '>': { case '>': {
@ -622,11 +642,13 @@ regex_t * regex_compile(const char * const pattern) {
} }
// Init state hookups // Init state hookups
ABSOLUTE_OFFSHOOT(0, JEGER_INIT_STATE, 0, 0, regex); if (fucku) {
ABSOLUTE_OFFSHOOT(JEGER_SOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
}
if (cs.flags & FORCE_START_OF_STRING) { if (cs.flags & FORCE_START_OF_STRING) {
ABSOLUTE_OFFSHOOT(1, HALT_AND_CATCH_FIRE, 0, 0, regex); ABSOLUTE_OFFSHOOT(JEGER_NSOS_STATE, HALT_AND_CATCH_FIRE, 0, 0, regex);
} else { } else {
ABSOLUTE_OFFSHOOT(1, JEGER_INIT_STATE, 0, 0, regex); ABSOLUTE_OFFSHOOT(JEGER_NSOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
} }
regex->accepting_state = cs.state; regex->accepting_state = cs.state;

View File

@ -107,6 +107,14 @@ signed main() {
TEST(R"del(.\<print\>.)del", "printf", false); TEST(R"del(.\<print\>.)del", "printf", false);
TEST(R"del(.\<print\>.)del", "fprintf", false); TEST(R"del(.\<print\>.)del", "fprintf", false);
puts("");
TEST(R"del(\<while\>)del", "while", true);
TEST(R"del(\<while\>)del", " while ", true);
TEST(R"del(\<while\>)del", "9while ", true);
TEST(R"del(\<while\>)del", "for while {", true);
TEST(R"del(\<while\>)del", "for while{", true);
if (test_counter == passed_tests) { if (test_counter == passed_tests) {
fputs("\033[32m", stdout); fputs("\033[32m", stdout);
} else { } else {