Compare commits

..

No commits in common. "9ca9006a9a65ba615e9560985912dd186622f101" and "8080b44613577ca070a52f06c318c044f673c32b" have entirely different histories.

5 changed files with 26 additions and 95 deletions

View File

@ -1,8 +1,4 @@
CXXFLAGS := -fuse-ld=mold -ggdb -Wall -Wextra -Wpedantic
ifeq (${DEBUG}, 1)
CXXFLAGS += -DDEBUG
endif
OUT := regtest
main:

View File

@ -98,13 +98,5 @@ SOW must match:
Not only that, this combination is key,
either it has to be the start of the string
or there has to be at least something which is not a symbol char.
Without the last condition "eexample" would match "\\\<example\\\>"
Without the last condition "eexample" would match "\\\<example\\\>"
as the iteration of `regex_match()` reaches "example".
From a more practical perspective:
``` C
\<myword\>
// Must match
"myword"
" myword"
```

View File

@ -2,4 +2,3 @@
[ ] UTF-8 support
[ ] arbitrary memory support (this probably covers UTF-8 support)
[ ] documentation that's not shit
[ ] HOOK\_ALL / OFFSHOOT width parameter inconsistency

View File

@ -8,12 +8,7 @@
#include <string.h>
#include <limits.h>
#include <stdlib.h>
#if DEBUG
# include <stdio.h>
#endif
#define JEGER_SOS_STATE 0
#define JEGER_NSOS_STATE 1
#define JEGER_INIT_STATE 2
// ------------------
@ -45,16 +40,6 @@ bool is_magic(const char c) {
;
}
// -------------------
// ### Match tests ###
// -------------------
/* Report whether `match` is the sentinel entry, i.e. both its
 * `position` and its `width` fields hold -1.
 *
 * match: entry to inspect; dereferenced unconditionally, so it must not be NULL.
 * Returns true only when both fields equal -1.
 */
static inline
bool is_sentinel(const match_t * const match) {
	/* A real position rules the sentinel out without looking at width. */
	if (match->position != -1) {
		return false;
	}
	return match->width == -1;
}
// -----------------
// ### Char sets ###
// -----------------
@ -114,8 +99,7 @@ enum {
IS_NEGATIVE = 0x00000001 << 1,
IS_AT_THE_BEGINNING = 0x00000001 << 2,
FORCE_START_OF_STRING = 0x00000001 << 3,
DO_FORBID_START_OF_STRING = 0x00000001 << 4,
INCREMENT_STATE = 0x00000001 << 5,
INCREMENT_STATE = 0x00000001 << 4,
};
typedef struct {
@ -555,38 +539,21 @@ regex_t * regex_compile(const char * const pattern) {
s += 1;
} break;
case '<': {
// XXX: make this legible
if (cs.flags & IS_AT_THE_BEGINNING
&& !(cs.flags & DO_CATCH)
&& !(cs.flags & IS_NEGATIVE)
&& whitelist[0] == '\0') {
// ---
cs.flags |= INCREMENT_STATE;
cs.flags |= DO_FORBID_START_OF_STRING;
strcat(whitelist, JEGER_CHAR_symbol_chars);
// ---
ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
// ---
++cs.state;
cs.width = 0;
HOOK_ALL(0, whitelist, +1, &cs, regex);
cs.width = 1;
OFFSHOOT(0, +1, 1, 0, &cs, regex);
// ---
} else {
HOOK_ALL(0, whitelist, +1, &cs, regex);
unsigned true_inc = 1;
if ((cs.flags & DO_CATCH)
|| (cs.flags & IS_NEGATIVE)) {
OFFSHOOT(0, +1, 1, 1, &cs, regex);
OFFSHOOT(+1, +2, 1, 1, &cs, regex);
++true_inc;
} else {
cs.flags |= INCREMENT_STATE;
}
OFFSHOOT(0, +1, 1, 0, &cs, regex);
}
cs.flags |= IS_NEGATIVE;
if (cs.flags & IS_AT_THE_BEGINNING) {
ABSOLUTE_OFFSHOOT(0, JEGER_INIT_STATE + true_inc, 0, 0, regex);
}
strcat(blacklist, JEGER_CHAR_symbol_chars);
//OFFSHOOT(0 + (true_inc-1), +true_inc, 1, 0, &cs, regex);
s += 1;
} break;
case '>': {
@ -655,13 +622,11 @@ regex_t * regex_compile(const char * const pattern) {
}
// Init state hookups
if (!(cs.flags & DO_FORBID_START_OF_STRING)) {
ABSOLUTE_OFFSHOOT(JEGER_SOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
}
ABSOLUTE_OFFSHOOT(0, JEGER_INIT_STATE, 0, 0, regex);
if (cs.flags & FORCE_START_OF_STRING) {
ABSOLUTE_OFFSHOOT(JEGER_NSOS_STATE, HALT_AND_CATCH_FIRE, 0, 0, regex);
ABSOLUTE_OFFSHOOT(1, HALT_AND_CATCH_FIRE, 0, 0, regex);
} else {
ABSOLUTE_OFFSHOOT(JEGER_NSOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
ABSOLUTE_OFFSHOOT(1, JEGER_INIT_STATE, 0, 0, regex);
}
regex->accepting_state = cs.state;
@ -737,6 +702,9 @@ bool regex_assert(const regex_t * const regex,
was_found = true;
const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
if(r){
if (match->position == -1) {
match->position = (s - string);
}
match->width += delta->match_width;
return r;
}
@ -750,9 +718,6 @@ bool regex_assert(const regex_t * const regex,
if (my_catch && (!my_catch->pattern_width || !last_stand)) {
state = my_catch->to;
s += my_catch->pattern_width;
if (match->position < 1) {
match->position = my_catch->match_width;
}
match->width += my_catch->match_width;
goto LOOP;
}
@ -791,11 +756,7 @@ match_t * regex_match(const regex_t * const regex,
};
if (regex_assert(regex, s, initial_state, match)) {
if(match->position == -1){
match->position = (s - string);
}else{
match->position += (s - string);
}
vector_push(&matches, match);
@ -829,7 +790,7 @@ bool regex_search(const regex_t * const regex,
const char * const string) {
match_t * m = regex_match(regex, string, true);
const bool r = !is_sentinel(m);
const bool r = (m->position != -1);
free(m);
return r;

View File

@ -107,14 +107,6 @@ signed main() {
TEST(R"del(.\<print\>.)del", "printf", false);
TEST(R"del(.\<print\>.)del", "fprintf", false);
puts("");
TEST(R"del(\<while\>)del", "while", true);
TEST(R"del(\<while\>)del", " while ", true);
TEST(R"del(\<while\>)del", "9while ", true);
TEST(R"del(\<while\>)del", "for while {", true);
TEST(R"del(\<while\>)del", "for while{", true);
if (test_counter == passed_tests) {
fputs("\033[32m", stdout);
} else {
@ -151,15 +143,6 @@ signed main() {
TEST2( R"del(a+a)del", " aaa", match_t{ 3, strlen("aaa")});
TEST2(R"del(a+\+)del", "aaa+", match_t{ 0, strlen("aaa+")});
puts("");
puts("");
TEST2(R"del(\<while\>)del", "while", match_t{0, strlen("while")});
TEST2(R"del(\<while\>)del", " while", match_t{1, strlen("while")});
TEST2(R"del(\<while\>)del", "for while", match_t{4, strlen("while")});
TEST2(R"del(\<while\>)del", "for9while", match_t{4, strlen("while")});
TEST2(R"del(\<while\>)del", "for9while ", match_t{4, strlen("while")});
if(test_counter2 == passed_tests2) {
fputs("\033[32m", stdout);
} else {