Compare commits
2 Commits
8080b44613
...
9ca9006a9a
Author | SHA1 | Date | |
---|---|---|---|
9ca9006a9a | |||
283e1e48e7 |
4
Makefile
4
Makefile
@ -1,4 +1,8 @@
|
||||
CXXFLAGS := -fuse-ld=mold -ggdb -Wall -Wextra -Wpedantic
|
||||
ifeq (${DEBUG}, 1)
|
||||
CXXFLAGS += -DDEBUG
|
||||
endif
|
||||
|
||||
OUT := regtest
|
||||
|
||||
main:
|
||||
|
@ -98,5 +98,13 @@ SOW must match:
|
||||
Not only that, this combination is key,
|
||||
either it has to be the start of the string
|
||||
or there has to be at least something which is not a symbol char.
|
||||
With out the last condition "eexample" would match "\\\<exaplme\\\>"
|
||||
Without the last condition "eexample" would match "\\\<example\\\>"
|
||||
as the iteration of `regex_match()` reaches "example".
|
||||
|
||||
From a more practical perspective:
|
||||
``` C
|
||||
\<myword\>
|
||||
// Must match
|
||||
"myword"
|
||||
" myword"
|
||||
```
|
||||
|
@ -2,3 +2,4 @@
|
||||
[ ] UTF-8 support
|
||||
[ ] arbitrary memory support (this probably covers UTF-8 support)
|
||||
[ ] documentation that's not shit
|
||||
[ ] HOOK_ALL / OFFSHOOT width parameter inconsistency
|
||||
|
@ -8,8 +8,13 @@
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#if DEBUG
|
||||
# include <stdio.h>
|
||||
#endif
|
||||
|
||||
#define JEGER_INIT_STATE 2
|
||||
#define JEGER_SOS_STATE 0
|
||||
#define JEGER_NSOS_STATE 1
|
||||
#define JEGER_INIT_STATE 2
|
||||
|
||||
// ------------------
|
||||
// ### Char tests ###
|
||||
@ -40,6 +45,16 @@ bool is_magic(const char c) {
|
||||
;
|
||||
}
|
||||
|
||||
// -------------------
|
||||
// ### Match tests ###
|
||||
// -------------------
|
||||
static inline
|
||||
bool is_sentinel(const match_t * const match) {
|
||||
return (match->position == -1)
|
||||
&& (match->width == -1)
|
||||
;
|
||||
}
|
||||
|
||||
// -----------------
|
||||
// ### Char sets ###
|
||||
// -----------------
|
||||
@ -95,11 +110,12 @@ typedef struct {
|
||||
} offshoot_t;
|
||||
|
||||
enum {
|
||||
DO_CATCH = 0x00000001 << 0,
|
||||
IS_NEGATIVE = 0x00000001 << 1,
|
||||
IS_AT_THE_BEGINNING = 0x00000001 << 2,
|
||||
FORCE_START_OF_STRING = 0x00000001 << 3,
|
||||
INCREMENT_STATE = 0x00000001 << 4,
|
||||
DO_CATCH = 0x00000001 << 0,
|
||||
IS_NEGATIVE = 0x00000001 << 1,
|
||||
IS_AT_THE_BEGINNING = 0x00000001 << 2,
|
||||
FORCE_START_OF_STRING = 0x00000001 << 3,
|
||||
DO_FORBID_START_OF_STRING = 0x00000001 << 4,
|
||||
INCREMENT_STATE = 0x00000001 << 5,
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
@ -346,7 +362,7 @@ int escape_1_to_N(const char c,
|
||||
|
||||
static inline
|
||||
int escape_to_negative(const char c,
|
||||
compiler_state * const cs) {
|
||||
compiler_state * const cs) {
|
||||
switch (c) {
|
||||
case 'D': {
|
||||
const char digit_chars[] = JEGER_CHAR_SET_digits;
|
||||
@ -539,21 +555,38 @@ regex_t * regex_compile(const char * const pattern) {
|
||||
s += 1;
|
||||
} break;
|
||||
case '<': {
|
||||
unsigned true_inc = 1;
|
||||
if ((cs.flags & DO_CATCH)
|
||||
|| (cs.flags & IS_NEGATIVE)) {
|
||||
OFFSHOOT(0, +1, 1, 1, &cs, regex);
|
||||
OFFSHOOT(+1, +2, 1, 1, &cs, regex);
|
||||
++true_inc;
|
||||
} else {
|
||||
// XXX: make this legible
|
||||
if (cs.flags & IS_AT_THE_BEGINNING
|
||||
&& !(cs.flags & DO_CATCH)
|
||||
&& !(cs.flags & IS_NEGATIVE)
|
||||
&& whitelist[0] == '\0') {
|
||||
// ---
|
||||
cs.flags |= INCREMENT_STATE;
|
||||
cs.flags |= DO_FORBID_START_OF_STRING;
|
||||
strcat(whitelist, JEGER_CHAR_symbol_chars);
|
||||
// ---
|
||||
ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
|
||||
ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
|
||||
HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
|
||||
// ---
|
||||
++cs.state;
|
||||
cs.width = 0;
|
||||
HOOK_ALL(0, whitelist, +1, &cs, regex);
|
||||
cs.width = 1;
|
||||
OFFSHOOT(0, +1, 1, 0, &cs, regex);
|
||||
// ---
|
||||
} else {
|
||||
HOOK_ALL(0, whitelist, +1, &cs, regex);
|
||||
if ((cs.flags & DO_CATCH)
|
||||
|| (cs.flags & IS_NEGATIVE)) {
|
||||
OFFSHOOT(+1, +2, 1, 1, &cs, regex);
|
||||
} else {
|
||||
cs.flags |= INCREMENT_STATE;
|
||||
}
|
||||
OFFSHOOT(0, +1, 1, 0, &cs, regex);
|
||||
}
|
||||
cs.flags |= IS_NEGATIVE;
|
||||
if (cs.flags & IS_AT_THE_BEGINNING) {
|
||||
ABSOLUTE_OFFSHOOT(0, JEGER_INIT_STATE + true_inc, 0, 0, regex);
|
||||
}
|
||||
strcat(blacklist, JEGER_CHAR_symbol_chars);
|
||||
//OFFSHOOT(0 + (true_inc-1), +true_inc, 1, 0, &cs, regex);
|
||||
s += 1;
|
||||
} break;
|
||||
case '>': {
|
||||
@ -622,11 +655,13 @@ regex_t * regex_compile(const char * const pattern) {
|
||||
}
|
||||
|
||||
// Init state hookups
|
||||
ABSOLUTE_OFFSHOOT(0, JEGER_INIT_STATE, 0, 0, regex);
|
||||
if (!(cs.flags & DO_FORBID_START_OF_STRING)) {
|
||||
ABSOLUTE_OFFSHOOT(JEGER_SOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
|
||||
}
|
||||
if (cs.flags & FORCE_START_OF_STRING) {
|
||||
ABSOLUTE_OFFSHOOT(1, HALT_AND_CATCH_FIRE, 0, 0, regex);
|
||||
ABSOLUTE_OFFSHOOT(JEGER_NSOS_STATE, HALT_AND_CATCH_FIRE, 0, 0, regex);
|
||||
} else {
|
||||
ABSOLUTE_OFFSHOOT(1, JEGER_INIT_STATE, 0, 0, regex);
|
||||
ABSOLUTE_OFFSHOOT(JEGER_NSOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
|
||||
}
|
||||
|
||||
regex->accepting_state = cs.state;
|
||||
@ -702,9 +737,6 @@ bool regex_assert(const regex_t * const regex,
|
||||
was_found = true;
|
||||
const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
|
||||
if(r){
|
||||
if (match->position == -1) {
|
||||
match->position = (s - string);
|
||||
}
|
||||
match->width += delta->match_width;
|
||||
return r;
|
||||
}
|
||||
@ -718,6 +750,9 @@ bool regex_assert(const regex_t * const regex,
|
||||
if (my_catch && (!my_catch->pattern_width || !last_stand)) {
|
||||
state = my_catch->to;
|
||||
s += my_catch->pattern_width;
|
||||
if (match->position < 1) {
|
||||
match->position = my_catch->match_width;
|
||||
}
|
||||
match->width += my_catch->match_width;
|
||||
goto LOOP;
|
||||
}
|
||||
@ -756,7 +791,11 @@ match_t * regex_match(const regex_t * const regex,
|
||||
};
|
||||
|
||||
if (regex_assert(regex, s, initial_state, match)) {
|
||||
if(match->position == -1){
|
||||
match->position = (s - string);
|
||||
}else{
|
||||
match->position += (s - string);
|
||||
}
|
||||
|
||||
vector_push(&matches, match);
|
||||
|
||||
@ -790,7 +829,7 @@ bool regex_search(const regex_t * const regex,
|
||||
const char * const string) {
|
||||
|
||||
match_t * m = regex_match(regex, string, true);
|
||||
const bool r = (m->position != -1);
|
||||
const bool r = !is_sentinel(m);
|
||||
free(m);
|
||||
|
||||
return r;
|
||||
|
@ -107,6 +107,14 @@ signed main() {
|
||||
TEST(R"del(.\<print\>.)del", "printf", false);
|
||||
TEST(R"del(.\<print\>.)del", "fprintf", false);
|
||||
|
||||
puts("");
|
||||
|
||||
TEST(R"del(\<while\>)del", "while", true);
|
||||
TEST(R"del(\<while\>)del", " while ", true);
|
||||
TEST(R"del(\<while\>)del", "9while ", true);
|
||||
TEST(R"del(\<while\>)del", "for while {", true);
|
||||
TEST(R"del(\<while\>)del", "for while{", true);
|
||||
|
||||
if (test_counter == passed_tests) {
|
||||
fputs("\033[32m", stdout);
|
||||
} else {
|
||||
@ -143,6 +151,15 @@ signed main() {
|
||||
TEST2( R"del(a+a)del", " aaa", match_t{ 3, strlen("aaa")});
|
||||
TEST2(R"del(a+\+)del", "aaa+", match_t{ 0, strlen("aaa+")});
|
||||
|
||||
puts("");
|
||||
puts("");
|
||||
|
||||
TEST2(R"del(\<while\>)del", "while", match_t{0, strlen("while")});
|
||||
TEST2(R"del(\<while\>)del", " while", match_t{1, strlen("while")});
|
||||
TEST2(R"del(\<while\>)del", "for while", match_t{4, strlen("while")});
|
||||
TEST2(R"del(\<while\>)del", "for9while", match_t{4, strlen("while")});
|
||||
TEST2(R"del(\<while\>)del", "for9while ", match_t{4, strlen("while")});
|
||||
|
||||
if(test_counter2 == passed_tests2) {
|
||||
fputs("\033[32m", stdout);
|
||||
} else {
|
||||
|
Loading…
Reference in New Issue
Block a user