Compare commits

...

40 Commits
dev ... master

Author SHA1 Message Date
e4106e1a69 . 2023-09-26 16:18:38 +02:00
99127233db backtracking fix; '\<test' == 'ttest' bug recreated 2023-09-26 16:04:45 +02:00
64fcc77fa1 running circles 2023-09-26 16:03:59 +02:00
0e99ecb8d8 yet more work on the docs 2023-09-26 14:42:47 +02:00
a1ac8cb319 typo fix 2023-09-26 14:21:05 +02:00
26597926c9 . 2023-09-26 14:20:20 +02:00
29e5c2107f last try 2023-09-26 14:19:31 +02:00
99f4d6e881 work, you bloody list 2023-09-26 14:16:35 +02:00
4ef25e924d docs 2023-09-26 14:14:59 +02:00
688ba249eb docs 2023-09-26 14:13:34 +02:00
b9910b224b docs 2023-09-26 14:11:54 +02:00
3bf23c07b5 follow type naming convention 2023-09-26 12:50:14 +02:00
b8943a596e clean target 2023-09-26 12:49:58 +02:00
0c2f494d6b . 2023-09-26 11:08:36 +02:00
b3e21c3e29 readability 2023-09-26 11:08:24 +02:00
7e81ea3f01 +todos 2023-09-26 11:07:50 +02:00
aa3dd43fc5 technical dept payed back 2023-09-25 22:35:17 +02:00
8f2fbcdb2b csope is awesome 2023-09-25 21:51:45 +02:00
01d82b8fca one more bug 2023-09-25 21:51:40 +02:00
13a150d682 fixed fauilty tests 2023-09-25 15:48:12 +02:00
c96567dc44 fixed spelling mistake 2023-09-25 15:47:18 +02:00
b645de7bdf . 2023-09-23 16:52:45 +02:00
1db2f91385 hl is cursed 2023-09-23 16:48:03 +02:00
a6e20cfe7c ignore benchmarking 2023-09-23 16:35:22 +02:00
61733e3254 progress at the price of techincal dept 2023-09-23 16:34:06 +02:00
e0a5c54b62 >most tests pass >hl completelly breaks 2023-09-23 15:55:36 +02:00
9ca9006a9a bugs go on and on, and they dont end 2023-09-23 14:52:40 +02:00
283e1e48e7 bak 2023-09-23 12:41:15 +02:00
8080b44613 ugh 2023-09-22 21:12:58 +02:00
3d56f3021b solved the \< situation; \> WIP 2023-09-22 20:37:20 +02:00
8d642f14ce print test number 2023-09-22 20:36:42 +02:00
d9a148d825 slightly improoved tests 2023-09-22 20:36:33 +02:00
900d7ecf7e documentation of some value 2023-09-22 20:36:14 +02:00
84a5d503dc todo 2023-09-22 20:36:05 +02:00
7b6ba63819 preprocessor bug noticed by Emil 2023-09-21 09:15:16 +02:00
aa4d45abaa fixup 2023-09-21 09:14:37 +02:00
bb4992b2f0 odd one out test 2023-09-21 08:38:32 +02:00
ae98518b57 some more documentation 2023-09-20 13:33:18 +02:00
0c0ee07caf syntax fix 2023-09-09 18:48:41 +02:00
a158a27697 benchmark/make is no longer a trap 2023-09-09 18:20:25 +02:00
11 changed files with 432 additions and 158 deletions

2
.gitignore vendored
View File

@ -1,4 +1,6 @@
regtest
*.out
*.so
*.data
.gdb_history
gdb.txt

View File

@ -1,4 +1,8 @@
CXXFLAGS := -fuse-ld=mold -ggdb -Wall -Wextra -Wpedantic
ifeq (${DEBUG}, 1)
CXXFLAGS += -DDEBUG
endif
OUT := regtest
main:
@ -8,3 +12,6 @@ run:
${OUT}
test: run
clean:
rm ${OUT}

View File

@ -4,18 +4,31 @@ A regex engine.
### Syntax
The aim was to follow Vim's regex syntax. Esoteric special characters such as "\zs" are not implemented, however the gist is supported.
```C
match_t * regex_match(const regex_t * const regex, const char * const string, const bool start_of_string);
```
Returns a sentinel terminated array of `match_t` objects.
The sentinel object is defined as `(match_t){ .position = -1, .width = -1, };`.
```C
bool is_sentinel(const match_t * const match);
```
This is the function you must use to check whether a `match_t` is a sentinel or not.
I.e. make this the break condition while looping over the results.
| Symbol | Meaning (TODO: fill in) |
| :----: | :---------------------: |
| . | |
| = | |
| + | |
| * | |
| ? | |
| \\< | |
| \\> | |
| ^ | |
| \t | |
| \n | |
| ? | One or zero of the previous token |
| = | Same as ? |
| * | Any number of the previous token |
| + | One or more of the previous token |
| \\< | Start of word |
| \\> | End of word |
| ^ | Start of string |
| \t | Tab |
| \n | New line |
| \b | |
| \i | |
| \I | |
@ -26,19 +39,19 @@ The aim was to follow Vim's regex syntax. Esoteric special characters such as "\
| \p | |
| \P | |
| \s | |
| \d | |
| \D | |
| \x | |
| \X | |
| \o | |
| \O | |
| \w | |
| \W | |
| \d | Digit char |
| \D | Not digit char |
| \x | Hex char|
| \X | Not hex char |
| \o | Octal char |
| \O | Not octal char |
| \w | Word char|
| \W | Not word char|
| \h | |
| \a | |
| \l | |
| \L | |
| \u | |
| \U | |
| [\<range\>] | |
| [\^\<range\>] | |
| \a | Ascii letter |
| \l | Lowercase ascii letter |
| \L | Not (lowercase ascii letter) |
| \u | Uppercase ascii letter |
| \U | Not (uppercase ascii letter) |
| [\<range\>] | Any of \<range\> |
| [\^\<range\>] | None of \<range\> |

View File

@ -3,11 +3,6 @@
CXXFLAGS := -O2
BUILD.cpp := ${CXX} ${CXXFLAGS} ${CPPFLAGS}
test: build
export LD_LIBRARY_PATH=$$(realpath .):$$LD_LIBRARY_PATH
perf stat -r 10000 ./gnu_racer.out
perf stat -r 10000 ./jeger_racer.out
build: jeger gnu
jeger: ../source/vector.c ../source/jeger.c jeger_racer.cpp
@ -18,5 +13,12 @@ jeger: ../source/vector.c ../source/jeger.c jeger_racer.cpp
gnu: gnu_racer.cpp
${BUILD.cpp} gnu_racer.cpp -o gnu_racer.out
test: build
export LD_LIBRARY_PATH=$$(realpath .):$$LD_LIBRARY_PATH
perf stat -r 10000 ./gnu_racer.out
perf stat -r 10000 ./jeger_racer.out
clean:
rm *.so *.out
.PHONY: test

View File

@ -7,15 +7,16 @@ class RegexPrinter:
# Regular shit
s += "accepting_state = " + str(self.val['accepting_state']) + ", str = " + str(self.val['str']) + ",\n"
# Delta
delta_t_ptr_ptr = gdb.lookup_type("delta_t").pointer().pointer()
delta_t_ptr_ptr_t = gdb.lookup_type("delta_t").pointer().pointer()
dt = self.val['delta_table']
s += "delta_table = {\n"
d0 = 0
for i in range(0, dt['element_count']):
s += "\t"
s += (
str(
(
dt['data'].cast(delta_t_ptr_ptr)
dt['data'].cast(delta_t_ptr_ptr_t)
+
i
).dereference().dereference()

View File

@ -1,11 +1,11 @@
# Abstraction
+---------------------+
| |
| |
| State register |
| |
| |
+---------------------+
+---------------------+
| |
| |
| State register |
| |
| |
+---------------------+
+---------------------------------+
@ -16,3 +16,110 @@
+---------------------------------+
| Fallback transition table |
+---------------------------------+
---
State transition table look up
+ success --> continue
+ fail --> look up fallback table
* success --> continue
* fail --> return
EOS ? --> look up fallback table
+ success --> is 0 width?
* success --> continue
* fail --> return
+ fail --> return
---
# Legend
| | Start | End |
| :--: | :---: | :-: |
| Line | SOS | EOS |
| Word | SOW | EOW |
##### HALT\_AND\_CATCH\_FIRE
H&C is a special state signalling that we have hit a dead end.
The reason why we need it and can't just instantly quit is backtracking.
---
##### [^example]
This is a negative range.
```
let myNegativeRange = {'e', 'x', 'a', 'm', 'p', 'l'}
```
None of the characters in `$myNegativeRange` must be accepted.
The way this is compiled is that we first hook all chars in `$myNegativeRange` to H&C,
then define an OFFSHOOT of width 1.
Put differently:
if we read something illegal we abort this branch,
if what we read was not illegal, we deduce that it must have been legal and we continue.
Handling "negatives" this way allows us to be "alphabet agnostic" in a sense.
Many implementations will presume ASCII, with its fixed 7/8 bit width
and create look up tables.
Which is fast and cute, but this strategy becomes a giant memory hog
if we ever wanted to use it on, say UTF-8 (from 256 te/c (table entries per char) to 4'294'967'295 te/c).
#### .
This is the dot operator.
It matches any 1 char.
Similar to how negative ranges are implemented,
it takes advantage of the fallback table.
It simply ignores the state transition table and rather unconditionally hooks itself to the next state.
#### ^
This is the caret operator.
It matches the SOS.
For explanation purposes multilining (match '\n') is irrelevant.
That behaves just like a literal.
What is more interesting is how SOS is recognized.
Since `regex_assert()` is recursive the current state is continuously passed along,
however at our first frame, it's not just always 0.
`regex_match()` decides depending on the current position of the string.
Basically we have the first 2 states (0, 1) reserved and always missing from the state transition table.
+ 0 - SOS
+ 1 - !SOS
Normally both are _hooked_ to state 2,
and we pretend nothing has ever happened.
But when the caret operator is compiled, it sets a special compiler flag FORCE\_START\_OF\_STRING,
which forbids the hooking of state 1 to 2,
therefore when `regex_match()` calls from, say position 2,
it passes in 1 as the starting state,
no state transition table entry will be found since that's forbidden to begin with,
no jumps are found(!),
the machine checks whether the current state (1) is the accepting state (>=2)
and finally returns failure.
#### \<
This is the SOW operator.
SOW must match:
```
^myword
[^\h]myword
```
Not only that, this combination is key,
either it has to be the SOS
or there has to be at least something which is not a symbol char.
Without the last condition "eexample" would match "\\\<example\\\>"
as the iteration of `regex_match()` reaches "example".
From a more practical perspective:
``` C
\<myword\>
// Must match
"myword"
" myword"
```

7
documentation/TODO.md Normal file
View File

@ -0,0 +1,7 @@
[ ] wchar\_t support
[ ] UTF-8 support
[ ] arbitrary memory support (this probably covers UTF-8 support)
[ ] documentation thats not shit
[ ] HOOK\_ALL / OFFSHOOT width parameter inconsistency
[ ] nesting is going real wild
[ ] states could be optimized by chopping off the init padding if not flagged required

View File

@ -1,4 +1,4 @@
#if __cplusplus
#ifdef __cplusplus
# pragma GCC diagnostic ignored "-Wc++20-extensions"
#endif
@ -8,8 +8,13 @@
#include <string.h>
#include <limits.h>
#include <stdlib.h>
#if DEBUG
# include <stdio.h>
#endif
#define JEGER_INIT_STATE 2
#define JEGER_SOS_STATE 0
#define JEGER_NSOS_STATE 1
#define JEGER_INIT_STATE 2
// ------------------
// ### Char tests ###
@ -40,6 +45,15 @@ bool is_magic(const char c) {
;
}
// -------------------
// ### Match tests ###
// -------------------
// Reports whether `match` is the sentinel element:
// true only when both its `position` and its `width` are -1.
bool is_sentinel(const match_t * const match) {
	if (match->position != -1) {
		return false;
	}
	return match->width == -1;
}
// -----------------
// ### Char sets ###
// -----------------
@ -52,13 +66,13 @@ bool is_magic(const char c) {
#define JEGER_CHAR_SET_lower_hex "abcdef"
#define JEGER_CHAR_SET_upper_hex "ABCDEF"
#define JEGER_CHAR_SET_oct_241_to_277 \
"\241\242\243\244\245" \
"\246\247\250\251\252" \
"\253\254\255\256\257" \
"\260\261\262\263\264" \
"\265\266\267\270\271" \
"\272\273\274\275\276" \
"\277"
"\241\242\243\244\245" \
"\246\247\250\251\252" \
"\253\254\255\256\257" \
"\260\261\262\263\264" \
"\265\266\267\270\271" \
"\272\273\274\275\276" \
"\277"
#define JEGER_CHAR_SET_oct_300_to_337 \
"\300\301\302\303\304" \
"\305\306\307\310\311" \
@ -68,13 +82,13 @@ bool is_magic(const char c) {
"\331\332\333\334\335" \
"\336\337"
#define JEGER_CHAR_SET_file_extra "/.-_+,#$%~="
#define JEGER_CHAR_SET_whitespace " \t\v\n"
#define JEGER_CHAR_SET_whitespace " " "\t\v\n"
static const char JEGER_CHAR_very_word_chars[] =
JEGER_CHAR_SET_underscore
JEGER_CHAR_SET_lower
JEGER_CHAR_SET_upper
;
static const char JEGER_CHAR_symbol_chars[] =
JEGER_CHAR_SET_underscore
JEGER_CHAR_SET_lower
JEGER_CHAR_SET_upper
;
// ----------------------
// ### Internal Types ###
@ -95,17 +109,19 @@ typedef struct {
} offshoot_t;
enum {
DO_CATCH = 0x00000001 << 0,
IS_NEGATIVE = 0x00000001 << 1,
IS_AT_THE_BEGINNING = 0x00000001 << 2,
FORCE_START_OF_STRING = 0x00000001 << 3,
INCREMENT_STATE = 0x00000001 << 4,
DO_CATCH = 0x00000001 << 0,
IS_NEGATIVE = 0x00000001 << 1,
IS_AT_THE_BEGINNING = 0x00000001 << 2,
FORCE_START_OF_STRING = 0x00000001 << 3,
DO_FORBID_START_OF_STRING = 0x00000001 << 4,
INCREMENT_STATE = 0x00000001 << 5,
};
typedef struct {
int flags;
int state;
int width;
int match_width;
char * whitelist;
char * blacklist;
} compiler_state;
@ -115,7 +131,11 @@ typedef struct {
// ----------------------------------
// ### Regex creation/destruction ###
// ----------------------------------
static const int HALT_AND_CATCH_FIRE = INT_MIN;
enum {
ASSERTION_FAILURE = 0,
ASSERTION_SUCCESS = 1,
HALT_AND_CATCH_FIRE = INT_MIN,
};
// Resolves the state offset `a` against the `cs` (compiler state pointer) that is
// in scope at the expansion site: yields `cs->state + a`, except that
// HALT_AND_CATCH_FIRE is passed through unchanged rather than being offset.
// `a` is fully parenthesized so that expression arguments (e.g. a ternary)
// are not re-associated by the comparison or the addition.
// NOTE: `a` is evaluated twice; do not pass expressions with side effects.
#define ASSERT_HALT(a) (((a) == HALT_AND_CATCH_FIRE) ? HALT_AND_CATCH_FIRE : (cs->state + (a)))
@ -132,7 +152,7 @@ void HOOK_ALL(const int from,
.input = *s,
.to = ASSERT_HALT(to),
.pattern_width = cs->width,
.match_width = 1,
.match_width = cs->match_width,
};
vector_push(&regex->delta_table,
&delta);
@ -318,9 +338,9 @@ int escape_1_to_N(const char c,
return sizeof(word_chars)-1;
};
case 'h': {
// #global JEGER_CHAR_very_word_chars
strcpy(target_list, JEGER_CHAR_very_word_chars);
return sizeof(JEGER_CHAR_very_word_chars)-1;
// #global JEGER_CHAR_symbol_chars
strcpy(target_list, JEGER_CHAR_symbol_chars);
return sizeof(JEGER_CHAR_symbol_chars)-1;
};
case 'a': {
const char alpha_chars[] = JEGER_CHAR_SET_lower
@ -346,7 +366,7 @@ int escape_1_to_N(const char c,
static inline
int escape_to_negative(const char c,
compiler_state * const cs) {
compiler_state * const cs) {
switch (c) {
case 'D': {
const char digit_chars[] = JEGER_CHAR_SET_digits;
@ -454,7 +474,7 @@ void filter_blacklist(const char * whitelist,
const char * blacklist,
char * filtered) {
for (; *blacklist != '\0'; blacklist++) {
for(; *whitelist != '\0'; whitelist++) {
for (; *whitelist != '\0'; whitelist++) {
if (*blacklist == *whitelist) {
goto long_continue;
}
@ -474,6 +494,11 @@ regex_t * regex_compile(const char * const pattern) {
char whitelist[64];
char blacklist[64];
static const int REGEX_PREVERSABLE_FLAGS = IS_AT_THE_BEGINNING
| FORCE_START_OF_STRING
| DO_FORBID_START_OF_STRING
;
compiler_state cs = {
.flags = IS_AT_THE_BEGINNING,
.state = JEGER_INIT_STATE,
@ -484,10 +509,11 @@ regex_t * regex_compile(const char * const pattern) {
for (const char * s = pattern; *s != '\00';) {
assert(!is_quantifier(*s) && "Pattern starts with quantifier.");
// Reset the compiler
whitelist[0] = '\0';
blacklist[0] = '\0';
cs.flags &= (IS_AT_THE_BEGINNING | FORCE_START_OF_STRING);
cs.width = 1;
whitelist[0] = '\0';
blacklist[0] = '\0';
cs.flags &= REGEX_PREVERSABLE_FLAGS;
cs.width = 1;
cs.match_width = 1;
// Translate char
switch (*s) {
@ -503,7 +529,7 @@ regex_t * regex_compile(const char * const pattern) {
if (compile_escape(*s, &cs)) {
s += 1;
} else if (is_hologram_escape(*s)) {
;
s -= 1;
} else {
assert("Unknown escape.");
}
@ -532,20 +558,58 @@ regex_t * regex_compile(const char * const pattern) {
}
s += 1;
} break;
case '<': {
cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
if (cs.flags & IS_AT_THE_BEGINNING) {
ABSOLUTE_OFFSHOOT(0, JEGER_INIT_STATE+1, 0, 0, regex);
case '\\': {
if(is_hologram_escape(*(s+1))) {
++s;
} else {
goto DEFAULT;
}
switch(*s){
case '<': {
// XXX: make this legible
if (cs.flags & IS_AT_THE_BEGINNING
&& !(cs.flags & DO_CATCH)
&& !(cs.flags & IS_NEGATIVE)
&& whitelist[0] == '\0') {
// ---
cs.flags |= INCREMENT_STATE;
cs.flags |= DO_FORBID_START_OF_STRING;
strcat(whitelist, JEGER_CHAR_symbol_chars);
// ---
ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
// ---
++cs.state;
cs.width = 0;
cs.match_width = 0;
HOOK_ALL(0, whitelist, +1, &cs, regex);
cs.width = 1;
OFFSHOOT(0, +1, 1, 0, &cs, regex);
// ---
} else {
HOOK_ALL(0, whitelist, +1, &cs, regex);
if ((cs.flags & DO_CATCH)
|| (cs.flags & IS_NEGATIVE)) {
OFFSHOOT(+1, +2, 1, 1, &cs, regex);
} else {
cs.flags |= INCREMENT_STATE;
}
OFFSHOOT(0, +1, 1, 0, &cs, regex);
}
cs.flags |= IS_NEGATIVE;
strcat(blacklist, JEGER_CHAR_symbol_chars);
s += 1;
} break;
case '>': {
HOOK_ALL(0, whitelist, +1, &cs, regex);
cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
strcat(blacklist, JEGER_CHAR_symbol_chars);
OFFSHOOT(+1, +2, 0, 0, &cs, regex);
++cs.state;
s += 1;
} break;
}
strcat(blacklist, JEGER_CHAR_very_word_chars);
OFFSHOOT(0, 0, 1, 0, &cs, regex);
s += 1;
} break;
case '>': {
cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
strcat(blacklist, JEGER_CHAR_very_word_chars);
OFFSHOOT(0, 1, 0, 0, &cs, regex);
s += 1;
} break;
// quantifiers
case '=':
@ -579,6 +643,7 @@ regex_t * regex_compile(const char * const pattern) {
}
s += 1;
} break;
DEFAULT:
default: { // Literal
cs.flags |= INCREMENT_STATE;
HOOK_ALL(0, whitelist, +1, &cs, regex);
@ -601,15 +666,18 @@ regex_t * regex_compile(const char * const pattern) {
++cs.state;
}
// Purge SOS flag
cs.flags &= (~IS_AT_THE_BEGINNING);
}
// Init state hookups
ABSOLUTE_OFFSHOOT(0, JEGER_INIT_STATE, 0, 0, regex);
if (!(cs.flags & DO_FORBID_START_OF_STRING)) {
ABSOLUTE_OFFSHOOT(JEGER_SOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
}
if (cs.flags & FORCE_START_OF_STRING) {
ABSOLUTE_OFFSHOOT(1, HALT_AND_CATCH_FIRE, 0, 0, regex);
ABSOLUTE_OFFSHOOT(JEGER_NSOS_STATE, HALT_AND_CATCH_FIRE, 0, 0, regex);
} else {
ABSOLUTE_OFFSHOOT(1, JEGER_INIT_STATE, 0, 0, regex);
ABSOLUTE_OFFSHOOT(JEGER_NSOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
}
regex->accepting_state = cs.state;
@ -643,12 +711,12 @@ const offshoot_t * catch_table_lookup(const regex_t * const regex,
}
static
bool regex_assert(const regex_t * const regex,
int regex_assert(const regex_t * const regex,
const char * const string,
int state,
match_t * const match) {
if (state == HALT_AND_CATCH_FIRE) {
return false;
return HALT_AND_CATCH_FIRE;
}
bool last_stand = false;
@ -682,14 +750,23 @@ bool regex_assert(const regex_t * const regex,
if ((delta->in == state)
&& (delta->input == *s)) {
bool do_reset = false;
was_found = true;
if (!match->_pos_ptr && delta->match_width) {
match->_pos_ptr = s;
do_reset = true;
}
const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
if(r){
if (match->position == -1) {
match->position = (s - string);
}
if(r == ASSERTION_SUCCESS){
match->width += delta->match_width;
return r;
} else {
if (r == ASSERTION_FAILURE) {
was_found = false;
}
if (do_reset) {
match->_pos_ptr = NULL;
}
}
}
}
@ -707,7 +784,7 @@ bool regex_assert(const regex_t * const regex,
}
}
return (state == regex->accepting_state);
return ((state == regex->accepting_state) ? ASSERTION_SUCCESS : ASSERTION_FAILURE);
}
match_t * regex_match(const regex_t * const regex,
@ -729,23 +806,29 @@ match_t * regex_match(const regex_t * const regex,
// Find all matches
{
const char * s = string;
int initial_state;
do {
int initial_state;
initial_state = (int)(!(is_start_of_string && (s == string)));
*match = (match_t){
.position = -1,
.width = 0,
._pos_ptr = NULL,
.width = 0,
};
if (regex_assert(regex, s, initial_state, match)) {
match->position = (s - string);
if (regex_assert(regex, s, initial_state, match) == 1) {
//printf("true: %s\n", s);
if (match->_pos_ptr) {
match->position = (match->_pos_ptr - string);
} else {
match->position = (s - string);
}
vector_push(&matches, match);
s += ((match->width > 0) ? match->width : 1);
match = (match_t *)malloc(sizeof(match_t));
} else {
//printf("false: %s\n", s);
++s;
}
} while (*s != '\0');
@ -773,7 +856,7 @@ bool regex_search(const regex_t * const regex,
const char * const string) {
match_t * m = regex_match(regex, string, true);
const bool r = (m->position != -1);
const bool r = !is_sentinel(m);
free(m);
return r;

View File

@ -15,7 +15,10 @@ typedef struct {
} regex_t;
typedef struct {
int position;
union {
int position;
const char * _pos_ptr;
};
int width;
} match_t;
@ -25,5 +28,6 @@ extern bool regex_search(const regex_t * const regex, const char * const st
extern match_t * regex_match(const regex_t * const regex, const char * const string, const bool start_of_string);
extern bool is_magic(const char c);
extern bool is_sentinel(const match_t * const match);
#endif

View File

@ -5,26 +5,26 @@
#include "test.hpp"
signed main() {
TEST( R"del(abc)del", "abc", true);
TEST(R"del(efg1)del", "efg1", true);
TEST( R"del(abc)del", "abc", true);
TEST(R"del(efg1)del", "efg1", true);
TEST( R"del(nig)del", "ger", false);
TEST( R"del(ss)del", "sss", true);
TEST( R"del(ss)del", "sss", true);
TEST( R"del(sss)del", "ss", false);
puts("");
TEST( R"del(ab+c)del", "abc", true);
TEST(R"del(ef+g1)del", "effffg1", true);
TEST( R"del(ab+c)del", "abc", true);
TEST(R"del(ef+g1)del", "effffg1", true);
TEST(R"del(efg1+)del", "efg", false);
TEST(R"del(efg1+)del", "efg1", true);
TEST(R"del(efg1+)del", "efg11", true);
TEST(R"del(efg1+)del", "efg1", true);
TEST(R"del(efg1+)del", "efg11", true);
puts("");
TEST( R"del(a+a)del", "aaa", true);
TEST( R"del(a+a)del", "aa", true);
TEST( R"del(a+a)del", "aaa", true);
TEST( R"del(a+a)del", "aa", true);
TEST( R"del(a+a)del", "a", false);
TEST( R"del(a+a)del", "aaa", true);
TEST( R"del(a+a)del", "aaa", true);
TEST(R"del(a+\+)del", "aaa", false);
puts("");
@ -37,27 +37,27 @@ signed main() {
puts("");
TEST( R"del(ne.)del", "net", true);
TEST( R"del(ne.)del", "net", true);
TEST( R"del(ne.)del", "ne", false);
TEST(R"del(ne.+)del", "neoo", true);
TEST(R"del(ne.*)del", "neoo", true);
TEST(R"del(ne.*)del", "ne", true);
TEST(R"del(ne.+)del", "neoo", true);
TEST(R"del(ne.*)del", "neoo", true);
TEST(R"del(ne.*)del", "ne", true);
puts("");
TEST( R"del(ne.o)del", "neto", true);
TEST(R"del(ne.+o)del", "nettto", true);
TEST( R"del(ne.o)del", "neto", true);
TEST(R"del(ne.+o)del", "nettto", true);
TEST(R"del(ne.+o)del", "neo", false);
TEST(R"del(ne.+o)del", "neoo", true);
TEST(R"del(ne.*o)del", "neo", true);
TEST(R"del(ne.+o)del", "neoo", true);
TEST(R"del(ne.*o)del", "neo", true);
puts("");
TEST(R"del(ne.)del", "ne\t", true);
TEST(R"del(ne.)del", "ne\t", true);
TEST(R"del(ne\t)del", "ne", false);
TEST(R"del(ne\t)del", "ne\t", true);
TEST(R"del(ne\t)del", "ne\t", true);
TEST(R"del(ne\t)del", "net", false);
TEST(R"del(ne)del", "ne\t", true);
TEST(R"del(ne)del", "ne\t", true);
puts("");
@ -69,35 +69,67 @@ signed main() {
puts("");
TEST( R"del([A-Za-z]+)del", "HelloWorld", true);
TEST(R"del([A-Za-z]+g)del", "HelloWorldg", true);
TEST( R"del([A-Za-z]+)del", "HelloWorld", true);
TEST(R"del([A-Za-z]+g)del", "HelloWorldg", true);
TEST(R"del([A-Za-z]+g)del", "g", false);
TEST(R"del([A-Za-z]*g)del", "g", true);
TEST(R"del([A-Za-z]*g)del", "g", true);
TEST(R"del([A-Za-z]+1)del", "1", false);
puts("");
TEST( R"del([^0-9])del", "0", false);
TEST( R"del([^A-Za-z])del", "HelloWorld", false);
TEST(R"del([^A-Za-z]+g)del", "313g", true);
TEST( R"del([^0-9])del", "HelloWorld", true);
TEST( R"del([^a])del", "ba", true);
TEST(R"del([^A-Za-z]+g)del", "313g", true);
TEST( R"del([^0-9])del", "HelloWorld", true);
TEST( R"del([^a])del", "ba", true);
puts("");
TEST( R"del(^\^)del", "^^", true);
TEST( R"del(^\^)del", "^^", true);
TEST( R"del(^\^)del", " ^", false);
TEST(R"del(^ \^)del", " ^", true);
TEST( R"del(^a*)del", "asd", true);
TEST( R"del(^)del", "", true);
TEST(R"del(^ \^)del", " ^", true);
TEST( R"del(^a*)del", "asd", true);
TEST( R"del(^)del", "", true);
puts("");
TEST( R"del(\<test)del", "test", true);
TEST( R"del(test\>)del", "test", true);
TEST( R"del(\<test)del", "atest", false);
TEST( R"del(\<test)del", "test", true);
TEST( R"del(test\>)del", "test", true);
TEST( R"del(\<test)del", "ttest", false);
TEST( R"del(test\>)del", "testa", false);
TEST(R"del(\<test\>)del", "test", true);
TEST(R"del(\<test\>)del", "test", true);
puts("");
TEST( R"del(\<test)del", " test ", true);
TEST( R"del(test\>)del", " test ", true);
TEST( R"del(\<test)del", " ttest ", false);
TEST( R"del(test\>)del", "testa ", false);
TEST(R"del(\<test\>)del", " test ", true);
puts("");
TEST( R"del(\<int\>)del", "printf", false);
TEST(R"del(.\<print\>.)del", " print ", true);
TEST(R"del(.\<print\>.)del", "fprint", false);
TEST(R"del(.\<print\>.)del", "printf", false);
TEST(R"del(.\<print\>.)del", "fprintf", false);
puts("");
TEST(R"del(\<while\>)del", "while", true);
TEST(R"del(\<while\>)del", " while ", true);
TEST(R"del(\<while\>)del", "9while ", true);
TEST(R"del(\<while\>)del", "for while {", true);
TEST(R"del(\<while\>)del", "for while{", true);
puts("");
TEST(R"del(/\*[\d\D]*\*/)del", "908", false);
TEST(R"del(/\*[\d\D]*\*/)del", "/*908*/", true);
TEST(R"del(/\*[\d\D]*\*/)del", "/*asd*/", true);
TEST(R"del(/\*[\d\D]*\*/)del", "/* asd */", true);
TEST(R"del(/\*[\d\D]*\*/)del", "/* as* */", true);
if (test_counter == passed_tests) {
fputs("\033[32m", stdout);
@ -111,30 +143,48 @@ signed main() {
puts("");
puts("");
TEST2( R"del(abc)del", "abc", match_t{ 0, strlen("abc")});
TEST2( R"del(abc)del", "abc", match_t{ 0, strlen("abc")});
TEST2(R"del(efg1)del", "efg1", match_t{ 0, strlen("efg1")});
TEST2( R"del(nig)del", "ger", match_t{-1, -1});
TEST2( R"del(ss)del", "sss", match_t{ 0, 2});
TEST2( R"del(sss)del", "ss", match_t{-1, -1});
TEST2( R"del(nig)del", "ger", match_t{-1, -1});
TEST2( R"del(ss)del", "sss", match_t{ 0, 2});
TEST2( R"del(sss)del", "ss", match_t{-1, -1});
puts("");
puts("");
TEST2( R"del(ab+c)del", "abc", match_t{ 0, strlen("abc")});
TEST2( R"del(ab+c)del", "abc", match_t{ 0, strlen("abc")});
TEST2(R"del(ef+g1)del", "effffg1", match_t{ 0, strlen("effffg1")});
TEST2(R"del(efg1+)del", "efg", match_t{-1, -1});
TEST2(R"del(efg1+)del", "efg1", match_t{ 0, strlen("efg1")});
TEST2(R"del(efg1+)del", "efg11", match_t{ 0, strlen("efg11")});
TEST2(R"del(efg1+)del", "efg", match_t{-1, -1});
TEST2(R"del(efg1+)del", "efg1", match_t{ 0, strlen("efg1")});
TEST2(R"del(efg1+)del", "efg11", match_t{ 0, strlen("efg11")});
puts("");
puts("");
TEST2( R"del(a+a)del", " aaa", match_t{ 1, strlen("aaa")});
TEST2( R"del(a+a)del", " aa", match_t{ 1, strlen("aa")});
TEST2( R"del(a+a)del", " a", match_t{-1, -1});
TEST2( R"del(a+a)del", " aaa", match_t{ 3, strlen("aaa")});
TEST2( R"del(a+a)del", " aaa", match_t{ 1, strlen("aaa")});
TEST2( R"del(a+a)del", " aa", match_t{ 1, strlen("aa")});
TEST2( R"del(a+a)del", " a", match_t{-1, -1});
TEST2( R"del(a+a)del", " aaa", match_t{ 3, strlen("aaa")});
TEST2(R"del(a+\+)del", "aaa+", match_t{ 0, strlen("aaa+")});
puts("");
puts("");
TEST2(R"del(\<while\>)del", "while", match_t{0, strlen("while")});
TEST2(R"del(\<while\>)del", " while", match_t{1, strlen("while")});
TEST2(R"del(\<while\>)del", "for while", match_t{4, strlen("while")});
TEST2(R"del(\<while\>)del", "for9while", match_t{4, strlen("while")});
TEST2(R"del(\<while\>)del", "for9while ", match_t{4, strlen("while")});
puts("");
puts("");
TEST2(R"del(\+)del", "akjh ab+ snabd", match_t{ strlen("akjh ab+")-1, 1});
TEST2(R"del(\*)del", "a*jh abn snabd", match_t{ strlen("a*")-1, 1});
TEST2(R"del(\=)del", "ak=h abn snabd", match_t{ strlen("ak=")-1, 1});
TEST2(R"del(\?)del", "akjh abn s?abd", match_t{ strlen("akjh abn s?")-1, 1});
TEST2(R"del(\+)del", "akjh abn snab+", match_t{strlen("akjh abn snab+")-1, 1});
if(test_counter2 == passed_tests2) {
fputs("\033[32m", stdout);
} else {

View File

@ -12,7 +12,7 @@ static int passed_tests2 = 0;
static
void asprint_match_t( char * * destination,
const match_t * const match){
const match_t * const match) {
if (match) {
asprintf(destination, "%p {%d, %d}", (void *)match, match->position, match->width);
} else {
@ -21,11 +21,11 @@ void asprint_match_t( char * * destination,
}
static
void print_leader(const bool passed){
void print_leader(const bool passed, const int n) {
if (passed) {
printf("\033[32;1mSuccess\033[0;1m. - \033[0m");
printf("\033[32;1mSuccess\033[0m. %02d\033[1m - \033[0m", n);
} else {
printf("\033[31;1mFailiour\033[0;1m. - \033[0m");
printf("\033[31;1mFailure\033[0m. %02d\033[1m - \033[0m", n);
}
}
@ -39,9 +39,10 @@ void do_flush(void) {
static
void TEST(const char * const what,
const char * const on,
const bool expect){
const bool expect) {
do_flush();
++test_counter;
regex_t * r = regex_compile(what);
bool result = regex_search(r, on);
@ -51,7 +52,7 @@ void TEST(const char * const what,
expect ? ++positives : ++negatives;
print_leader(passed);
print_leader(passed, test_counter);
char * quoted_what, * quoted_on;
asprintf(&quoted_what, "'%s'", what);
@ -63,8 +64,6 @@ void TEST(const char * const what,
++passed_tests;
expect ? ++positive_successes : ++negative_successes;
}
++test_counter;
}
static
@ -73,6 +72,7 @@ void TEST2(const char * const what,
const match_t expect){
do_flush();
++test_counter2;
regex_t * r = regex_compile(what);
match_t * result = regex_match(r, on, true);
@ -80,7 +80,7 @@ void TEST2(const char * const what,
&& result->width == expect.width
);
print_leader(passed);
print_leader(passed, test_counter2);
char * quoted_what, * quoted_on;
asprintf(&quoted_what, "'%s'", what);
@ -98,6 +98,4 @@ void TEST2(const char * const what,
if (passed) {
++passed_tests2;
}
++test_counter2;
}