Compare commits

...

6 Commits

Author SHA1 Message Date
b9910b224b docs 2023-09-26 14:11:54 +02:00
3bf23c07b5 follow type naming convention 2023-09-26 12:50:14 +02:00
b8943a596e clean target 2023-09-26 12:49:58 +02:00
0c2f494d6b . 2023-09-26 11:08:36 +02:00
b3e21c3e29 readability 2023-09-26 11:08:24 +02:00
7e81ea3f01 +todos 2023-09-26 11:07:50 +02:00
6 changed files with 38 additions and 8 deletions

1
.gitignore vendored
View File

@ -3,3 +3,4 @@ regtest
*.so
*.data
.gdb_history
gdb.txt

View File

@ -12,3 +12,6 @@ run:
${OUT}
test: run
clean:
rm ${OUT}

View File

@ -7,15 +7,16 @@ class RegexPrinter:
# Regular shit
s += "accepting_state = " + str(self.val['accepting_state']) + ", str = " + str(self.val['str']) + ",\n"
# Delta
delta_t_ptr_ptr = gdb.lookup_type("delta_t").pointer().pointer()
delta_t_ptr_ptr_t = gdb.lookup_type("delta_t").pointer().pointer()
dt = self.val['delta_table']
s += "delta_table = {\n"
d0 = 0
for i in range(0, dt['element_count']):
s += "\t"
s += (
str(
(
dt['data'].cast(delta_t_ptr_ptr)
dt['data'].cast(delta_t_ptr_ptr_t)
+
i
).dereference().dereference()

View File

@ -29,6 +29,23 @@ State transition table look up
+ fail --> return
+ fail --> return
---
# Legend
| | Start | End |
| :--: | :---: | :-: |
| Line | SOS | EOS |
| Word | WOS | EOW |
#### SOS
Start Of String
#### EOS
End Of String
#### WOS
Start Of Word
---
##### HALT\_AND\_CATCH\_FIRE
H&C is a special state signalling that we have hit a dead end.
The reason why we need it and can't just instantly quit is backtracking.
@ -89,7 +106,7 @@ and finally returns failure.
#### \<
This is the SOW (start of word) operator.
This is the SOW operator.
SOW must match:
```
^myword

View File

@ -3,3 +3,5 @@
[ ] arbitrary memory support (this probably covers UTF-8 support)
[ ] documentation that's not shit
[ ] HOOK\_ALL / OFFSHOOT width parameter inconsistency
[ ] nesting is going real wild
[ ] states could be optimized by chopping off the init padding if not flagged required

View File

@ -490,6 +490,11 @@ regex_t * regex_compile(const char * const pattern) {
char whitelist[64];
char blacklist[64];
static const int REGEX_PREVERSABLE_FLAGS = IS_AT_THE_BEGINNING
| FORCE_START_OF_STRING
| DO_FORBID_START_OF_STRING
;
compiler_state cs = {
.flags = IS_AT_THE_BEGINNING,
.state = JEGER_INIT_STATE,
@ -500,11 +505,11 @@ regex_t * regex_compile(const char * const pattern) {
for (const char * s = pattern; *s != '\00';) {
assert(!is_quantifier(*s) && "Pattern starts with quantifier.");
// Reset the compiler
whitelist[0] = '\0';
blacklist[0] = '\0';
cs.flags &= (IS_AT_THE_BEGINNING | FORCE_START_OF_STRING);
cs.width = 1;
cs.match_width = 1;
whitelist[0] = '\0';
blacklist[0] = '\0';
cs.flags &= REGEX_PREVERSABLE_FLAGS;
cs.width = 1;
cs.match_width = 1;
// Translate char
switch (*s) {
@ -657,6 +662,7 @@ regex_t * regex_compile(const char * const pattern) {
++cs.state;
}
// Purge SOS flag
cs.flags &= (~IS_AT_THE_BEGINNING);
}