Compare commits
6 Commits
aa3dd43fc5
...
b9910b224b
Author | SHA1 | Date | |
---|---|---|---|
b9910b224b | |||
3bf23c07b5 | |||
b8943a596e | |||
0c2f494d6b | |||
b3e21c3e29 | |||
7e81ea3f01 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -3,3 +3,4 @@ regtest
|
||||
*.so
|
||||
*.data
|
||||
.gdb_history
|
||||
gdb.txt
|
||||
|
@ -7,15 +7,16 @@ class RegexPrinter:
|
||||
# Regular shit
|
||||
s += "accepting_state = " + str(self.val['accepting_state']) + ", str = " + str(self.val['str']) + ",\n"
|
||||
# Delta
|
||||
delta_t_ptr_ptr = gdb.lookup_type("delta_t").pointer().pointer()
|
||||
delta_t_ptr_ptr_t = gdb.lookup_type("delta_t").pointer().pointer()
|
||||
dt = self.val['delta_table']
|
||||
s += "delta_table = {\n"
|
||||
d0 = 0
|
||||
for i in range(0, dt['element_count']):
|
||||
s += "\t"
|
||||
s += (
|
||||
str(
|
||||
(
|
||||
dt['data'].cast(delta_t_ptr_ptr)
|
||||
dt['data'].cast(delta_t_ptr_ptr_t)
|
||||
+
|
||||
i
|
||||
).dereference().dereference()
|
||||
|
@ -29,6 +29,23 @@ State transition table look up
|
||||
+ fail --> return
|
||||
+ fail --> return
|
||||
---
|
||||
# Legend
|
||||
|
||||
| | Start | End |
|
||||
| :--: | :---: | :-: |
|
||||
| Line | SOS | EOS |
|
||||
| Word | WOS | EOW |
|
||||
|
||||
#### SOS
|
||||
Start Of String
|
||||
|
||||
#### EOS
|
||||
End Of String
|
||||
|
||||
#### WOS
|
||||
Start Of Word
|
||||
|
||||
---
|
||||
##### HALT\_AND\_CATCH\_FIRE
|
||||
H&C is a special state signalling that we have hit a dead end.
|
||||
The reason why we need it and can't just instantly quit is backtracking.
|
||||
@ -89,7 +106,7 @@ and finally returns failiour.
|
||||
|
||||
|
||||
#### \<
|
||||
This is the SOW (start of word) operator.
|
||||
This is the SOW operator.
|
||||
SOW must match:
|
||||
```
|
||||
^myword
|
||||
|
@ -3,3 +3,5 @@
|
||||
[ ] arbitrary memory support (this probably covers UTF-8 support)
|
||||
[ ] documentation that's not shit
|
||||
[ ] HOOK\_ALL / OFFSHOOT width parameter inconsistency
|
||||
[ ] nesting is going real wild
|
||||
[ ] states could be optimized by chopping off the init padding if not flagged required
|
||||
|
@ -490,6 +490,11 @@ regex_t * regex_compile(const char * const pattern) {
|
||||
char whitelist[64];
|
||||
char blacklist[64];
|
||||
|
||||
static const int REGEX_PREVERSABLE_FLAGS = IS_AT_THE_BEGINNING
|
||||
| FORCE_START_OF_STRING
|
||||
| DO_FORBID_START_OF_STRING
|
||||
;
|
||||
|
||||
compiler_state cs = {
|
||||
.flags = IS_AT_THE_BEGINNING,
|
||||
.state = JEGER_INIT_STATE,
|
||||
@ -500,11 +505,11 @@ regex_t * regex_compile(const char * const pattern) {
|
||||
for (const char * s = pattern; *s != '\00';) {
|
||||
assert(!is_quantifier(*s) && "Pattern starts with quantifier.");
|
||||
// Reset the compiler
|
||||
whitelist[0] = '\0';
|
||||
blacklist[0] = '\0';
|
||||
cs.flags &= (IS_AT_THE_BEGINNING | FORCE_START_OF_STRING);
|
||||
cs.width = 1;
|
||||
cs.match_width = 1;
|
||||
whitelist[0] = '\0';
|
||||
blacklist[0] = '\0';
|
||||
cs.flags &= REGEX_PREVERSABLE_FLAGS;
|
||||
cs.width = 1;
|
||||
cs.match_width = 1;
|
||||
|
||||
// Translate char
|
||||
switch (*s) {
|
||||
@ -657,6 +662,7 @@ regex_t * regex_compile(const char * const pattern) {
|
||||
++cs.state;
|
||||
}
|
||||
|
||||
// Purge SOS flag
|
||||
cs.flags &= (~IS_AT_THE_BEGINNING);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user