Add '=' quantifier support; ignore start/end atoms (\< and \>) for now

This commit is contained in:
anon 2023-08-24 19:17:14 +02:00
parent 823eb8b113
commit 746767571e
2 changed files with 53 additions and 29 deletions

View File

@ -130,12 +130,13 @@ int new_char_tokens(const char * characters,
token_t * new_keyword_token(const char * const word, token_t * new_keyword_token(const char * const word,
hl_group_t * const g) { hl_group_t * const g) {
size_t word_length = strlen(word); char * new_word = strdup(word);
char * new_word = (char*)malloc(word_length + 4 + 1); //size_t word_length = strlen(word);
//char * new_word = (char*)malloc(word_length + 4 + 1);
memcpy(new_word, "\\<", 2); //memcpy(new_word, "\\<", 2);
memcpy(new_word + 2, word, word_length); //memcpy(new_word + 2, word, word_length);
strcpy(new_word + 2 + word_length, "\\>"); //strcpy(new_word + 2 + word_length, "\\>");
token_t * mt = (token_t*)malloc(sizeof(token_t)); token_t * mt = (token_t*)malloc(sizeof(token_t));

View File

@ -11,7 +11,7 @@
// ### Char tests ### // ### Char tests ###
// ------------------ // ------------------
static bool is_quantifier(const char c) { static bool is_quantifier(const char c) {
for (const char * s = "+*?"; *s != '\00'; s++) { for (const char * s = "+*?="; *s != '\00'; s++) {
if (*s == c) { if (*s == c) {
return true; return true;
} }
@ -40,6 +40,7 @@ typedef struct {
int in; int in;
char input; char input;
int to; int to;
int width;
} delta_t; } delta_t;
typedef struct { typedef struct {
@ -75,6 +76,9 @@ static int escape_1_to_1(const char c, char * whitelist) {
case '.': { case '.': {
strcat(whitelist, "."); strcat(whitelist, ".");
} return 1; } return 1;
case '=': {
strcat(whitelist, "=");
} return 1;
case '?': { case '?': {
strcat(whitelist, "?"); strcat(whitelist, "?");
} return 1; } return 1;
@ -248,12 +252,26 @@ static int escape_1_to_N(const char c, char * whitelist) {
return 0; return 0;
} }
//static int compile_hologram(char * hologram, char * whitelist) {
// if (hologram[0] == '\\') {
// switch (hologram[1]) {
// case '<': {
// const char very_word_chars[] = "abcdefghijklmnopqrstuvwxyz"
// "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
// "_";
// strcat(whitelist, very_word_chars);
// is_negative = true;
// HOOK_ALL(0, whitelist, 0)
// } break;
// }
// }
//}
static int compile_range(const char * const range, static int compile_range(const char * const range,
char * whitelist, char * whitelist,
bool * is_negative) { bool * is_negative) {
assert(range[0] == '[' && "Not a range."); assert((range[0] == '[') && "Not a range.");
int r = 0;
const char * s; const char * s;
if (range[1] == '^') { if (range[1] == '^') {
*is_negative = true; *is_negative = true;
@ -262,21 +280,20 @@ static int compile_range(const char * const range,
s = range + 1; s = range + 1;
} }
for (; *s != ']'; s++) { for (; *s != ']'; s++) {
assert(*s != '\00' && "Unclosed range."); assert((*s != '\0') && "Unclosed range.");
char c = *s; char c = *s;
if (escape_1_to_1(c, whitelist) if (escape_1_to_1(c, whitelist)
|| escape_1_to_N(c, whitelist)) { || escape_1_to_N(c, whitelist)) {
; ;
} else if (*(s+1) == '-') { } else if (*(s+1) == '-') {
char end = *(s+2); char end = *(s+2);
assert(c < end && "Endless range."); assert((c < end) && "Endless range.");
for (char cc = c; cc < end+1; cc++) { for (char cc = c; cc < end+1; cc++) {
strncat(whitelist, &cc, 1); strncat(whitelist, &cc, 1);
strncat(whitelist, "\00", 1); strncat(whitelist, "\0", 1);
} }
s += 2; s += 2;
} else { } else {
++r;
strncat(whitelist, &c, 1); strncat(whitelist, &c, 1);
strncat(whitelist, "\00", 1); strncat(whitelist, "\00", 1);
} }
@ -288,7 +305,7 @@ static int compile_range(const char * const range,
static bool catch_(const regex_t * const regex, static bool catch_(const regex_t * const regex,
int * const state) { int * const state) {
for (int i = 0; i < regex->catch_table.element_size; i++){ for (size_t i = 0; i < regex->catch_table.element_size; i++){
const offshoot_t * const offshoot = (vector_get(&regex->catch_table, i)); const offshoot_t * const offshoot = (vector_get(&regex->catch_table, i));
if (offshoot->in == *state) { if (offshoot->in == *state) {
*state = offshoot->to; *state = offshoot->to;
@ -302,12 +319,12 @@ static bool catch_(const regex_t * const regex,
#define HOOK_ALL(from, str, to) do { \ #define HOOK_ALL(from, str, to) do { \
int hook_to = (is_negative) ? -1 : state + to; \ int hook_to = (is_negative) ? -1 : state + to; \
for (char * s = str; *s != '\00'; s++) { \ for (char * s = str; *s != '\0'; s++) { \
vector_push(&regex->delta_table, \ vector_push(&regex->delta_table, \
&(delta_t){state + from, *s, hook_to} \ &(delta_t){state + from, *s, hook_to, width} \
); \ ); \
} \ } \
if (do_catch) { \ if (do_catch || is_negative) { \
vector_push(&regex->catch_table, \ vector_push(&regex->catch_table, \
&(offshoot_t){state + from, hook_to} \ &(offshoot_t){state + from, hook_to} \
); \ ); \
@ -321,25 +338,30 @@ static bool catch_(const regex_t * const regex,
regex_t * regex_compile(const char * const pattern) { regex_t * regex_compile(const char * const pattern) {
regex_t * regex = (regex_t *)malloc(sizeof(regex_t)); regex_t * regex = (regex_t *)malloc(sizeof(regex_t));
regex->str = strdup(pattern); regex->str = strdup(pattern);
vector_init(&regex->delta_table, sizeof(delta_t), 32); vector_init(&regex->delta_table, sizeof(delta_t), 0);
vector_init(&regex->catch_table, sizeof(offshoot_t), 16); vector_init(&regex->catch_table, sizeof(offshoot_t), 0);
int state = 0; int state = 0;
char whitelist[64]; char whitelist[64];
bool do_catch; bool do_catch;
bool is_negative; bool is_negative;
int width;
for (const char * s = pattern; *s != '\00';) { for (const char * s = pattern; *s != '\00';) {
// Get token // Get token
assert(!is_quantifier(*pattern) && "Pattern starts with quantifier."); assert(!is_quantifier(*pattern) && "Pattern starts with quantifier.");
whitelist[0] = '\00'; whitelist[0] = '\00';
do_catch = false; do_catch = false;
width = 1;
switch (*s) { switch (*s) {
case '.': { case '.': {
do_catch = true; do_catch = true;
} break; } break;
case '\\': { case '\\': {
//if (compile_hologram(*s, whitelist)) {
// break;
//}
EAT(1); EAT(1);
if(escape_1_to_1(*s, whitelist) if(escape_1_to_1(*s, whitelist)
|| escape_1_to_N(*s, whitelist)){ || escape_1_to_N(*s, whitelist)){
@ -361,6 +383,7 @@ regex_t * regex_compile(const char * const pattern) {
// Get quantifier // Get quantifier
switch (*s) { switch (*s) {
case '=':
case '?': { case '?': {
HOOK_ALL(0, whitelist, +1); HOOK_ALL(0, whitelist, +1);
EAT(1); EAT(1);
@ -406,11 +429,11 @@ static bool regex_assert(const regex_t * const regex,
for (const char * s = string; *s != '\00'; s++) { for (const char * s = string; *s != '\00'; s++) {
// delta // delta
for (int i = 0; i < regex->delta_table.element_count; i++) { for (size_t i = 0; i < regex->delta_table.element_count; i++) {
const delta_t * const delta = (delta_t *)(vector_get(&regex->delta_table, i)); const delta_t * const delta = (delta_t *)(vector_get(&regex->delta_table, i));
if ((delta->in == state) if ((delta->in == state)
&& (delta->input == *s)) { && (delta->input == *s)) {
if(regex_assert(regex, s+1, delta->to)){ if(regex_assert(regex, s + delta->width, delta->to)){
return true; return true;
} }
} }