'=' support; ignore start end atoms for now

This commit is contained in:
anon 2023-08-24 19:17:14 +02:00
parent 823eb8b113
commit 746767571e
2 changed files with 53 additions and 29 deletions

View File

@ -130,12 +130,13 @@ int new_char_tokens(const char * characters,
token_t * new_keyword_token(const char * const word,
hl_group_t * const g) {
size_t word_length = strlen(word);
char * new_word = (char*)malloc(word_length + 4 + 1);
char * new_word = strdup(word);
//size_t word_length = strlen(word);
//char * new_word = (char*)malloc(word_length + 4 + 1);
memcpy(new_word, "\\<", 2);
memcpy(new_word + 2, word, word_length);
strcpy(new_word + 2 + word_length, "\\>");
//memcpy(new_word, "\\<", 2);
//memcpy(new_word + 2, word, word_length);
//strcpy(new_word + 2 + word_length, "\\>");
token_t * mt = (token_t*)malloc(sizeof(token_t));

View File

@ -11,7 +11,7 @@
// ### Char tests ###
// ------------------
static bool is_quantifier(const char c) {
for (const char * s = "+*?"; *s != '\00'; s++) {
for (const char * s = "+*?="; *s != '\00'; s++) {
if (*s == c) {
return true;
}
@ -40,6 +40,7 @@ typedef struct {
int in;
char input;
int to;
int width;
} delta_t;
typedef struct {
@ -75,6 +76,9 @@ static int escape_1_to_1(const char c, char * whitelist) {
case '.': {
strcat(whitelist, ".");
} return 1;
case '=': {
strcat(whitelist, "=");
} return 1;
case '?': {
strcat(whitelist, "?");
} return 1;
@ -248,12 +252,26 @@ static int escape_1_to_N(const char c, char * whitelist) {
return 0;
}
//static int compile_hologram(char * hologram, char * whitelist) {
// if (hologram[0] == '\\') {
// switch (hologram[1]) {
// case '<': {
// const char very_word_chars[] = "abcdefghijklmnopqrstuvwxyz"
// "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
// "_";
// strcat(whitelist, very_word_chars);
// is_negative = true;
// HOOK_ALL(0, whitelist, 0)
// } break;
// }
// }
//}
static int compile_range(const char * const range,
char * whitelist,
bool * is_negative) {
assert(range[0] == '[' && "Not a range.");
assert((range[0] == '[') && "Not a range.");
int r = 0;
const char * s;
if (range[1] == '^') {
*is_negative = true;
@ -262,21 +280,20 @@ static int compile_range(const char * const range,
s = range + 1;
}
for (; *s != ']'; s++) {
assert(*s != '\00' && "Unclosed range.");
assert((*s != '\0') && "Unclosed range.");
char c = *s;
if (escape_1_to_1(c, whitelist)
|| escape_1_to_N(c, whitelist)) {
;
} else if (*(s+1) == '-') {
char end = *(s+2);
assert(c < end && "Endless range.");
assert((c < end) && "Endless range.");
for (char cc = c; cc < end+1; cc++) {
strncat(whitelist, &cc, 1);
strncat(whitelist, "\00", 1);
strncat(whitelist, "\0", 1);
}
s += 2;
} else {
++r;
strncat(whitelist, &c, 1);
strncat(whitelist, "\00", 1);
}
@ -288,7 +305,7 @@ static int compile_range(const char * const range,
static bool catch_(const regex_t * const regex,
int * const state) {
for (int i = 0; i < regex->catch_table.element_size; i++){
for (size_t i = 0; i < regex->catch_table.element_size; i++){
const offshoot_t * const offshoot = (vector_get(&regex->catch_table, i));
if (offshoot->in == *state) {
*state = offshoot->to;
@ -300,18 +317,18 @@ static bool catch_(const regex_t * const regex,
#define HALT_AND_CATCH_FIRE -1
#define HOOK_ALL(from, str, to) do { \
int hook_to = (is_negative) ? -1 : state + to; \
for (char * s = str; *s != '\00'; s++) { \
vector_push(&regex->delta_table, \
&(delta_t){state + from, *s, hook_to} \
); \
} \
if (do_catch) { \
vector_push(&regex->catch_table, \
&(offshoot_t){state + from, hook_to} \
); \
} \
/*
 * HOOK_ALL(from, str, to): register one transition per character of `str`.
 *
 * The macro expands in place and reads five names that are NOT parameters;
 * the expansion scope must provide all of them: `regex`, `state`,
 * `is_negative`, `do_catch` and `width`.
 *
 * - Target state is `state + to`, or -1 when `is_negative` is set
 *   (-1 is also the value of HALT_AND_CATCH_FIRE).
 * - For every character of `str` up to its terminator, a
 *   delta_t {state + from, character, target, width} is pushed onto
 *   regex->delta_table.
 * - When `do_catch` or `is_negative` is set, an
 *   offshoot_t {state + from, target} is also pushed onto
 *   regex->catch_table.
 *
 * `from` is expanded in more than one place and neither `from` nor `to` is
 * parenthesized, so pass simple, side-effect-free expressions.
 * The do { ... } while (0) wrapper lets the macro be used as one statement.
 */
#define HOOK_ALL(from, str, to) do { \
int hook_to = (is_negative) ? -1 : state + to; \
for (char * s = str; *s != '\0'; s++) { \
vector_push(&regex->delta_table, \
&(delta_t){state + from, *s, hook_to, width} \
); \
} \
if (do_catch || is_negative) { \
vector_push(&regex->catch_table, \
&(offshoot_t){state + from, hook_to} \
); \
} \
} while (0)
#define EAT(n) do { \
@ -321,25 +338,30 @@ static bool catch_(const regex_t * const regex,
regex_t * regex_compile(const char * const pattern) {
regex_t * regex = (regex_t *)malloc(sizeof(regex_t));
regex->str = strdup(pattern);
vector_init(&regex->delta_table, sizeof(delta_t), 32);
vector_init(&regex->catch_table, sizeof(offshoot_t), 16);
vector_init(&regex->delta_table, sizeof(delta_t), 0);
vector_init(&regex->catch_table, sizeof(offshoot_t), 0);
int state = 0;
char whitelist[64];
bool do_catch;
bool is_negative;
int width;
for (const char * s = pattern; *s != '\00';) {
// Get token
assert(!is_quantifier(*pattern) && "Pattern starts with quantifier.");
whitelist[0] = '\00';
do_catch = false;
width = 1;
switch (*s) {
case '.': {
do_catch = true;
} break;
case '\\': {
//if (compile_hologram(*s, whitelist)) {
// break;
//}
EAT(1);
if(escape_1_to_1(*s, whitelist)
|| escape_1_to_N(*s, whitelist)){
@ -361,6 +383,7 @@ regex_t * regex_compile(const char * const pattern) {
// Get quantifier
switch (*s) {
case '=':
case '?': {
HOOK_ALL(0, whitelist, +1);
EAT(1);
@ -406,11 +429,11 @@ static bool regex_assert(const regex_t * const regex,
for (const char * s = string; *s != '\00'; s++) {
// delta
for (int i = 0; i < regex->delta_table.element_count; i++) {
for (size_t i = 0; i < regex->delta_table.element_count; i++) {
const delta_t * const delta = (delta_t *)(vector_get(&regex->delta_table, i));
if ((delta->in == state)
&& (delta->input == *s)) {
if(regex_assert(regex, s+1, delta->to)){
if(regex_assert(regex, s + delta->width, delta->to)){
return true;
}
}