'=' support; ignore start end atoms for now
This commit is contained in:
parent
823eb8b113
commit
746767571e
11
source/hl.h
11
source/hl.h
@ -130,12 +130,13 @@ int new_char_tokens(const char * characters,
|
||||
|
||||
token_t * new_keyword_token(const char * const word,
|
||||
hl_group_t * const g) {
|
||||
size_t word_length = strlen(word);
|
||||
char * new_word = (char*)malloc(word_length + 4 + 1);
|
||||
char * new_word = strdup(word);
|
||||
//size_t word_length = strlen(word);
|
||||
//char * new_word = (char*)malloc(word_length + 4 + 1);
|
||||
|
||||
memcpy(new_word, "\\<", 2);
|
||||
memcpy(new_word + 2, word, word_length);
|
||||
strcpy(new_word + 2 + word_length, "\\>");
|
||||
//memcpy(new_word, "\\<", 2);
|
||||
//memcpy(new_word + 2, word, word_length);
|
||||
//strcpy(new_word + 2 + word_length, "\\>");
|
||||
|
||||
token_t * mt = (token_t*)malloc(sizeof(token_t));
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
// ### Char tests ###
|
||||
// ------------------
|
||||
static bool is_quantifier(const char c) {
|
||||
for (const char * s = "+*?"; *s != '\00'; s++) {
|
||||
for (const char * s = "+*?="; *s != '\00'; s++) {
|
||||
if (*s == c) {
|
||||
return true;
|
||||
}
|
||||
@ -40,6 +40,7 @@ typedef struct {
|
||||
int in;
|
||||
char input;
|
||||
int to;
|
||||
int width;
|
||||
} delta_t;
|
||||
|
||||
typedef struct {
|
||||
@ -75,6 +76,9 @@ static int escape_1_to_1(const char c, char * whitelist) {
|
||||
case '.': {
|
||||
strcat(whitelist, ".");
|
||||
} return 1;
|
||||
case '=': {
|
||||
strcat(whitelist, "=");
|
||||
} return 1;
|
||||
case '?': {
|
||||
strcat(whitelist, "?");
|
||||
} return 1;
|
||||
@ -248,12 +252,26 @@ static int escape_1_to_N(const char c, char * whitelist) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
//static int compile_hologram(char * hologram, char * whitelist) {
|
||||
// if (hologram[0] == '\\') {
|
||||
// switch (hologram[1]) {
|
||||
// case '<': {
|
||||
// const char very_word_chars[] = "abcdefghijklmnopqrstuwxyz"
|
||||
// "ABCDEFGHIJKLMNOPQRSTUWXYZ"
|
||||
// "_";
|
||||
// strcat(whitelist, very_word_chars);
|
||||
// is_negative = true;
|
||||
// HOOK_ALL(0, whitelist, 0)
|
||||
// } break;
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
|
||||
static int compile_range(const char * const range,
|
||||
char * whitelist,
|
||||
bool * is_negative) {
|
||||
assert(range[0] == '[' && "Not a range.");
|
||||
assert((range[0] == '[') && "Not a range.");
|
||||
|
||||
int r = 0;
|
||||
const char * s;
|
||||
if (range[1] == '^') {
|
||||
*is_negative = true;
|
||||
@ -262,21 +280,20 @@ static int compile_range(const char * const range,
|
||||
s = range + 1;
|
||||
}
|
||||
for (; *s != ']'; s++) {
|
||||
assert(*s != '\00' && "Unclosed range.");
|
||||
assert((*s != '\0') && "Unclosed range.");
|
||||
char c = *s;
|
||||
if (escape_1_to_1(c, whitelist)
|
||||
|| escape_1_to_N(c, whitelist)) {
|
||||
;
|
||||
} else if (*(s+1) == '-') {
|
||||
char end = *(s+2);
|
||||
assert(c < end && "Endless range.");
|
||||
assert((c < end) && "Endless range.");
|
||||
for (char cc = c; cc < end+1; cc++) {
|
||||
strncat(whitelist, &cc, 1);
|
||||
strncat(whitelist, "\00", 1);
|
||||
strncat(whitelist, "\0", 1);
|
||||
}
|
||||
s += 2;
|
||||
} else {
|
||||
++r;
|
||||
strncat(whitelist, &c, 1);
|
||||
strncat(whitelist, "\00", 1);
|
||||
}
|
||||
@ -288,7 +305,7 @@ static int compile_range(const char * const range,
|
||||
static bool catch_(const regex_t * const regex,
|
||||
int * const state) {
|
||||
|
||||
for (int i = 0; i < regex->catch_table.element_size; i++){
|
||||
for (size_t i = 0; i < regex->catch_table.element_size; i++){
|
||||
const offshoot_t * const offshoot = (vector_get(&regex->catch_table, i));
|
||||
if (offshoot->in == *state) {
|
||||
*state = offshoot->to;
|
||||
@ -302,12 +319,12 @@ static bool catch_(const regex_t * const regex,
|
||||
|
||||
#define HOOK_ALL(from, str, to) do { \
|
||||
int hook_to = (is_negative) ? -1 : state + to; \
|
||||
for (char * s = str; *s != '\00'; s++) { \
|
||||
for (char * s = str; *s != '\0'; s++) { \
|
||||
vector_push(&regex->delta_table, \
|
||||
&(delta_t){state + from, *s, hook_to} \
|
||||
&(delta_t){state + from, *s, hook_to, width} \
|
||||
); \
|
||||
} \
|
||||
if (do_catch) { \
|
||||
if (do_catch || is_negative) { \
|
||||
vector_push(&regex->catch_table, \
|
||||
&(offshoot_t){state + from, hook_to} \
|
||||
); \
|
||||
@ -321,25 +338,30 @@ static bool catch_(const regex_t * const regex,
|
||||
regex_t * regex_compile(const char * const pattern) {
|
||||
regex_t * regex = (regex_t *)malloc(sizeof(regex_t));
|
||||
regex->str = strdup(pattern);
|
||||
vector_init(&regex->delta_table, sizeof(delta_t), 32);
|
||||
vector_init(&regex->catch_table, sizeof(offshoot_t), 16);
|
||||
vector_init(&regex->delta_table, sizeof(delta_t), 0);
|
||||
vector_init(&regex->catch_table, sizeof(offshoot_t), 0);
|
||||
|
||||
int state = 0;
|
||||
|
||||
char whitelist[64];
|
||||
bool do_catch;
|
||||
bool is_negative;
|
||||
int width;
|
||||
for (const char * s = pattern; *s != '\00';) {
|
||||
// Get token
|
||||
assert(!is_quantifier(*pattern) && "Pattern starts with quantifier.");
|
||||
whitelist[0] = '\00';
|
||||
do_catch = false;
|
||||
width = 1;
|
||||
|
||||
switch (*s) {
|
||||
case '.': {
|
||||
do_catch = true;
|
||||
} break;
|
||||
case '\\': {
|
||||
//if (compile_hologram(*s, whitelist)) {
|
||||
// break;
|
||||
//}
|
||||
EAT(1);
|
||||
if(escape_1_to_1(*s, whitelist)
|
||||
|| escape_1_to_N(*s, whitelist)){
|
||||
@ -361,6 +383,7 @@ regex_t * regex_compile(const char * const pattern) {
|
||||
|
||||
// Get quantifier
|
||||
switch (*s) {
|
||||
case '=':
|
||||
case '?': {
|
||||
HOOK_ALL(0, whitelist, +1);
|
||||
EAT(1);
|
||||
@ -406,11 +429,11 @@ static bool regex_assert(const regex_t * const regex,
|
||||
|
||||
for (const char * s = string; *s != '\00'; s++) {
|
||||
// delta
|
||||
for (int i = 0; i < regex->delta_table.element_count; i++) {
|
||||
for (size_t i = 0; i < regex->delta_table.element_count; i++) {
|
||||
const delta_t * const delta = (delta_t *)(vector_get(&regex->delta_table, i));
|
||||
if ((delta->in == state)
|
||||
&& (delta->input == *s)) {
|
||||
if(regex_assert(regex, s+1, delta->to)){
|
||||
if(regex_assert(regex, s + delta->width, delta->to)){
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user