'=' support; ignore start end atoms for now
This commit is contained in:
parent
823eb8b113
commit
746767571e
11
source/hl.h
11
source/hl.h
@ -130,12 +130,13 @@ int new_char_tokens(const char * characters,
|
|||||||
|
|
||||||
token_t * new_keyword_token(const char * const word,
|
token_t * new_keyword_token(const char * const word,
|
||||||
hl_group_t * const g) {
|
hl_group_t * const g) {
|
||||||
size_t word_length = strlen(word);
|
char * new_word = strdup(word);
|
||||||
char * new_word = (char*)malloc(word_length + 4 + 1);
|
//size_t word_length = strlen(word);
|
||||||
|
//char * new_word = (char*)malloc(word_length + 4 + 1);
|
||||||
|
|
||||||
memcpy(new_word, "\\<", 2);
|
//memcpy(new_word, "\\<", 2);
|
||||||
memcpy(new_word + 2, word, word_length);
|
//memcpy(new_word + 2, word, word_length);
|
||||||
strcpy(new_word + 2 + word_length, "\\>");
|
//strcpy(new_word + 2 + word_length, "\\>");
|
||||||
|
|
||||||
token_t * mt = (token_t*)malloc(sizeof(token_t));
|
token_t * mt = (token_t*)malloc(sizeof(token_t));
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
// ### Char tests ###
|
// ### Char tests ###
|
||||||
// ------------------
|
// ------------------
|
||||||
static bool is_quantifier(const char c) {
|
static bool is_quantifier(const char c) {
|
||||||
for (const char * s = "+*?"; *s != '\00'; s++) {
|
for (const char * s = "+*?="; *s != '\00'; s++) {
|
||||||
if (*s == c) {
|
if (*s == c) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -40,6 +40,7 @@ typedef struct {
|
|||||||
int in;
|
int in;
|
||||||
char input;
|
char input;
|
||||||
int to;
|
int to;
|
||||||
|
int width;
|
||||||
} delta_t;
|
} delta_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
@ -75,6 +76,9 @@ static int escape_1_to_1(const char c, char * whitelist) {
|
|||||||
case '.': {
|
case '.': {
|
||||||
strcat(whitelist, ".");
|
strcat(whitelist, ".");
|
||||||
} return 1;
|
} return 1;
|
||||||
|
case '=': {
|
||||||
|
strcat(whitelist, "=");
|
||||||
|
} return 1;
|
||||||
case '?': {
|
case '?': {
|
||||||
strcat(whitelist, "?");
|
strcat(whitelist, "?");
|
||||||
} return 1;
|
} return 1;
|
||||||
@ -248,12 +252,26 @@ static int escape_1_to_N(const char c, char * whitelist) {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//static int compile_hologram(char * hologram, char * whitelist) {
|
||||||
|
// if (hologram[0] == '\\') {
|
||||||
|
// switch (hologram[1]) {
|
||||||
|
// case '<': {
|
||||||
|
// const char very_word_chars[] = "abcdefghijklmnopqrstuwxyz"
|
||||||
|
// "ABCDEFGHIJKLMNOPQRSTUWXYZ"
|
||||||
|
// "_";
|
||||||
|
// strcat(whitelist, very_word_chars);
|
||||||
|
// is_negative = true;
|
||||||
|
// HOOK_ALL(0, whitelist, 0)
|
||||||
|
// } break;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
|
||||||
static int compile_range(const char * const range,
|
static int compile_range(const char * const range,
|
||||||
char * whitelist,
|
char * whitelist,
|
||||||
bool * is_negative) {
|
bool * is_negative) {
|
||||||
assert(range[0] == '[' && "Not a range.");
|
assert((range[0] == '[') && "Not a range.");
|
||||||
|
|
||||||
int r = 0;
|
|
||||||
const char * s;
|
const char * s;
|
||||||
if (range[1] == '^') {
|
if (range[1] == '^') {
|
||||||
*is_negative = true;
|
*is_negative = true;
|
||||||
@ -262,21 +280,20 @@ static int compile_range(const char * const range,
|
|||||||
s = range + 1;
|
s = range + 1;
|
||||||
}
|
}
|
||||||
for (; *s != ']'; s++) {
|
for (; *s != ']'; s++) {
|
||||||
assert(*s != '\00' && "Unclosed range.");
|
assert((*s != '\0') && "Unclosed range.");
|
||||||
char c = *s;
|
char c = *s;
|
||||||
if (escape_1_to_1(c, whitelist)
|
if (escape_1_to_1(c, whitelist)
|
||||||
|| escape_1_to_N(c, whitelist)) {
|
|| escape_1_to_N(c, whitelist)) {
|
||||||
;
|
;
|
||||||
} else if (*(s+1) == '-') {
|
} else if (*(s+1) == '-') {
|
||||||
char end = *(s+2);
|
char end = *(s+2);
|
||||||
assert(c < end && "Endless range.");
|
assert((c < end) && "Endless range.");
|
||||||
for (char cc = c; cc < end+1; cc++) {
|
for (char cc = c; cc < end+1; cc++) {
|
||||||
strncat(whitelist, &cc, 1);
|
strncat(whitelist, &cc, 1);
|
||||||
strncat(whitelist, "\00", 1);
|
strncat(whitelist, "\0", 1);
|
||||||
}
|
}
|
||||||
s += 2;
|
s += 2;
|
||||||
} else {
|
} else {
|
||||||
++r;
|
|
||||||
strncat(whitelist, &c, 1);
|
strncat(whitelist, &c, 1);
|
||||||
strncat(whitelist, "\00", 1);
|
strncat(whitelist, "\00", 1);
|
||||||
}
|
}
|
||||||
@ -288,7 +305,7 @@ static int compile_range(const char * const range,
|
|||||||
static bool catch_(const regex_t * const regex,
|
static bool catch_(const regex_t * const regex,
|
||||||
int * const state) {
|
int * const state) {
|
||||||
|
|
||||||
for (int i = 0; i < regex->catch_table.element_size; i++){
|
for (size_t i = 0; i < regex->catch_table.element_size; i++){
|
||||||
const offshoot_t * const offshoot = (vector_get(&regex->catch_table, i));
|
const offshoot_t * const offshoot = (vector_get(&regex->catch_table, i));
|
||||||
if (offshoot->in == *state) {
|
if (offshoot->in == *state) {
|
||||||
*state = offshoot->to;
|
*state = offshoot->to;
|
||||||
@ -302,12 +319,12 @@ static bool catch_(const regex_t * const regex,
|
|||||||
|
|
||||||
#define HOOK_ALL(from, str, to) do { \
|
#define HOOK_ALL(from, str, to) do { \
|
||||||
int hook_to = (is_negative) ? -1 : state + to; \
|
int hook_to = (is_negative) ? -1 : state + to; \
|
||||||
for (char * s = str; *s != '\00'; s++) { \
|
for (char * s = str; *s != '\0'; s++) { \
|
||||||
vector_push(&regex->delta_table, \
|
vector_push(&regex->delta_table, \
|
||||||
&(delta_t){state + from, *s, hook_to} \
|
&(delta_t){state + from, *s, hook_to, width} \
|
||||||
); \
|
); \
|
||||||
} \
|
} \
|
||||||
if (do_catch) { \
|
if (do_catch || is_negative) { \
|
||||||
vector_push(&regex->catch_table, \
|
vector_push(&regex->catch_table, \
|
||||||
&(offshoot_t){state + from, hook_to} \
|
&(offshoot_t){state + from, hook_to} \
|
||||||
); \
|
); \
|
||||||
@ -321,25 +338,30 @@ static bool catch_(const regex_t * const regex,
|
|||||||
regex_t * regex_compile(const char * const pattern) {
|
regex_t * regex_compile(const char * const pattern) {
|
||||||
regex_t * regex = (regex_t *)malloc(sizeof(regex_t));
|
regex_t * regex = (regex_t *)malloc(sizeof(regex_t));
|
||||||
regex->str = strdup(pattern);
|
regex->str = strdup(pattern);
|
||||||
vector_init(&regex->delta_table, sizeof(delta_t), 32);
|
vector_init(&regex->delta_table, sizeof(delta_t), 0);
|
||||||
vector_init(&regex->catch_table, sizeof(offshoot_t), 16);
|
vector_init(&regex->catch_table, sizeof(offshoot_t), 0);
|
||||||
|
|
||||||
int state = 0;
|
int state = 0;
|
||||||
|
|
||||||
char whitelist[64];
|
char whitelist[64];
|
||||||
bool do_catch;
|
bool do_catch;
|
||||||
bool is_negative;
|
bool is_negative;
|
||||||
|
int width;
|
||||||
for (const char * s = pattern; *s != '\00';) {
|
for (const char * s = pattern; *s != '\00';) {
|
||||||
// Get token
|
// Get token
|
||||||
assert(!is_quantifier(*pattern) && "Pattern starts with quantifier.");
|
assert(!is_quantifier(*pattern) && "Pattern starts with quantifier.");
|
||||||
whitelist[0] = '\00';
|
whitelist[0] = '\00';
|
||||||
do_catch = false;
|
do_catch = false;
|
||||||
|
width = 1;
|
||||||
|
|
||||||
switch (*s) {
|
switch (*s) {
|
||||||
case '.': {
|
case '.': {
|
||||||
do_catch = true;
|
do_catch = true;
|
||||||
} break;
|
} break;
|
||||||
case '\\': {
|
case '\\': {
|
||||||
|
//if (compile_hologram(*s, whitelist)) {
|
||||||
|
// break;
|
||||||
|
//}
|
||||||
EAT(1);
|
EAT(1);
|
||||||
if(escape_1_to_1(*s, whitelist)
|
if(escape_1_to_1(*s, whitelist)
|
||||||
|| escape_1_to_N(*s, whitelist)){
|
|| escape_1_to_N(*s, whitelist)){
|
||||||
@ -361,6 +383,7 @@ regex_t * regex_compile(const char * const pattern) {
|
|||||||
|
|
||||||
// Get quantifier
|
// Get quantifier
|
||||||
switch (*s) {
|
switch (*s) {
|
||||||
|
case '=':
|
||||||
case '?': {
|
case '?': {
|
||||||
HOOK_ALL(0, whitelist, +1);
|
HOOK_ALL(0, whitelist, +1);
|
||||||
EAT(1);
|
EAT(1);
|
||||||
@ -406,11 +429,11 @@ static bool regex_assert(const regex_t * const regex,
|
|||||||
|
|
||||||
for (const char * s = string; *s != '\00'; s++) {
|
for (const char * s = string; *s != '\00'; s++) {
|
||||||
// delta
|
// delta
|
||||||
for (int i = 0; i < regex->delta_table.element_count; i++) {
|
for (size_t i = 0; i < regex->delta_table.element_count; i++) {
|
||||||
const delta_t * const delta = (delta_t *)(vector_get(&regex->delta_table, i));
|
const delta_t * const delta = (delta_t *)(vector_get(&regex->delta_table, i));
|
||||||
if ((delta->in == state)
|
if ((delta->in == state)
|
||||||
&& (delta->input == *s)) {
|
&& (delta->input == *s)) {
|
||||||
if(regex_assert(regex, s+1, delta->to)){
|
if(regex_assert(regex, s + delta->width, delta->to)){
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user