Browse Source

i deserve a blowjob

master
anon 7 months ago
parent
commit
9b54a3f3e8
3 changed files with 112 additions and 46 deletions
  1. +5
    -1
      include/jeger.h
  2. +2
    -2
      source/hl.c
  3. +105
    -43
      source/jeger.c

+ 5
- 1
include/jeger.h View File

@@ -15,7 +15,10 @@ typedef struct {
} regex_t;

// Result record describing one regex match.
// An entry whose position and width are both -1 is what is_sentinel() reports as a sentinel.
typedef struct {
// NOTE(review): this is the pre-change declaration kept by the diff view;
// the hunk replaces it with the anonymous union that follows.
int position;
// position and _pos_ptr share storage: writing one overwrites the other.
union {
// Offset of the match start, stored as a pointer difference from the start of the searched string.
int position;
// Pointer into the searched string; regex_assert() sets it on the first
// delta with a non-zero match_width and clears it again if that branch fails.
const char * _pos_ptr;
};
// Match length; regex_assert() adds delta->match_width to it on each successful step.
int width;
} match_t;

@@ -25,5 +28,6 @@ extern bool regex_search(const regex_t * const regex, const char * const st
extern match_t * regex_match(const regex_t * const regex, const char * const string, const bool start_of_string);

extern bool is_magic(const char c);
extern bool is_sentinel(const match_t * const match);

#endif

+ 2
- 2
source/hl.c View File

@@ -195,7 +195,7 @@ void render_string(const char * const string,
token_t * t = *(token_t**)vector_get(&token_table,
i);
match_t * match = regex_match(t->syntax, string, true);
if (match->position == -1) {
if (is_sentinel(match)) {
free(match);
continue;
}
@@ -212,7 +212,7 @@ void render_string(const char * const string,
max = &sentinel;
for (int h = 0; h < rrs; h++) {
result_t * const current_result = r + h;
for (int j = 0; current_result->m[j].position != -1; j++) {
for (int j = 0; !is_sentinel(&(current_result->m[j])); j++) {
if (current_result->m[j].position == (s - string)) {
if (current_result->m[j].width > max->m->width) {
current_result->i = j;


+ 105
- 43
source/jeger.c View File

@@ -8,8 +8,13 @@
#include <string.h>
#include <limits.h>
#include <stdlib.h>
#if DEBUG
# include <stdio.h>
#endif

#define JEGER_INIT_STATE 2
#define JEGER_SOS_STATE 0
#define JEGER_NSOS_STATE 1
#define JEGER_INIT_STATE 2

// ------------------
// ### Char tests ###
@@ -40,6 +45,15 @@ bool is_magic(const char c) {
;
}

// -------------------
// ### Match tests ###
// -------------------
// Tells whether `match` is a sentinel entry, i.e. whether both its
// position and its width hold -1.
bool is_sentinel(const match_t * const match) {
    if (match->position != -1) {
        return false;
    }
    return match->width == -1;
}

// -----------------
// ### Char sets ###
// -----------------
@@ -52,13 +66,13 @@ bool is_magic(const char c) {
#define JEGER_CHAR_SET_lower_hex "abcdef"
#define JEGER_CHAR_SET_upper_hex "ABCDEF"
#define JEGER_CHAR_SET_oct_241_to_277 \
"\241\242\243\244\245" \
"\246\247\250\251\252" \
"\253\254\255\256\257" \
"\260\261\262\263\264" \
"\265\266\267\270\271" \
"\272\273\274\275\276" \
"\277"
"\241\242\243\244\245" \
"\246\247\250\251\252" \
"\253\254\255\256\257" \
"\260\261\262\263\264" \
"\265\266\267\270\271" \
"\272\273\274\275\276" \
"\277"
#define JEGER_CHAR_SET_oct_300_to_337 \
"\300\301\302\303\304" \
"\305\306\307\310\311" \
@@ -68,13 +82,13 @@ bool is_magic(const char c) {
"\331\332\333\334\335" \
"\336\337"
#define JEGER_CHAR_SET_file_extra "/.-_+,#$%~="
#define JEGER_CHAR_SET_whitespace " \t\v\n"
#define JEGER_CHAR_SET_whitespace " " "\t\v\n"

static const char JEGER_CHAR_very_word_chars[] =
JEGER_CHAR_SET_underscore
JEGER_CHAR_SET_lower
JEGER_CHAR_SET_upper
;
static const char JEGER_CHAR_symbol_chars[] =
JEGER_CHAR_SET_underscore
JEGER_CHAR_SET_lower
JEGER_CHAR_SET_upper
;

// ----------------------
// ### Internal Types ###
@@ -95,17 +109,19 @@ typedef struct {
} offshoot_t;

// Single-bit flags that regex_compile() combines in cs.flags.
enum {
// NOTE(review): the next five enumerators are the pre-change values kept by
// the diff view; the six after them are their replacements.
DO_CATCH = 0x00000001 << 0,
IS_NEGATIVE = 0x00000001 << 1,
IS_AT_THE_BEGINNING = 0x00000001 << 2,
FORCE_START_OF_STRING = 0x00000001 << 3,
INCREMENT_STATE = 0x00000001 << 4,
// Post-change values: bits 0-3 keep their meaning.
DO_CATCH = 0x00000001 << 0,
IS_NEGATIVE = 0x00000001 << 1,
IS_AT_THE_BEGINNING = 0x00000001 << 2,
// When set, the JEGER_NSOS_STATE init hookup leads to HALT_AND_CATCH_FIRE instead of JEGER_INIT_STATE.
FORCE_START_OF_STRING = 0x00000001 << 3,
// New flag; when set, the JEGER_SOS_STATE -> JEGER_INIT_STATE init hookup is skipped.
DO_FORBID_START_OF_STRING = 0x00000001 << 4,
// Moved from bit 4 to bit 5 to make room for DO_FORBID_START_OF_STRING.
INCREMENT_STATE = 0x00000001 << 5,
};

// Working state carried through pattern compilation.
typedef struct {
// Bitwise OR of the single-bit flag constants (DO_CATCH, IS_NEGATIVE, ...).
int flags;
// Current automaton state number; its final value becomes regex->accepting_state.
int state;
// Copied into delta.pattern_width by HOOK_ALL().
int width;
// Copied into delta.match_width by HOOK_ALL().
int width2;
// NOTE(review): presumably the accepted / rejected character lists for the
// element being compiled — confirm against regex_compile().
char * whitelist;
char * blacklist;
} compiler_state;
@@ -132,7 +148,7 @@ void HOOK_ALL(const int from,
.input = *s,
.to = ASSERT_HALT(to),
.pattern_width = cs->width,
.match_width = 1,
.match_width = cs->width2,
};
vector_push(&regex->delta_table,
&delta);
@@ -318,9 +334,9 @@ int escape_1_to_N(const char c,
return sizeof(word_chars)-1;
};
case 'h': {
// #global JEGER_CHAR_very_word_chars
strcpy(target_list, JEGER_CHAR_very_word_chars);
return sizeof(JEGER_CHAR_very_word_chars)-1;
// #global JEGER_CHAR_symbol_chars
strcpy(target_list, JEGER_CHAR_symbol_chars);
return sizeof(JEGER_CHAR_symbol_chars)-1;
};
case 'a': {
const char alpha_chars[] = JEGER_CHAR_SET_lower
@@ -346,7 +362,7 @@ int escape_1_to_N(const char c,

static inline
int escape_to_negative(const char c,
compiler_state * const cs) {
compiler_state * const cs) {
switch (c) {
case 'D': {
const char digit_chars[] = JEGER_CHAR_SET_digits;
@@ -488,6 +504,7 @@ regex_t * regex_compile(const char * const pattern) {
blacklist[0] = '\0';
cs.flags &= (IS_AT_THE_BEGINNING | FORCE_START_OF_STRING);
cs.width = 1;
cs.width2 = 1;

// Translate char
switch (*s) {
@@ -503,7 +520,7 @@ regex_t * regex_compile(const char * const pattern) {
if (compile_escape(*s, &cs)) {
s += 1;
} else if (is_hologram_escape(*s)) {
;
s -= 1;
} else {
assert("Unknown escape.");
}
@@ -518,6 +535,12 @@ regex_t * regex_compile(const char * const pattern) {
} break;
}

/* Ew */
if (*s == '\\'
&& is_hologram_escape(*(s+1))) {
++s;
}

// Compile char
switch (*s) {
// holograms
@@ -533,18 +556,47 @@ regex_t * regex_compile(const char * const pattern) {
s += 1;
} break;
case '<': {
cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
if (cs.flags & IS_AT_THE_BEGINNING) {
ABSOLUTE_OFFSHOOT(0, JEGER_INIT_STATE+1, 0, 0, regex);
// XXX: make this legible
if (cs.flags & IS_AT_THE_BEGINNING
&& !(cs.flags & DO_CATCH)
&& !(cs.flags & IS_NEGATIVE)
&& whitelist[0] == '\0') {
// ---
cs.flags |= INCREMENT_STATE;
cs.flags |= DO_FORBID_START_OF_STRING;
strcat(whitelist, JEGER_CHAR_symbol_chars);
// ---
ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
// ---
++cs.state;
cs.width = 0;
cs.width2 = 0;
HOOK_ALL(0, whitelist, +1, &cs, regex);
cs.width = 1;
OFFSHOOT(0, +1, 1, 0, &cs, regex);
// ---
} else {
HOOK_ALL(0, whitelist, +1, &cs, regex);
if ((cs.flags & DO_CATCH)
|| (cs.flags & IS_NEGATIVE)) {
OFFSHOOT(+1, +2, 1, 1, &cs, regex);
} else {
cs.flags |= INCREMENT_STATE;
}
OFFSHOOT(0, +1, 1, 0, &cs, regex);
}
strcat(blacklist, JEGER_CHAR_very_word_chars);
OFFSHOOT(0, 0, 1, 0, &cs, regex);
cs.flags |= IS_NEGATIVE;
strcat(blacklist, JEGER_CHAR_symbol_chars);
s += 1;
} break;
case '>': {
HOOK_ALL(0, whitelist, +1, &cs, regex);
cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
strcat(blacklist, JEGER_CHAR_very_word_chars);
OFFSHOOT(0, 1, 0, 0, &cs, regex);
strcat(blacklist, JEGER_CHAR_symbol_chars);
OFFSHOOT(+1, +2, 0, 0, &cs, regex);
++cs.state;
s += 1;
} break;
// quantifiers
@@ -605,11 +657,13 @@ regex_t * regex_compile(const char * const pattern) {
}

// Init state hookups
ABSOLUTE_OFFSHOOT(0, JEGER_INIT_STATE, 0, 0, regex);
if (!(cs.flags & DO_FORBID_START_OF_STRING)) {
ABSOLUTE_OFFSHOOT(JEGER_SOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
}
if (cs.flags & FORCE_START_OF_STRING) {
ABSOLUTE_OFFSHOOT(1, HALT_AND_CATCH_FIRE, 0, 0, regex);
ABSOLUTE_OFFSHOOT(JEGER_NSOS_STATE, HALT_AND_CATCH_FIRE, 0, 0, regex);
} else {
ABSOLUTE_OFFSHOOT(1, JEGER_INIT_STATE, 0, 0, regex);
ABSOLUTE_OFFSHOOT(JEGER_NSOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
}

regex->accepting_state = cs.state;
@@ -682,14 +736,18 @@ bool regex_assert(const regex_t * const regex,

if ((delta->in == state)
&& (delta->input == *s)) {
bool do_reset = false;
was_found = true;
if (!match->_pos_ptr && delta->match_width) {
match->_pos_ptr = s;
do_reset = true;
}
const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
if(r){
if (match->position == -1) {
match->position = (s - string);
}
match->width += delta->match_width;
return r;
} else if (do_reset) {
match->_pos_ptr = NULL;
}
}
}
@@ -729,17 +787,21 @@ match_t * regex_match(const regex_t * const regex,
// Find all matches
{
const char * s = string;
int initial_state;
do {
int initial_state;
initial_state = (int)(!(is_start_of_string && (s == string)));

*match = (match_t){
.position = -1,
.width = 0,
._pos_ptr = NULL,
.width = 0,
};

if (regex_assert(regex, s, initial_state, match)) {
match->position = (s - string);
if (match->_pos_ptr) {
match->position = (match->_pos_ptr - string);
} else {
match->position = (s - string);
}

vector_push(&matches, match);

@@ -773,7 +835,7 @@ bool regex_search(const regex_t * const regex,
const char * const string) {

match_t * m = regex_match(regex, string, true);
const bool r = (m->position != -1);
const bool r = !is_sentinel(m);
free(m);

return r;


Loading…
Cancel
Save