diff --git a/BUGS.md b/BUGS.md new file mode 100644 index 0000000..9736a8e --- /dev/null +++ b/BUGS.md @@ -0,0 +1,5 @@ +## Bugs + + segfaults under -O2 + + I cannot decipher the valgrind warnings + + a single character right before keywords is always highlighted; the bug is understood, the resolution design is under contemplation + + newlines are not yet given special treatment in regex_match() diff --git a/Makefile b/Makefile index b6b3473..b364514 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ include chad.mk DEBUG:=1 -CFLAGS:=-std=c99 -O2 -Wvla -Wshadow -Wundef $(if ${DEBUG}, ${CHAD_DEBUG},'') +CFLAGS:=-std=c99 -O2 $(if ${DEBUG}, ${CHAD_DEBUG},'') CPPFLAGS:=-D_FORTIFY_SOURCE=2 SRC.dir:=source/ diff --git a/chad.mk b/chad.mk index 161bf90..0b30673 100644 --- a/chad.mk +++ b/chad.mk @@ -8,14 +8,15 @@ CHAD_DEBUG:=-Og -ggdb -pg -fno-inline # Programs to check warnings for as defined by the Chad standard GCC:=gcc -GCC.warnings:=-Wall -Wextra -Wpedantic +GCC.warnings:=-Wall -Wextra -Wpedantic -Wvla -Wshadow -Wundef CLANG:=clang CLANG.warnings:=-Weverything VALGRIND:=valgrind +VALGRIND.flags:=--track-origins=yes --leak-check=full --show-leak-kinds=all chad_test: ${GCC} ${GCC.warnings} ${SRC} -o ${OUT} ${CLANG} ${GCC.warnings} ${SRC} -o ${OUT} - ${VALGRIND} ${OUT} ${OUTARGS} + ${VALGRIND} ${VALGRIND.flags} ${OUT} ${OUTARGS} .DEFAULT_GOAL:=main diff --git a/source/hl.h b/source/hl.h index 693ee73..a831a47 100644 --- a/source/hl.h +++ b/source/hl.h @@ -3,6 +3,7 @@ #include #include #include "chad.h" +#include "regex.h" typedef void (*attribute_callback_t)(const char * const string, const int length, @@ -39,22 +40,93 @@ typedef struct { token_t * token_table[1000]; int token_table_top = 0; -token_t * new_token(const char * const syntax, - const token_type_t t, - const hl_group_t * const g) { +int append_token(token_t * token){ + token_table[token_table_top++] = token; + return 0; +} + +token_t * new_symbol_token(const char * const word, + hl_group_t * const g) { + 
char * new_word = strdup(word); + token_t * mt = (token_t*)malloc(sizeof(token_t)); mt->hl = g; - mt->t = t; - mt->syntax = syntax; - token_table[token_table_top++] = mt; + mt->t = KEYSYMBOL; + mt->syntax = new_word; + append_token(mt); + return mt; + +} + +int new_symbol_tokens(const char * const * symbols, + hl_group_t * const g) { + + int i = 0; + while (*symbols) { + if(new_symbol_token(*symbols, g)){ + ++i; + } + ++symbols; + } + + return i; +} + +int new_char_tokens(const char * characters, + hl_group_t * const g) { + int i = 0; + char buffer[2]; + buffer[1] = '\00'; + for(const char * s = characters; *s != '\00'; s++){ + buffer[0] = *s; + if(new_symbol_token(buffer, g)){ + ++i; + } + } + return i; +} + +token_t * new_keyword_token(const char * const word, + hl_group_t * const g) { + + size_t word_length = strlen(word); + char * new_word = (char*)malloc(word_length + 4 + 1); + memcpy(new_word, "\\<", 2); + memcpy(new_word + 2, word, word_length); + strcpy(new_word + 2 + word_length, "\\>"); + + token_t * mt = (token_t*)malloc(sizeof(token_t)); + mt->hl = g; + mt->t = KEYWORD; + mt->syntax = new_word; + append_token(mt); return mt; } -void new_keyword_tokens(const char * const * words, - hl_group_t * const g) { +token_t * new_token(const char * const word, + const token_type_t t, + hl_group_t * const g) { + switch(t){ + case KEYSYMBOL: { + return new_symbol_token(word, g); + }; + case KEYWORD: { + return new_keyword_token(word, g); + }; + case MATCH: { + } break; + case REGION: { + } break; + } + // XXX: implement the rest +} + +int new_keyword_tokens(const char * const * words, + hl_group_t * const g) { int i = 0; while (*words) { - if(new_token(*words, KEYWORD, g)){ + if(new_keyword_token(*words, g)){ ++i; } ++words; @@ -63,30 +135,16 @@ void new_keyword_tokens(const char * const * words, return i; } -int token_fits(const char* const pattern, - const char* const to) { +int token_fits(const token_t* const token, + const char* const to) { + + const char * 
const pattern = token->syntax; + if (pattern == NULL) { return true; } - for (int i = 0;; i++) { - if (pattern[i] == '\00') { - return i; - } - if (to[i] == '\00' - || pattern[i] != to[i]) { - return false; - } - } -} -bool is_word_separator(const char character) { - if (( isascii(character)) - && (!isalnum(character)) - && ( character != '_')) { - return 1; - } else { - return 0; - } + return regex_match(pattern, to); } void render_string(const char * const string, @@ -95,7 +153,7 @@ void render_string(const char * const string, int f; int i = 0; for (; i < token_table_top; i++) { - f = token_fits(token_table[i]->syntax, s); + f = token_fits(token_table[i], s); if(f){ break; } } // diff --git a/source/main.c b/source/main.c index f924641..a43c8cf 100644 --- a/source/main.c +++ b/source/main.c @@ -1,3 +1,5 @@ +//register +//putchar() #include #include #include @@ -10,21 +12,24 @@ static char * buffer = NULL; static size_t buffer_size = 0; typedef struct { - int attribute; - int foreground_color; - int background_color; + char * attribute; + char * foreground_color; + char * background_color; } terminal_hl_t; void cterm_render_callback(const char * const string, const int length, void * const attributes) { if(!length){ + fputs(TERMINAL_STYLE_BOLD, stdout); putchar(*string); + fputs(TERMINAL_RESET, stdout); return; } - UNUSED(attributes); - fputs(TERMINAL_STYLE_BOLD, stdout); + terminal_hl_t * term_hl = (terminal_hl_t*)attributes; + fputs(term_hl->attribute, stdout); + fputs(term_hl->foreground_color, stdout); for (int i = 0; i < length; i++) { putchar(*(string+i)); } @@ -68,25 +73,50 @@ int main(int argc, NULL }; - terminal_hl_t my_hl = (terminal_hl_t) { - .attribute = 1 - }; - + // display_t * cterm = &(display_t) { .key = "cterm", .callback = cterm_render_callback }; - hl_group_t mygroup = (hl_group_t) { - .link = NULL + // + terminal_hl_t terminal_keyword_hl = (terminal_hl_t) { + .attribute = TERMINAL_STYLE_BOLD, + .foreground_color = TERMINAL_COLOR_FG_GREEN, + 
.background_color = NULL }; + hl_group_t keyword_hl = (hl_group_t) { + .link = NULL, + .attributes = (void*)&terminal_keyword_hl + }; + // + terminal_hl_t terminal_preprocessor_hl = (terminal_hl_t) { + .attribute = TERMINAL_STYLE_BOLD, + .foreground_color = TERMINAL_COLOR_FG_BLUE, + .background_color = NULL + }; + hl_group_t preprocessor_hl = (hl_group_t) { + .link = NULL, + .attributes = (void*)&terminal_preprocessor_hl + }; + // + terminal_hl_t terminal_symbol_hl = (terminal_hl_t) { + .attribute = TERMINAL_STYLE_BOLD, + .foreground_color = TERMINAL_COLOR_FG_YELLOW, + .background_color = NULL + }; + hl_group_t symbol_hl = (hl_group_t) { + .link = NULL, + .attributes = (void*)&terminal_symbol_hl + }; + // new_display_mode(cterm); - new_keyword_tokens(c_keywords, &mygroup); - new_keyword_tokens(preprocessor_keywords, &mygroup); - + new_keyword_tokens(c_keywords, &keyword_hl); + new_keyword_tokens(preprocessor_keywords, &preprocessor_hl); + new_char_tokens("&|()[]{}*,", &symbol_hl); // render_string(buffer, "cterm"); putchar('\n'); - free (buffer); + free(buffer); return 0; } diff --git a/source/regex.c b/source/regex.c new file mode 100644 index 0000000..e9e2787 --- /dev/null +++ b/source/regex.c @@ -0,0 +1,199 @@ +#include "regex.h" + +bool is_case_on = true; + +static bool is_next_valid(const char * const s) { + return *(s + 1); +} + +static bool char_in_range(const char start, + const char end, + const char character) { + if (start > end){ + return false; + } + + for (char c = start; c != end; c++) { + if (character == c) { + return true; + } + } + + return false; +} + +static bool is_word_separator(const char character) { + return (( isascii(character)) + && (!isalnum(character)) + && ( character != '_')); +} + +static bool magic(const char magic_char, const char to_enchant) { + switch(magic_char){ + // \i identifier character (see 'isident' option) + // \I like "\i", but excluding digits + // \k keyword character (see 'iskeyword' option) + // \K like "\k", but 
excluding digits + // \f file name character (see 'isfname' option) + // \F like "\f", but excluding digits + // \p printable character (see 'isprint' option) + // \P like "\p", but excluding digits + case 's': { + return ((to_enchant == ' ') || (to_enchant == '\t')); + } + case 'S': { + return !((to_enchant == ' ') || (to_enchant == '\t')); + } + case 'd': { // [0-9] + return char_in_range('0', '9', to_enchant); + }; + case 'D': { // [^0-9] + return !char_in_range('0', '9', to_enchant); + }; + case 'x': { // [0-9A-Fa-f] + return char_in_range('0', '9', to_enchant) || char_in_range('A', 'F', to_enchant) || char_in_range('a', 'f', to_enchant); + }; + case 'X': { // [^0-9A-Fa-f] + return !char_in_range('0', '9', to_enchant) && !char_in_range('A', 'F', to_enchant) && !char_in_range('a', 'f', to_enchant); + }; + case 'o': { // [0-7] + return char_in_range('0', '7', to_enchant); + }; + case 'O': { // [^0-7] + return !char_in_range('0', '7', to_enchant); + }; + case 'w': { // [0-9A-Za-z_] + return char_in_range('0', '9', to_enchant) || char_in_range('A', 'Z', to_enchant) || char_in_range('a', 'z', to_enchant) || (to_enchant == '_'); + }; + case 'W': { // [^0-9A-Za-z_] + return !(char_in_range('0', '9', to_enchant) || char_in_range('A', 'Z', to_enchant) || char_in_range('a', 'z', to_enchant) || (to_enchant == '_')); + }; + case 'h': { // [A-Za-z_] + return char_in_range('A', 'Z', to_enchant) || char_in_range('a', 'z', to_enchant) || (to_enchant == '_'); + }; + case 'H': { // [^A-Za-z_] + return !(char_in_range('A', 'Z', to_enchant) || char_in_range('a', 'z', to_enchant) || (to_enchant == '_')); + }; + case 'a': { // [A-Za-z] + return char_in_range('A', 'Z', to_enchant) || char_in_range('a', 'z', to_enchant); + }; + case 'A': { // [^A-Za-z] + return !(char_in_range('A', 'Z', to_enchant) || char_in_range('a', 'z', to_enchant)); + }; + case 'l': { // [a-z] + return char_in_range('a', 'z', to_enchant); + }; + case 'L': { // [^a-z] + return !(char_in_range('a', 'z', 
to_enchant)); + }; + case 'u': { // [A-Z] + return char_in_range('A', 'Z', to_enchant); + }; + case 'U': { // [^A-Z] + return !(char_in_range('A', 'Z', to_enchant)); + }; + } + + return false; +} + +int regex_match(const char * const pattern, + const char * const string) { + const char * pattern_pointer = pattern; + const char * string_pointer = string; + + while (1488) { + // End of one of the arguments + if (!(*pattern_pointer)) { + break; + } + if (!(*string_pointer)) { + return false; + } + + // Escape character + if (*pattern_pointer == '\\') { + if (!is_next_valid(pattern_pointer)) { + return false; + } + + switch(*(pattern_pointer + 1)){ + case 't': { + if (*(string_pointer + 1) == '\t') { + pattern_pointer += 2; + string_pointer += 1; + } else { + return false; + } + } break; + case 'r': { + if (*(string_pointer + 1) == '\r') { + pattern_pointer += 2; + string_pointer += 1; + } else { + return false; + } + } break; + case 'e': { + if (*(string_pointer + 1) == '\033') { + pattern_pointer += 2; + string_pointer += 1; + } else { + return false; + } + } break; + case 'b': { + if (*(string_pointer + 1) == '\010') { + pattern_pointer += 2; + string_pointer += 1; + } else { + return false; + } + } break; + } + + if (*(pattern_pointer + 1) == '\\') { + if (*string_pointer == '\\') { + pattern_pointer += 2; + string_pointer += 1; + continue; + } + } + + if (*(pattern_pointer + 1) == '<' + && is_word_separator(*string_pointer)) { + pattern_pointer += 2; + string_pointer += 1; + continue; + } + + if (*(pattern_pointer + 1) == '>') { + if (is_word_separator(*(string_pointer + 1))) { + pattern_pointer += 2; + continue; + } + if (*(string_pointer + 1) == '\00') { + break; + } + } + + if (magic(*(pattern_pointer + 1), *string_pointer)) { + pattern_pointer += 2; + string_pointer += 1; + continue; + } + + return false; + } + + // Literal + if (*pattern_pointer != *string_pointer) { + return false; + } else { + ++pattern_pointer; + ++string_pointer; + } + } + + return 
(string_pointer - string); +} diff --git a/source/regex.h b/source/regex.h new file mode 100644 index 0000000..daea895 --- /dev/null +++ b/source/regex.h @@ -0,0 +1,6 @@ +#include "chad.h" +#include + +extern bool is_case_on; + +int regex_match(const char * const pattern, const char * const string);