From 3b7fb8dd10418fea47351c2bb99e964d12214285 Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 20 Sep 2023 22:47:04 +0000 Subject: [PATCH 1/5] globs and warning fixes --- Makefile | 4 +- include/chad.h | 8 - include/hl.h | 5 +- regex.c | 618 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ source/hl.c | 6 +- source/jeger.c | 2 +- 6 files changed, 626 insertions(+), 17 deletions(-) delete mode 100644 include/chad.h create mode 100644 regex.c diff --git a/Makefile b/Makefile index 0284d34..4f5cb81 100644 --- a/Makefile +++ b/Makefile @@ -20,8 +20,8 @@ include chad.mk SRC.dir := source OBJ.dir := object -SRC:=$(shell find ${SRC.dir} -iname '*.c') -HDR:=$(shell find ${SRC.dir} -iname '*.h') +SRC:=hl.c jeger.c main.c terminal.c vector.c +HDR:=chad.h hl.h jeger.h terminal.h vector.h OBJ:=$(subst $(SRC.dir),$(OBJ.dir),$(SRC:.c=.o)) VPATH=${SRC.dir} ${OBJ.dir} diff --git a/include/chad.h b/include/chad.h deleted file mode 100644 index 02935aa..0000000 --- a/include/chad.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef CHAD_H - -#include - -#define UNUSED(x) ((void)x) /* much like this header */ - -#define CHAD_H -#endif diff --git a/include/hl.h b/include/hl.h index 271f1f3..890491d 100644 --- a/include/hl.h +++ b/include/hl.h @@ -5,10 +5,11 @@ #include #include #include -#include "chad.h" #include "vector.h" #include "jeger.h" +#define UNUSED(x) ((void)x) /* much like this header */ + // ------------------- // ### Definitions ### // ------------------- @@ -87,8 +88,6 @@ extern token_t * new_region_token(const char * start, const char * end, hl_group_t * g); -// TODO: ALIGN PROPERLY... 
- extern int token_fits(const token_t * const token, const char * const to, const int string_offset, diff --git a/regex.c b/regex.c new file mode 100644 index 0000000..db0ad97 --- /dev/null +++ b/regex.c @@ -0,0 +1,618 @@ +/* regex.c + * Copyright 2023 Anon Anonson, Ognjen 'xolatile' Milan Robovic, Emil Williams + * SPDX Identifier: GPL-3.0-only / NO WARRANTY / NO GUARANTEE */ + +#include "regex.h" + +#include +#include +#include +#include +#include + +// ------------------ +// ### Char tests ### +// ------------------ +static bool is_quantifier(const char c) { + for (const char * s = "+*?="; *s != '\00'; s++) { + if (*s == c) { + return true; + } + } + return false; +} + +bool is_magic(const char c) { + if (is_quantifier(c)) { + return true; + } + for (const char * s = "\\[].^"; *s != '\00'; s++) { + if (*s == c) { + return true; + } + } + return false; +} + +// ---------------------- +// ### Internal Types ### +// ---------------------- +typedef struct { + int in; + char input; + int to; + int width; +} delta_t; + +typedef struct { + int in; + int to; + int width; +} offshoot_t; + +typedef struct { + bool * do_catch; + bool * is_negative; +// these might be obsolite but im leaving them for now + bool * do_loop_hook; + bool * do_follow_hook; + bool * do_loop_shoot; + bool * do_follow_shoot; +// --- + int * state; + int * width; + char * whitelist; + char * blacklist; + regex_t * regex; +} compiler_state; + + + +// ---------------------------------- +// ### Regex creation/destruction ### +// ---------------------------------- +static int escape_1_to_1(const char c, compiler_state * cs) { + char * target_list = (*cs->is_negative) ? 
cs->blacklist : cs->whitelist; + switch (c) { + case 't': { + strcat(target_list, "\t"); + } return 1; + case 'n': { + strcat(target_list, "\n"); + } return 1; + case 'r': { + strcat(target_list, "\r"); + } return 1; + case 'b': { + strcat(target_list, "\b"); + } return 1; + case '[': { + strcat(target_list, "["); + } return 1; + case ']': { + strcat(target_list, "]"); + } return 1; + case '.': { + strcat(target_list, "."); + } return 1; + case '^': { + strcat(target_list, "^"); + } return 1; + case '=': { + strcat(target_list, "="); + } return 1; + case '?': { + strcat(target_list, "?"); + } return 1; + case '+': { + strcat(target_list, "+"); + } return 1; + case '*': { + strcat(target_list, "*"); + } return 1; + case '\\': { + strcat(target_list, "\\"); + } return 1; + } + + return 0; +} + +static int escape_1_to_N(const char c, compiler_state * cs) { + char * target_list = (*cs->is_negative) ? cs->blacklist : cs->whitelist; + switch(c) { + case 'i': { + const char identifier_chars[] = "@0123456789_" + "\300\301\302\303\304" + "\305\306\307\310\311" + "\312\313\314\315\316" + "\317\320\321\322\323" + "\324\325\326\327\330" + "\331\332\333\334\335" + "\336\337"; + strcpy(target_list, identifier_chars); + return sizeof(identifier_chars)-1; + }; + case 'I': { + const char identifier_chars[] = "@_" + "\300\301\302\303\304" + "\305\306\307\310\311" + "\312\313\314\315\316" + "\317\320\321\322\323" + "\324\325\326\327\330" + "\331\332\333\334\335" + "\336\337"; + strcpy(target_list, identifier_chars); + return sizeof(identifier_chars)-1; + }; + case 'k': { + const char keyword_chars[] = "@0123456789_" + "\300\301\302\303\304" + "\305\306\307\310\311" + "\312\313\314\315\316" + "\317\320\321\322\323" + "\324\325\326\327\330" + "\331\332\333\334\335" + "\336\337"; + strcpy(target_list, keyword_chars); + return sizeof(keyword_chars)-1; + }; + case 'K': { + const char keyword_chars[] = "@_" + "\300\301\302\303\304" + "\305\306\307\310\311" + "\312\313\314\315\316" + 
"\317\320\321\322\323" + "\324\325\326\327\330" + "\331\332\333\334\335" + "\336\337"; + strcpy(target_list, keyword_chars); + return sizeof(keyword_chars)-1; + }; + case 'f': { + const char filename_chars[] = "@0123456789/.-_+,#$%~="; + strcpy(target_list, filename_chars); + return sizeof(filename_chars)-1; + }; + case 'F': { + const char filename_chars[] = "@/.-_+,#$%~="; + strcpy(target_list, filename_chars); + return sizeof(filename_chars)-1; + }; + case 'p': { + const char printable_chars[] = "@" + "\241\242\243\244\245" + "\246\247\250\251\252" + "\253\254\255\256\257" + "\260\261\262\263\264" + "\265\266\267\270\271" + "\272\273\274\275\276" + "\277" + "\300\301\302\303\304" + "\305\306\307\310\311" + "\312\313\314\315\316" + "\317\320\321\322\323" + "\324\325\326\327\330" + "\331\332\333\334\335" + "\336\337"; + strcpy(target_list, printable_chars); + return sizeof(printable_chars)-1; + }; + case 'P': { + const char printable_chars[] = "@" + "\241\242\243\244\245" + "\246\247\250\251\252" + "\253\254\255\256\257" + "\260\261\262\263\264" + "\265\266\267\270\271" + "\272\273\274\275\276" + "\277" + "\300\301\302\303\304" + "\305\306\307\310\311" + "\312\313\314\315\316" + "\317\320\321\322\323" + "\324\325\326\327\330" + "\331\332\333\334\335" + "\336\337"; + strcpy(target_list, printable_chars); + return sizeof(printable_chars)-1; + }; + case 's': { + const char whitespace_chars[] = " \t\v\n"; + strcpy(target_list, whitespace_chars); + return sizeof(whitespace_chars)-1; + }; + case 'd': { + const char digit_chars[] = "0123456789"; + strcpy(target_list, digit_chars); + return sizeof(digit_chars)-1; + }; + case 'x': { + const char hex_chars[] = "0123456789" + "abcdef" + "ABCDEF"; + strcpy(target_list, hex_chars); + return sizeof(hex_chars)-1; + }; + case 'o': { + const char oct_chars[] = "01234567"; + strcpy(target_list, oct_chars); + return sizeof(oct_chars)-1; + }; + case 'w': { + const char word_chars[] = "0123456789" + "abcdefghijklmnopqrstuwxyz" + 
"ABCDEFGHIJKLMNOPQRSTUWXYZ" + "_"; + strcpy(target_list, word_chars); + return sizeof(word_chars)-1; + }; + case 'h': { + const char very_word_chars[] = "abcdefghijklmnopqrstuwxyz" + "ABCDEFGHIJKLMNOPQRSTUWXYZ" + "_"; + strcpy(target_list, very_word_chars); + return sizeof(very_word_chars)-1; + }; + case 'a': { + const char alpha_chars[] = "abcdefghijklmnopqrstuwxyz" + "ABCDEFGHIJKLMNOPQRSTUWXYZ"; + strcpy(target_list, alpha_chars); + return sizeof(alpha_chars)-1; + }; + case 'l': { + const char lower_alpha_chars[] = "abcdefghijklmnopqrstuwxyz"; + strcpy(target_list, lower_alpha_chars); + return sizeof(lower_alpha_chars)-1; + }; + case 'u': { + const char upper_alpha_chars[] = "ABCDEFGHIJKLMNOPQRSTUWXYZ"; + strcpy(target_list, upper_alpha_chars); + return sizeof(upper_alpha_chars)-1; + }; + } + + return 0; +} + +static int escape_to_negative(const char c, + compiler_state * cs) { + switch (c) { + case 'D': { + const char digit_chars[] = "0123456789"; + strcpy(cs->blacklist, digit_chars); + *cs->is_negative = true; + return sizeof(digit_chars)-1; + }; + } + + return 0; +} + +//static int compile_hologram(char * hologram, char * whitelist) { +// if (hologram[0] == '\\') { +// switch (hologram[1]) { +// case '<': { +// const char very_word_chars[] = "abcdefghijklmnopqrstuwxyz" +// "ABCDEFGHIJKLMNOPQRSTUWXYZ" +// "_"; +// strcat(whitelist, very_word_chars); +// is_negative = true; +// HOOK_ALL(0, whitelist, 0) +// } break; +// } +// } +//} + +static int compile_dot(compiler_state * cs) { + *cs->do_catch = true; + return true; +} + +static int compile_escape(const char c, + compiler_state * cs) { + + return escape_1_to_1(c, cs) + || escape_1_to_N(c, cs) + || escape_to_negative(c, cs) + //|| compile_hologram(*s, whitelist) + ; +} + +static int compile_range(const char * const range, + compiler_state * cs) { + assert((range[0] == '[') && "Not a range."); + + char * target_list = (*cs->is_negative) ? 
cs->blacklist : cs->whitelist; + + const char * s; + if (range[1] == '^') { + *cs->is_negative = true; + s = range + 2; + } else { + s = range + 1; + } + for (; *s != ']'; s++) { + assert((*s != '\0') && "Unclosed range."); + char c = *s; + if (c == '\\') { + s += 1; + assert(compile_escape(*s, cs) && "Unknown escape."); + } else if (*(s+1) == '-') { + char end = *(s+2); + assert((c < end) && "Endless range."); + for (char cc = c; cc < end+1; cc++) { + strncat(target_list, &cc, 1); + strncat(target_list, "\0", 1); + } + s += 2; + } else { + strncat(target_list, &c, 1); + } + } + + return ((s - range) + 1); +} + +void filter_blacklist(const char * whitelist, + const char * blacklist, + char * filtered) { + for (; *blacklist != '\0'; blacklist++) { + for(; *whitelist != '\0'; whitelist++) { + if (*blacklist == *whitelist) { + goto long_continue; + } + } + strncat(filtered, blacklist, 1); + long_continue:; + } +} + +#define HALT_AND_CATCH_FIRE INT_MIN + +void HOOK_ALL( int from, + const char * const str, + int to, + compiler_state * cs) { + + int hook_to = (to == HALT_AND_CATCH_FIRE) ? 
HALT_AND_CATCH_FIRE : ((*cs->state) + to); + + + for (const char * s = str; *s != '\0'; s++) { + delta_t * delta = malloc(sizeof(delta_t)); + delta->in = *cs->state + from; + delta->input = *s; + delta->to = hook_to; + delta->width = *cs->width; + vector_push(&cs->regex->delta_table, + &delta); + } +} + +void ABSOLUTE_OFFSHOOT(int from, + int to, + int width, + compiler_state * cs) { + offshoot_t * offshoot = malloc(sizeof(offshoot_t)); + offshoot->in = from; + offshoot->to = to; + offshoot->width = width; + vector_push(&cs->regex->catch_table, + &offshoot); +} + +void OFFSHOOT(int from, + int to, + int width, + compiler_state * cs) { + ABSOLUTE_OFFSHOOT(*cs->state + from, *cs->state + to, width, cs); +} + +regex_t * regex_compile(const char * const pattern) { + regex_t * regex = (regex_t *)malloc(sizeof(regex_t)); + regex->str = strdup(pattern); + vector_init(&regex->delta_table, sizeof(delta_t*), 0UL); + vector_init(&regex->catch_table, sizeof(offshoot_t*), 0UL); + + int state = 2; + + bool do_catch; + bool is_negative; + bool do_loop_hook; + bool do_follow_hook; + bool do_loop_shoot; + bool do_follow_shoot; + int width; + char whitelist[64]; + char blacklist[64]; + + compiler_state cs = { + .do_catch = &do_catch, + .is_negative = &is_negative, + .state = &state, + .width = &width, + .whitelist = whitelist, + .blacklist = blacklist, + .regex = regex, + }; + + for (const char * s = pattern; *s != '\00';) { + // Reset the compiler + assert(!is_quantifier(*pattern) && "Pattern starts with quantifier."); + whitelist[0] = '\0'; + blacklist[0] = '\0'; + do_catch = false; + is_negative = false; + do_loop_hook = false; + do_follow_hook = false; + do_loop_shoot = false; + do_follow_shoot = false; + width = 1; + + // Translate char + switch (*s) { + case '^': { + if (s == pattern) { + ABSOLUTE_OFFSHOOT(0, 2, 0, &cs); + ABSOLUTE_OFFSHOOT(1, HALT_AND_CATCH_FIRE, 0, &cs); + } + whitelist[0] = '\n'; + whitelist[1] = '\0'; + HOOK_ALL(0, whitelist, 0, &cs); + if (s != pattern) { + 
state += 1; + } + s += 1; + goto long_continue; + } break; + case '.': { + compile_dot(&cs); + } break; + case '\\': { + s += 1; + assert(compile_escape(*s, &cs) && "Unknown escape."); + } break; + case '[': { + s += compile_range(s, &cs) - 1; + } break; + default: { + whitelist[0] = *s; + whitelist[1] = '\0'; + } break; + } + + s += 1; + + // Compile with quantifier + switch (*s) { + case '=': + case '?': { + do_loop_hook = true; + HOOK_ALL(0, whitelist, +1, &cs); + if (do_catch || is_negative) { + OFFSHOOT(0, +1, 1, &cs); + } + s += 1; + } break; + case '*': { + HOOK_ALL(0, whitelist, 0, &cs); + if (do_catch) { + OFFSHOOT(0, +1, 1, &cs); + } else if (is_negative) { + OFFSHOOT(0, 0, 1, &cs); + } + s += 1; + } break; + case '+': { + HOOK_ALL(0, whitelist, +1, &cs); + if (do_catch || is_negative) { + OFFSHOOT(0, +1, 1, &cs); + } + state += 1; + HOOK_ALL(0, whitelist, 0, &cs); + if (do_catch || is_negative) { + OFFSHOOT(0, 0, 1, &cs); + } + s += 1; + } break; + default: { // Literal + HOOK_ALL(0, whitelist, +1, &cs); + if (do_catch || is_negative) { + OFFSHOOT(0, +1, 1, &cs); + } + state += 1; + } break; + } + + // Compile blacklist + if (*blacklist) { + char filtered_blacklist[64]; + filtered_blacklist[0] = '\0'; + filter_blacklist(whitelist, blacklist, filtered_blacklist); + HOOK_ALL(0, filtered_blacklist, HALT_AND_CATCH_FIRE, &cs); + } + long_continue:; + } + + regex->accepting_state = state; + + return regex; +} + +int regex_free(regex_t * const regex) { + free(regex->str); + vector_free(&regex->delta_table); + vector_free(&regex->catch_table); + free(regex); + return 0; +} + + + +// ----------------- +// ### Searching ### +// ----------------- +static int catch_(const regex_t * const regex, + int * const state) { + for (size_t i = 0; i < regex->catch_table.element_count; i++){ + const offshoot_t * const offshoot = *(offshoot_t**)vector_get(&regex->catch_table, i); + if (offshoot->in == *state) { + *state = offshoot->to; + return offshoot->width; + } + } + return 
HALT_AND_CATCH_FIRE; +} + +static int regex_assert(const regex_t * const regex, + const char * const string, + const int string_offset, + int state, + int width) { // XXX: I'm pretty sure this is actually redundant and the width should be calculated from string - s + for (const char * s = (string + string_offset); *s != '\00';) { + // delta + for (size_t i = 0; i < regex->delta_table.element_count; i++) { + const delta_t * const delta = *(delta_t**)vector_get(&regex->delta_table, i); + if ((delta->in == state) + && (delta->input == *s)) { + int r = regex_assert(regex, string, (s - string) + delta->width, delta->to, width + 1); + if(r){ + return r; + } + } + } + + const int catch_width = catch_(regex, &state); + if ((catch_width != HALT_AND_CATCH_FIRE) + && (state != HALT_AND_CATCH_FIRE)) { + s += catch_width; + continue; + } + + return (state == regex->accepting_state) ? width : false; + } + + return false; +} + +int regex_match( regex_t * regex, + const char * const string, + const bool is_start_of_string, + const int string_offset) { // XXX: this parameter is useless; remove it + if (regex == NULL) { + return false; + } + if (string == NULL) { + return true; + } + + const int initial_state = (int)(!is_start_of_string); + + return regex_assert(regex, string, string_offset, initial_state, 0); +} + +bool regex_search( regex_t * regex, + const char * const string) { + + return (bool)regex_match(regex, string, true, 0); +} diff --git a/source/hl.c b/source/hl.c index 5c415d9..c50d7d3 100644 --- a/source/hl.c +++ b/source/hl.c @@ -193,8 +193,8 @@ int token_fits(const token_t * const token, return 0; } - const int r = matches->width; - match_offset = matches->position; + const int r = matches->width; + *match_offset = matches->position; free(matches); @@ -216,7 +216,7 @@ void render_string(const char * const string, result_t * const r = (result_t *)malloc(sizeof(result_t) * 1024); // XXX: dont int rrs = 0; - for (int i = 0; i < token_table.element_count; 
i++) { + for (size_t i = 0; i < token_table.element_count; i++) { token_t * t = *(token_t**)vector_get(&token_table, i); match_t * match = regex_match(t->syntax, string, true); diff --git a/source/jeger.c b/source/jeger.c index c244fe2..5074182 100644 --- a/source/jeger.c +++ b/source/jeger.c @@ -1,4 +1,4 @@ -#if __cplusplus +#ifdef __cplusplus # pragma GCC diagnostic ignored "-Wc++20-extensions" #endif From 19962b577232dc105c93db81cbef2732c96f1370 Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 20 Sep 2023 22:48:34 +0000 Subject: [PATCH 2/5] use object/ --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4f5cb81..60d8000 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,7 @@ include chad.mk SRC.dir := source OBJ.dir := object -SRC:=hl.c jeger.c main.c terminal.c vector.c +SRC:=$(addprefix $(SRC.dir)/,hl.c jeger.c main.c terminal.c vector.c) HDR:=chad.h hl.h jeger.h terminal.h vector.h OBJ:=$(subst $(SRC.dir),$(OBJ.dir),$(SRC:.c=.o)) From 757ab99f3eed95001504f5dfc8db96b4b2158868 Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 20 Sep 2023 22:52:20 +0000 Subject: [PATCH 3/5] Gemmy indention error, tab plague continues to surge --- source/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/main.c b/source/main.c index bf1e043..9f51c64 100644 --- a/source/main.c +++ b/source/main.c @@ -63,7 +63,7 @@ get_stdin(void) /* TODO: fix the shit going on with syntax/c.h , replace with a function, * and ideally how make it hotswappable. 
*/ int -main(int argc, +main(int argc, char ** argv) { int arg = 0; int syn = 0; From ac4a07e9d425258b120c99af6147c654bc18eb3a Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 20 Sep 2023 23:49:36 +0000 Subject: [PATCH 4/5] Improved main.c and syntax for the sake of the future children of the world Removed division from get_stdin, and made it fail properly on failed allocation Fixed the retard brain #include syntax/c.h shit (god I hope I didn't break highlightlighting I'm not - I checked and it outputs the same under the last commit, it's probably fine anon'll fix it what a cuck) Much better input handling, properly using perror and handling multible files even under a noexist condition, probably fixed a seggy on noexist condition --- include/syntax/c.h | 46 +++++++++++++++------------- include/syntax/syntax.h | 6 ++++ source/main.c | 81 +++++++++++++++++++------------------------------ 3 files changed, 62 insertions(+), 71 deletions(-) create mode 100644 include/syntax/syntax.h diff --git a/include/syntax/c.h b/include/syntax/c.h index dffb5a9..9624072 100644 --- a/include/syntax/c.h +++ b/include/syntax/c.h @@ -1,23 +1,27 @@ -const char * c_keywords[] = { - "register", "volatile", "auto", "const", "static", "extern", "if", "else", - "do", "while", "for", "continue", "switch", "case", "default", "break", - "enum", "union", "struct", "typedef", "goto", "void", "return", "sizeof", - "char", "short", "int", "long", "signed", "unsigned", "float", "double", - NULL -}; +void +highlight_c(void) +{ + const char * c_keywords[] = { + "register", "volatile", "auto", "const", "static", "extern", "if", "else", + "do", "while", "for", "continue", "switch", "case", "default", "break", + "enum", "union", "struct", "typedef", "goto", "void", "return", "sizeof", + "char", "short", "int", "long", "signed", "unsigned", "float", "double", + NULL + }; -const char * preprocessor_keywords[] = { - "#include", "#pragma", "#define", "#undef", "#ifdef", "#ifndef", "#elifdef", "#elifndef", - 
"#if", "#elif", "#else", "#endif", "#embed", "#line", "#error", "#warning", - NULL -}; + const char * preprocessor_keywords[] = { + "#include", "#pragma", "#define", "#undef", "#ifdef", "#ifndef", "#elifdef", "#elifndef", + "#if", "#elif", "#else", "#endif", "#embed", "#line", "#error", "#warning", + NULL + }; -new_char_tokens("+-&|.()[]{}", operator_hl); -new_keyword_tokens(c_keywords, control_hl); -new_keyword_tokens(preprocessor_keywords, special_hl); -new_region_token("/\\*", "\\*/", comment_hl); -new_region_token("//", "\\n", comment_hl); -new_region_token("\"", "\"", string_literal_hl); -new_region_token("<", ">", string_literal_hl); -new_keyword_token("keyword", special_hl); -new_keyword_token("while", operator_hl); + new_char_tokens("+-&|.()[]{}", operator_hl); + new_keyword_tokens(c_keywords, control_hl); + new_keyword_tokens(preprocessor_keywords, special_hl); + new_region_token("/\\*", "\\*/", comment_hl); + new_region_token("//", "\\n", comment_hl); + new_region_token("\"", "\"", string_literal_hl); + new_region_token("<", ">", string_literal_hl); + new_keyword_token("keyword", special_hl); + new_keyword_token("while", operator_hl); +} diff --git a/include/syntax/syntax.h b/include/syntax/syntax.h new file mode 100644 index 0000000..b19ca37 --- /dev/null +++ b/include/syntax/syntax.h @@ -0,0 +1,6 @@ +#ifndef SYNTAX_H_ + +#include "c.h" + +#define SYNTAX_H_ +#endif diff --git a/source/main.c b/source/main.c index 9f51c64..b21d9d9 100644 --- a/source/main.c +++ b/source/main.c @@ -9,13 +9,14 @@ #include #include "terminal.h" +#include "syntax/syntax.h" #define ALLOCATION_CHUNK (128UL) static const char * argv0; static char * -slurp(const char * fn) +read_entire_file(const char * fn) { FILE * fp = fopen(fn, "r"); if (fp) @@ -26,8 +27,7 @@ slurp(const char * fn) len = ftell(fp); rewind(fp); b = malloc(len + 1); - if (b && fread(b, 1, len, fp)) - { + if (b && fread(b, 1, len, fp)) { b[len] = '\0'; } fclose(fp); @@ -41,88 +41,69 @@ static char * 
get_stdin(void) { size_t buffer_size = 0; + size_t n = 1; char * buffer = malloc(ALLOCATION_CHUNK); + if (!buffer) + { return NULL; } do { - if (!((buffer_size + 1) % ALLOCATION_CHUNK)) { - buffer = realloc(buffer, (((buffer_size + 1) / ALLOCATION_CHUNK) + 1) * ALLOCATION_CHUNK); + if (buffer_size + 1 >= (ALLOCATION_CHUNK * n)) { + buffer = realloc(buffer, ALLOCATION_CHUNK * ++n + 1); + if (!buffer) + { return NULL; } + buffer[ALLOCATION_CHUNK * n] = '\0'; } - buffer[buffer_size] = '\0'; if (read(STDIN_FILENO, &buffer[buffer_size], sizeof (*buffer)) == -1) { free(buffer); - fprintf(stderr, "%s: Failed to read from STDIN\n", argv0); + fprintf(stderr, "%s: Failed to read from stdin\n", argv0); return NULL; } - ++buffer_size; - } while (buffer[buffer_size - 1]); + } while (buffer[buffer_size++]); buffer[buffer_size - 1] = '\0'; return buffer; } -/* TODO: fix the shit going on with syntax/c.h , replace with a function, - * and ideally how make it hotswappable. */ int main(int argc, char ** argv) { int arg = 0; - int syn = 0; + int ret = 0; char * buffer = NULL; - argv0 = argv[0]; - terminal_hl_init(); - + highlight_c(); /* this mustn't break overrides (but definitely does) */ while (++argv, - --argc) - { - if (**argv == '-') - { - syn = 1; - /* fprintf(stderr, "handle '%s'\n", *argv+1); */ - /* lazy as hell, TODO use uthash */ - if (strcmp(*argv+1, "c") == 0) - { - #include "syntax/c.h" + --argc) { + if (**argv == '-') { + /* TODO use uthash */ + if (strcmp(*argv+1, "c") == 0) { + highlight_c(); } - else - { + else { fprintf(stderr, "%s: Unimplemented syntax '%s'\n", argv0, *argv+1); return 1; } } - else - { - if (!syn) - { - #include "syntax/c.h" - } + else { free(buffer); arg = 1; - buffer = slurp(*argv); - render_string(buffer, "cterm"); - if (!buffer) - { - perror(argv0); - return 1; + buffer = read_entire_file(*argv); + if (!buffer) { + fprintf(stderr,"%s: cannot access '%s': ", argv0, *argv); + perror(NULL); + ret = 2; } + else + { render_string(buffer, "cterm"); 
} } } - if (!arg) - { - if (!syn) - { - #include "syntax/c.h" - } + if (!arg) { buffer = get_stdin(); render_string(buffer, "cterm"); } - fflush(stdout); hl_deinit(); free(buffer); - - //terminal_hl_deinit(); - - return 0; + return ret; } From 8392de4e54e350be8cce0f03a23185fd59513e1b Mon Sep 17 00:00:00 2001 From: Emil Date: Wed, 20 Sep 2023 23:53:33 +0000 Subject: [PATCH 5/5] NOTABSWAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA --- include/syntax/c.h | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/include/syntax/c.h b/include/syntax/c.h index 9624072..3356bdb 100644 --- a/include/syntax/c.h +++ b/include/syntax/c.h @@ -1,27 +1,27 @@ void highlight_c(void) { - const char * c_keywords[] = { - "register", "volatile", "auto", "const", "static", "extern", "if", "else", - "do", "while", "for", "continue", "switch", "case", "default", "break", - "enum", "union", "struct", "typedef", "goto", "void", "return", "sizeof", - "char", "short", "int", "long", "signed", "unsigned", "float", "double", - NULL - }; + const char * c_keywords[] = { + "register", "volatile", "auto", "const", "static", "extern", "if", "else", + "do", "while", "for", "continue", "switch", "case", "default", "break", + "enum", "union", "struct", "typedef", "goto", "void", "return", "sizeof", + "char", "short", "int", "long", "signed", "unsigned", "float", "double", + NULL + }; - const char * preprocessor_keywords[] = { - "#include", "#pragma", "#define", "#undef", "#ifdef", "#ifndef", "#elifdef", "#elifndef", - "#if", "#elif", "#else", "#endif", "#embed", "#line", "#error", "#warning", - NULL - }; + const char * preprocessor_keywords[] = { + "#include", "#pragma", "#define", "#undef", "#ifdef", "#ifndef", "#elifdef", "#elifndef", + "#if", "#elif", "#else", "#endif", "#embed", "#line", "#error", "#warning", + NULL + }; - new_char_tokens("+-&|.()[]{}", operator_hl); - new_keyword_tokens(c_keywords, control_hl); - 
new_keyword_tokens(preprocessor_keywords, special_hl); - new_region_token("/\\*", "\\*/", comment_hl); - new_region_token("//", "\\n", comment_hl); - new_region_token("\"", "\"", string_literal_hl); - new_region_token("<", ">", string_literal_hl); - new_keyword_token("keyword", special_hl); - new_keyword_token("while", operator_hl); + new_char_tokens("+-&|.()[]{}", operator_hl); + new_keyword_tokens(c_keywords, control_hl); + new_keyword_tokens(preprocessor_keywords, special_hl); + new_region_token("/\\*", "\\*/", comment_hl); + new_region_token("//", "\\n", comment_hl); + new_region_token("\"", "\"", string_literal_hl); + new_region_token("<", ">", string_literal_hl); + new_keyword_token("keyword", special_hl); + new_keyword_token("while", operator_hl); }