diff --git a/Makefile b/Makefile index 816bb8c..a4108e5 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ TARGET:=hl -CFLAGS:=-std=c99 +CFLAGS:=-std=c99 -Wall -Wextra -Wpedantic -Wshadow -Wundef CPPFLAGS:=-Iinclude -D_GNU_SOURCE -D_FORTIFY_SOURCE=2 DEBUG=1 @@ -29,8 +29,8 @@ VPATH=${SRC.dir} ${OBJ.dir} ${OBJ.dir}/%.o: ${SRC.dir}/%.c ${COMPILE.c} $< -o $@ -${TARGET}: ${HDR} | ${OBJ} - ${LINK.c} $| -o $@ +${TARGET}: ${OBJ} | ${HDR} + ${LINK.c} $+ -o $@ ${SRC} ${HDR}: @@ -42,7 +42,6 @@ uninstall: clean: -rm ${OBJ} ${TARGET} - -rm *.out test: chad_test diff --git a/include/hl.h b/include/hl.h index 00ed239..dbad3b2 100644 --- a/include/hl.h +++ b/include/hl.h @@ -1,6 +1,4 @@ -/* hl.h - * Copyright 2023 Anon Anonson, Ognjen 'xolatile' Milan Robovic, Emil Williams - * SPDX Identifier: GPL-3.0-only / NO WARRANTY / NO GUARANTEE */ +#ifndef HL_H_ #include #include @@ -50,6 +48,15 @@ extern hl_group_t * keyword_hl; extern hl_group_t * preprocessor_hl; extern hl_group_t * symbol_hl; +extern hl_group_t * special_hl; +extern hl_group_t * control_hl; +extern hl_group_t * keyword_hl; +extern hl_group_t * block_hl; +extern hl_group_t * separator_hl; +extern hl_group_t * operator_hl; +extern hl_group_t * comment_hl; +extern hl_group_t * string_literal_hl; + extern void new_display_mode(display_t * mode); extern int free_token(token_t * token); extern int append_token(token_t * token); @@ -75,6 +82,10 @@ extern token_t * new_token(const char * const word, const token_type_t t, hl_group_t * const g); +extern token_t * new_region_token(const char * start, + const char * end, + hl_group_t * g); + // TODO: ALIGN PROPERLY... extern int token_fits(const token_t * const token, @@ -89,248 +100,5 @@ extern void render_string(const char * const string, extern int hl_init(void); extern int hl_deinit(void); -// GLOBALS - -vector_t token_table = { - .data = NULL, - .element_size = sizeof(token_t *), - .element_count = 0UL -}; - -display_t * display_table = NULL; - -// -------------------------------- -// ### Constructors/Destructors ### -// -------------------------------- - -void new_display_mode(display_t * mode) { - HASH_ADD_STR(display_table, - key, - mode); -} - -int free_token(token_t * token) { - free(token->hl); - regex_free(token->syntax); - - return 0; -} - -int append_token(token_t * token) { - vector_push(&token_table, &token); - - return 0; -} - -token_t * new_symbol_token(const char * const c, - hl_group_t * const g) { - - token_t * mt = (token_t*)malloc(sizeof(token_t)); - - mt->hl = g; - mt->t = KEYSYMBOL; - mt->syntax = regex_compile(c); - - append_token(mt); - - return mt; -} - -int new_symbol_tokens(const char * const * symbols, - hl_group_t * const g) { - int i = 0; - - while (*symbols) { - if(new_symbol_token(*symbols, g)) { - ++i; - } else { - assert(!(bool)"Kinda failed to new symbol token thing."); - } - ++symbols; - } - - return i; -} - -int new_char_tokens(const char * str, - hl_group_t * const g) { - int i = 0; - - char buffer[3]; - buffer[0] = '\\'; - buffer[2] = '\0'; - - for(const char * s = str; *s != '\0'; s++) { - buffer[1] = *s; - if(new_symbol_token(is_magic(*s) ? buffer : buffer + 1, g)) { - ++i; - } else { - assert(!(bool)"Kinda failed to new char token thing."); - } - } - - return i; -} - -token_t * new_keyword_token(const char * const word, - hl_group_t * const g) { - size_t word_length = strlen(word); - char * new_word = (char*)malloc(word_length + 4 + 1); - - memcpy(new_word, "\\<", 2); - memcpy(new_word + 2, word, word_length); - strcpy(new_word + 2 + word_length, "\\>"); - - token_t * mt = (token_t*)malloc(sizeof(token_t)); - - mt->hl = g; - mt->t = KEYWORD; - mt->syntax = regex_compile(new_word); - - append_token(mt); - - return mt; -} - -int new_keyword_tokens(const char * const * words, - hl_group_t * const g) { - int i = 0; - - while (*words) { - if(new_keyword_token(*words, g)) { - ++i; - } - ++words; - } - - return i; -} - -token_t * new_region_token(const char * start, - const char * end, - hl_group_t * g) { - char buffer[100]; - buffer[0] = '\0'; - strcat(buffer, start); - strcat(buffer, "[\\d\\D]*"); - strcat(buffer, end); - - token_t * mt = (token_t*)malloc(sizeof(token_t)); - - mt->hl = g; - mt->t = KEYSYMBOL; - mt->syntax = regex_compile(buffer); - - append_token(mt); - - return mt; -} - -token_t * new_token(const char * const word, - const token_type_t t, - hl_group_t * const g) { - switch (t) { - case KEYSYMBOL: { - return new_symbol_token(word, g); - } - case KEYWORD: { - return new_keyword_token(word, g); - } - case MATCH: { - token_t * mt = (token_t*)malloc(sizeof(token_t)); - mt->hl = g; - mt->t = MATCH; - mt->syntax = regex_compile(word); - append_token(mt); - } break; - case REGION: { - } break; - } - - return NULL; -} - -// -------------------- -// ### Highlighting ### -// -------------------- - -// XXX: meditate on this shit -int token_fits(const token_t * const token, - const char * const to, - const int string_offset, - const bool is_start_of_line, - int * match_offset) { // XXX: rm this - UNUSED(match_offset); - //return regex_match(pattern, to, string_offset, match_offset); - match_t * m = regex_match(token->syntax, to, is_start_of_line, string_offset); - return (m ? m->width : 0);// XXX: nigger leaks -} - -void render_string(const char * const string, - const char * const mode) { - for (const char * s = string; *s != '\00';) { - int f = 0; - size_t token_index = 0; - int offset = 0; - - for (; token_index < token_table.element_count; token_index++) { - token_t * t = *(token_t**)vector_get(&token_table, - token_index); - const bool is_start_of_line = (s == string) || (*s == '\n'); - f = token_fits(t, string, (int)(s - string), is_start_of_line, &offset); - if (f) { - break; - } - } - // - display_t * display; - HASH_FIND_STR(display_table, - mode, - display); - // - if (f) { - for (int i = 0; i < offset; i++) { - token_t * t = *(token_t**)vector_get(&token_table, - token_index); - display->callback(s + i, - 0, - t->hl->attributes); - } - token_t * t = *(token_t**)vector_get(&token_table, - token_index); - display->callback(s + offset, - f, - t->hl->attributes); - s += f + offset; - } else { - display->callback(s, - 0, - NULL); - ++s; - } - } -} - -// ------------------------- -// ### Library Mangement ### -// ------------------------- -hl_group_t * special_hl = NULL; -hl_group_t * control_hl = NULL; -hl_group_t * keyword_hl = NULL; -hl_group_t * block_hl = NULL; -hl_group_t * separator_hl = NULL; -hl_group_t * operator_hl = NULL; -hl_group_t * comment_hl = NULL; -hl_group_t * string_literal_hl = NULL; - -int hl_init(void) { - return 0; -} - -int hl_deinit(void) { - for (size_t i = 0; i < token_table.element_count; i++) { - free_token(*(token_t**)vector_get(&token_table, i)); - } - - return 0; -} +#define HL_H_ +#endif diff --git a/include/regex.h b/include/regex.h index a93d5ae..19af105 100644 --- a/include/regex.h +++ b/include/regex.h @@ -1,5 +1,4 @@ #ifndef REGEX_H -#define REGEX_H #include @@ -21,4 +20,5 @@ extern int regex_match(regex_t * regex, const char * const string, const b extern bool is_magic(const char c); +#define REGEX_H #endif diff --git a/include/terminal.h b/include/terminal.h new file mode 100644 index 0000000..8b1abf1 --- /dev/null +++ b/include/terminal.h @@ -0,0 +1,45 @@ +#ifndef TERMINAL_H_ + +#include "hl.h" + +// Terminal manipulation +#define TERMINAL_RESET "\033[0m" + +#define TERMINAL_COLOR_FG_BLACK "\033[30m" +#define TERMINAL_COLOR_FG_RED "\033[31m" +#define TERMINAL_COLOR_FG_GREEN "\033[32m" +#define TERMINAL_COLOR_FG_YELLOW "\033[33m" +#define TERMINAL_COLOR_FG_BLUE "\033[34m" +#define TERMINAL_COLOR_FG_MAGENTA "\033[35m" +#define TERMINAL_COLOR_FG_CYAN "\033[36m" +#define TERMINAL_COLOR_FG_WHITE "\033[37m" + +#define TERMINAL_COLOR_BG_BLACK "\033[40m" +#define TERMINAL_COLOR_BG_RED "\033[41m" +#define TERMINAL_COLOR_BG_GREEN "\033[42m" +#define TERMINAL_COLOR_BG_YELLOW "\033[43m" +#define TERMINAL_COLOR_BG_BLUE "\033[44m" +#define TERMINAL_COLOR_BG_MAGENTA "\033[45m" +#define TERMINAL_COLOR_BG_CYAN "\033[46m" +#define TERMINAL_COLOR_BG_WHITE "\033[47m" + +#define TERMINAL_STYLE_BOLD "\033[1m" +#define TERMINAL_STYLE_ITALICS "\033[3m" +#define TERMINAL_STYLE_REVERSE "\033[7m" + +typedef struct { + const char * attribute; + const char * foreground_color; + const char * background_color; +} terminal_hl_t; + +extern display_t * cterm; + +extern void cterm_render_callback(const char * const string, + const int length, + void * const attributes); + +extern int terminal_hl_init(void); + +#define TERMINAL_H_ +#endif diff --git a/include/vector.h b/include/vector.h index 170e56b..cc7e52b 100644 --- a/include/vector.h +++ b/include/vector.h @@ -1,5 +1,4 @@ #ifndef VECTOR_H -#define VECTOR_H #include @@ -30,4 +29,5 @@ extern void vector_set(vector_t * vector, extern void vector_free(vector_t * vector); +#define VECTOR_H #endif diff --git a/source/hl.c b/source/hl.c new file mode 100644 index 0000000..eb66179 --- /dev/null +++ b/source/hl.c @@ -0,0 +1,247 @@ +#include "hl.h" + +#include + +vector_t token_table = { + .data = NULL, + .element_size = sizeof(token_t *), + .element_count = 0UL +}; + +display_t * display_table = NULL; + +// ------------------------- +// ### Library Mangement ### +// ------------------------- +hl_group_t * special_hl = NULL; +hl_group_t * control_hl = NULL; +hl_group_t * keyword_hl = NULL; +hl_group_t * block_hl = NULL; +hl_group_t * separator_hl = NULL; +hl_group_t * operator_hl = NULL; +hl_group_t * comment_hl = NULL; +hl_group_t * string_literal_hl = NULL; + +// -------------------------------- +// ### Constructors/Destructors ### +// -------------------------------- + +void new_display_mode(display_t * mode) { + HASH_ADD_STR(display_table, + key, + mode); +} + +int free_token(token_t * token) { + free(token->hl); + regex_free(token->syntax); + + return 0; +} + +int append_token(token_t * token) { + vector_push(&token_table, &token); + + return 0; +} + +token_t * new_symbol_token(const char * const c, + hl_group_t * const g) { + + token_t * mt = (token_t*)malloc(sizeof(token_t)); + + mt->hl = g; + mt->t = KEYSYMBOL; + mt->syntax = regex_compile(c); + + append_token(mt); + + return mt; +} + +int new_symbol_tokens(const char * const * symbols, + hl_group_t * const g) { + int i = 0; + + while (*symbols) { + if(new_symbol_token(*symbols, g)) { + ++i; + } else { + assert(!(bool)"Kinda failed to new symbol token thing."); + } + ++symbols; + } + + return i; +} + +int new_char_tokens(const char * str, + hl_group_t * const g) { + int i = 0; + + char buffer[3]; + buffer[0] = '\\'; + buffer[2] = '\0'; + + for(const char * s = str; *s != '\0'; s++) { + buffer[1] = *s; + if(new_symbol_token(is_magic(*s) ? buffer : buffer + 1, g)) { + ++i; + } else { + assert(!(bool)"Kinda failed to new char token thing."); + } + } + + return i; +} + +token_t * new_keyword_token(const char * const word, + hl_group_t * const g) { + //char * new_word = strdup(word); + //size_t word_length = strlen(word); + //char * new_word = (char*)malloc(word_length + 4 + 1); + + //memcpy(new_word, "\\<", 2); + //memcpy(new_word + 2, word, word_length); + //strcpy(new_word + 2 + word_length, "\\>"); + + token_t * mt = (token_t*)malloc(sizeof(token_t)); + + mt->hl = g; + mt->t = KEYWORD; + //mt->syntax = regex_compile(new_word); + mt->syntax = regex_compile(word); + + append_token(mt); + + return mt; +} + +int new_keyword_tokens(const char * const * words, + hl_group_t * const g) { + int i = 0; + + while (*words) { + if(new_keyword_token(*words, g)) { + ++i; + } + ++words; + } + + return i; +} + +token_t * new_region_token(const char * start, + const char * end, + hl_group_t * g) { + char buffer[100]; + buffer[0] = '\0'; + strcat(buffer, start); + strcat(buffer, "[\\d\\D]*"); + strcat(buffer, end); + + token_t * mt = (token_t*)malloc(sizeof(token_t)); + + mt->hl = g; + mt->t = KEYSYMBOL; + mt->syntax = regex_compile(buffer); + + append_token(mt); + + return mt; +} + +token_t * new_token(const char * const word, + const token_type_t t, + hl_group_t * const g) { + switch (t) { + case KEYSYMBOL: { + return new_symbol_token(word, g); + } + case KEYWORD: { + return new_keyword_token(word, g); + } + case MATCH: { + token_t * mt = (token_t*)malloc(sizeof(token_t)); + mt->hl = g; + mt->t = MATCH; + mt->syntax = regex_compile(word); + append_token(mt); + } break; + case REGION: { + } break; + } + + return NULL; +} + +// -------------------- +// ### Highlighting ### +// -------------------- + +int token_fits(const token_t * const token, + const char * const to, + const int string_offset, + const bool is_start_of_line, + int * match_offset) { + UNUSED(match_offset); + //return regex_match(pattern, to, string_offset, match_offset); + return regex_match(token->syntax, to, is_start_of_line, string_offset); +} + +void render_string(const char * const string, + const char * const mode) { + for (const char * s = string; *s != '\00';) { + int f = 0; + size_t token_index = 0; + int offset = 0; + + for (; token_index < token_table.element_count; token_index++) { + token_t * t = *(token_t**)vector_get(&token_table, + token_index); + const bool is_start_of_line = (s == string) || (*s == '\n'); + f = token_fits(t, string, (int)(s - string), is_start_of_line, &offset); + if (f) { + break; + } + } + // + display_t * display; + HASH_FIND_STR(display_table, + mode, + display); + // + if (f) { + for (int i = 0; i < offset; i++) { + token_t * t = *(token_t**)vector_get(&token_table, + token_index); + display->callback(s + i, + 0, + t->hl->attributes); + } + token_t * t = *(token_t**)vector_get(&token_table, + token_index); + display->callback(s + offset, + f, + t->hl->attributes); + s += f + offset; + } else { + display->callback(s, + 0, + NULL); + ++s; + } + } +} + +int hl_init(void) { + return 0; +} + +int hl_deinit(void) { + for (size_t i = 0; i < token_table.element_count; i++) { + free_token(*(token_t**)vector_get(&token_table, i)); + } + + return 0; +} diff --git a/source/main.c b/source/main.c index 43359e5..c499a93 100644 --- a/source/main.c +++ b/source/main.c @@ -8,38 +8,116 @@ #include #include -#include "terminal_hl.h" +#include "terminal.h" #define ALLOCATION_CHUNK (10UL) -static char * buffer = NULL; -static size_t buffer_size = 0; +static const char * argv0; -int main(void) { - // Buffer init - buffer = realloc(buffer, ALLOCATION_CHUNK); +static char * +slurp(const char * fn) +{ + FILE * fp = fopen(fn, "r"); + if (fp) + { + size_t len; + char * b; + fseek(fp, 0, SEEK_END); + len = ftell(fp); + rewind(fp); + b = malloc(len + 1); + if (b && fread(b, 1, len, fp)) + { + b[len] = '\0'; + } + fclose(fp); + return b; + } + else + { return NULL; } +} +static char * +get_stdin(void) +{ + size_t buffer_size = 0; + char * buffer = malloc(ALLOCATION_CHUNK); do { if (!((buffer_size + 1) % ALLOCATION_CHUNK)) { - size_t chunks = (buffer_size + 1) / ALLOCATION_CHUNK; - buffer = realloc(buffer, ++chunks * ALLOCATION_CHUNK); + buffer = realloc(buffer, ((buffer_size + 1) / ALLOCATION_CHUNK + 1) * ALLOCATION_CHUNK); } buffer[buffer_size] = '\0'; - /* TODO handle me */ - assert(read(STDIN_FILENO, &buffer[buffer_size], sizeof (*buffer)) != -1); + if (read(STDIN_FILENO, &buffer[buffer_size], sizeof (*buffer)) == -1) + { + free(buffer); + fprintf(stderr, "%s: Failed to read from STDIN\n", argv0); + return NULL; + } ++buffer_size; } while (buffer[buffer_size - 1]); buffer[buffer_size - 1] = '\0'; + return buffer; +} + +/* TODO: fix the shit going on with syntax/c.h , replace with a function, + * and ideally how make it hotswappable. */ +int +main(int argc, + char ** argv) { + int arg = 0; + int syn = 0; + char * buffer = NULL; + + argv0 = argv[0]; - // Highlight init terminal_hl_init(); - // - #include "syntax/c.h" - // - render_string(buffer, "cterm"); - putchar('\n'); + while (++argv, + --argc) + { + if (**argv == '-') + { + syn = 1; + /* fprintf(stderr, "handle '%s'\n", *argv+1); */ + /* lazy as hell, TODO use uthash */ + if (strcmp(*argv+1, "c") == 0) + { + #include "syntax/c.h" + } + else + { + fprintf(stderr, "%s: Unimplemented syntax '%s'\n", argv0, *argv+1); + return 1; + } + } + else + { + if (!syn) + { + #include "syntax/c.h" + } + free(buffer); + arg = 1; + buffer = slurp(*argv); + render_string(buffer, "cterm"); + if (!buffer) + { + perror(argv0); + return 1; + } + } + } + if (!arg) + { + if (!syn) + { + #include "syntax/c.h" + } + buffer = get_stdin(); + render_string(buffer, "cterm"); + } + fflush(stdout); //hl_deinit(); free(buffer); diff --git a/source/regex.c b/source/regex.c index 23db44d..de553b2 100644 --- a/source/regex.c +++ b/source/regex.c @@ -564,6 +564,7 @@ regex_t * regex_compile(const char * const pattern) { long_continue: cs.is_at_the_beginning = false; + long_continue:; } regex->accepting_state = state; diff --git a/include/terminal_hl.h b/source/terminal.c similarity index 53% rename from include/terminal_hl.h rename to source/terminal.c index 6bbff84..1583279 100644 --- a/include/terminal_hl.h +++ b/source/terminal.c @@ -1,43 +1,4 @@ -#include "hl.h" - -// Terminal manipulation -#define TERMINAL_RESET "\033[0m" - -#define TERMINAL_COLOR_FG_BLACK "\033[30m" -#define TERMINAL_COLOR_FG_RED "\033[31m" -#define TERMINAL_COLOR_FG_GREEN "\033[32m" -#define TERMINAL_COLOR_FG_YELLOW "\033[33m" -#define TERMINAL_COLOR_FG_BLUE "\033[34m" -#define TERMINAL_COLOR_FG_MAGENTA "\033[35m" -#define TERMINAL_COLOR_FG_CYAN "\033[36m" -#define TERMINAL_COLOR_FG_WHITE "\033[37m" - -#define TERMINAL_COLOR_BG_BLACK "\033[40m" -#define TERMINAL_COLOR_BG_RED "\033[41m" -#define TERMINAL_COLOR_BG_GREEN "\033[42m" -#define TERMINAL_COLOR_BG_YELLOW "\033[43m" -#define TERMINAL_COLOR_BG_BLUE "\033[44m" -#define TERMINAL_COLOR_BG_MAGENTA "\033[45m" -#define TERMINAL_COLOR_BG_CYAN "\033[46m" -#define TERMINAL_COLOR_BG_WHITE "\033[47m" - -#define TERMINAL_STYLE_BOLD "\033[1m" -#define TERMINAL_STYLE_ITALICS "\033[3m" -#define TERMINAL_STYLE_REVERSE "\033[7m" - -typedef struct { - const char * attribute; - const char * foreground_color; - const char * background_color; -} terminal_hl_t; - -extern display_t * cterm; - -extern void cterm_render_callback(const char * const string, - const int length, - void * const attributes); - -extern int terminal_hl_init(void); +#include "terminal.h" display_t * cterm = &(display_t) { .key = "cterm",