From e52cb513c91f803375cdcb68ed10659165adeddb Mon Sep 17 00:00:00 2001 From: xolatile Date: Thu, 16 Nov 2023 15:10:49 -0500 Subject: [PATCH] Work in progress, reimplementing selection algorithm... --- chapter/chapter_4.c | 170 +++++++++++++++++++++++++++++++++------------------- compile.sh | 2 +- program/example.c | 33 ++++++++++ xhartae.c | 13 +--- 4 files changed, 142 insertions(+), 76 deletions(-) create mode 100644 program/example.c diff --git a/chapter/chapter_4.c b/chapter/chapter_4.c index bcc545a..87822a4 100644 --- a/chapter/chapter_4.c +++ b/chapter/chapter_4.c @@ -40,30 +40,41 @@ void preview_unhighlighted_text_file (char * text_file, int x, int y) { text_data = deallocate (text_data); } @ + +So, let's write very basic C programming language syntax highlighting, explain how we can easily do it in little more than 100 lines of (scarily verbose and nicely aligned) code +and why we don't need regular expressions for it. You can use these 'syntax_*' functions to tokenize some source code, highlight the syntax of it or something else that I didn't +even think about if you're creative. Of course, we can use it to highlight syntax of some other programming language, not only C, and we'll use it later to highlight assembly, +Ada, C++, and maybe a few more programming languages. */ -static int syntax_count = 0; -static int syntax_active = 0; -static int * syntax_enrange = NULL; -static int * syntax_derange = NULL; -static char * * syntax_begin = NULL; -static char * * syntax_end = NULL; -static char * syntax_escape = NULL; -static int * syntax_colour = NULL; -static int * syntax_effect = NULL; -static char * syntax_separator = NULL; +static int syntax_count = 0; // Number of previously defined syntax rules. +static int syntax_active = FALSE; // Syntax "library" or subprogram was initialized if this value is TRUE. +static int * syntax_enrange = NULL; // Syntax rule can start with any character from 'syntax_begin' if this value is TRUE. 
+static int * syntax_derange = NULL; // Syntax rule can end with any character from 'syntax_end' if this value is TRUE. +static char * * syntax_begin = NULL; // Strings containing valid character (sub)sequence for beginning the scan. +static char * * syntax_end = NULL; // Strings containing valid character (sub)sequence for ending the scan. +static char * syntax_escape = NULL; // Escape character for the rule, useful for line-breaks in C macros and line-based languages. +static int * syntax_colour = NULL; // Colour for our token, these two could be completely independent, but I like to keep them here. +static int * syntax_effect = NULL; // Effect for our token. static void syntax_delete (void) { + int offset; + if (syntax_active == FALSE) { return; } - --syntax_count; + // We could reverse-loop through this without a local variable 'offset' using this approach, but I consider this bad for readability. + // --syntax_count; + // do { + // syntax_begin [syntax_count] = deallocate (syntax_begin [syntax_count]); + // syntax_end [syntax_count] = deallocate (syntax_end [syntax_count]); + // } while (--syntax_count != -1); - do { + for (offset = 0; offset < syntax_count; ++offset) { // Free the strings of every rule by 'offset', since 'syntax_count' is one past the last valid index. - syntax_begin [syntax_count] = deallocate (syntax_begin [syntax_count]); - syntax_end [syntax_count] = deallocate (syntax_end [syntax_count]); + syntax_begin [offset] = deallocate (syntax_begin [offset]); + syntax_end [offset] = deallocate (syntax_end [offset]); - } while (--syntax_count != -1); + } syntax_enrange = deallocate (syntax_enrange); syntax_derange = deallocate (syntax_derange); @@ -72,7 +83,6 @@ static void syntax_delete (void) { syntax_escape = deallocate (syntax_escape); syntax_colour = deallocate (syntax_colour); syntax_effect = deallocate (syntax_effect); - syntax_separator = deallocate (syntax_separator); syntax_active = FALSE; syntax_count = 0; @@ -98,15 +108,15 @@ static int syntax_define (int enrange, int derange, char * begin, char * end, ch syntax_colour = reallocate (syntax_colour, syntax_count * (int) 
sizeof (* syntax_effect)); - syntax_begin [syntax_count - 1] = allocate ((string_length (begin) + 1) * (int) sizeof (* * syntax_begin)); - syntax_end [syntax_count - 1] = allocate ((string_length (end) + 1) * (int) sizeof (* * syntax_end)); - syntax_enrange [syntax_count - 1] = enrange; syntax_derange [syntax_count - 1] = derange; syntax_escape [syntax_count - 1] = escape; syntax_colour [syntax_count - 1] = colour; syntax_effect [syntax_count - 1] = effect; + syntax_begin [syntax_count - 1] = allocate ((string_length (begin) + 1) * (int) sizeof (* * syntax_begin)); // NOTE(review): '+ 1' presumably leaves room for the null terminator, confirm against 'string_copy'. + syntax_end [syntax_count - 1] = allocate ((string_length (end) + 1) * (int) sizeof (* * syntax_end)); + string_copy (syntax_begin [syntax_count - 1], begin); string_copy (syntax_end [syntax_count - 1], end); @@ -114,43 +124,36 @@ static int syntax_define (int enrange, int derange, char * begin, char * end, ch } static int syntax_select (char * string, int * length) { - int offset = 0; - int select = 0; - - fatal_failure (syntax_active == FALSE, "Syntax is not active."); + int offset, subset, select; - fatal_failure (string == NULL, "syntax_select: String is null."); - fatal_failure (length == NULL, "syntax_select: Length is null."); + fatal_failure (syntax_active == FALSE, "syntax_select: Syntax is not active."); + fatal_failure (string == NULL, "syntax_select: String is null."); + fatal_failure (length == NULL, "syntax_select: Length is null."); - do { - if (syntax_enrange [select] == 0) { + for (select = offset = 0; select != syntax_count; ++select) { // Try each rule in definition order, the first match jumps to 'selected'. + if (syntax_enrange [select] == FALSE) { if (string_compare_limit (string, syntax_begin [select], string_length (syntax_begin [select])) != 0) { - if (syntax_derange [select] == 0) { - break; - } else { - if (character_compare_array (string [string_length (syntax_begin [select])], syntax_end [select]) != 0) { - break; - } - } + goto selected; } } else { - int subset = 0; - do { + for (subset = 0; subset != string_length (syntax_begin [select]); 
++subset) { if (string [offset] == syntax_begin [select] [subset]) { goto selected; } - } while (++subset != (int) string_length (syntax_begin [select])); + } } - } while (++select != syntax_count); + } selected: - if (select == syntax_count) { + if (select >= syntax_count) { * length = 1; - return (0); + return (syntax_count); } + offset = 0; // TODO: Left off here, this still needs fixing. Since I changed Xyntax, I need to adapt it. + do { ++offset; @@ -165,7 +168,7 @@ static int syntax_select (char * string, int * length) { return (select); } } else { - int subset = 0; + subset = 0; if (string_compare (syntax_end [select], "") == 0) { break; } do { @@ -173,51 +176,92 @@ static int syntax_select (char * string, int * length) { * length = offset; return (select); } - } while (++subset != (int) string_length (syntax_end [select])); + } while (++subset != string_length (syntax_end [select])); } } while (string [offset - 1] != '\0'); return (select); } +static void syntax_highlight_c (void) { + char * separators = ".,:;<=>+-*/%!&~^?|()[]{}'\" \t\r\n"; // Any of these characters ends a keyword, number or identifier rule below. + + char * keywords [] = { + "register", "volatile", "auto", "const", "static", "extern", "if", "else", + "do", "while", "for", "continue", "switch", "case", "default", "break", + "enum", "union", "struct", "typedef", "goto", "void", "return", "sizeof", + "char", "short", "int", "long", "signed", "unsigned", "float", "double" + }; + + int word; + + syntax_define (FALSE, FALSE, "/*", "*/", '\0', COLOUR_GREY, EFFECT_BOLD); // Block comment. + syntax_define (FALSE, FALSE, "//", "\n", '\0', COLOUR_GREY, EFFECT_BOLD); // Line comment. + syntax_define (FALSE, FALSE, "#", "\n", '\\', COLOUR_PINK, EFFECT_NORMAL); // Preprocessor directive, backslash is passed as the escape character. + syntax_define (FALSE, FALSE, "'", "'", '\\', COLOUR_PINK, EFFECT_BOLD); // Character literal. + syntax_define (FALSE, FALSE, "\"", "\"", '\\', COLOUR_RED, EFFECT_NORMAL); // String literal. + + for (word = 0; word != (int) (sizeof (keywords) / sizeof (keywords [0])); ++word) { // One rule per keyword, each ended by any separator. + syntax_define (FALSE, TRUE, keywords [word], separators, '\0', COLOUR_BLUE, EFFECT_NORMAL); + } + + syntax_define (TRUE, FALSE, 
".,:;<=>+*-/%!&~^?|()[]{}", "", '\0', COLOUR_BLUE, EFFECT_BOLD); // Operator and punctuation characters. + + syntax_define (TRUE, TRUE, "0123456789", separators, '\0', COLOUR_CYAN, EFFECT_BOLD); // Tokens starting with a digit. + syntax_define (TRUE, TRUE, "abcdefghijklmnopqrstuvwxyz", separators, '\0', COLOUR_WHITE, EFFECT_NORMAL); // Tokens starting with a lowercase letter. + syntax_define (TRUE, TRUE, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", separators, '\0', COLOUR_WHITE, EFFECT_BOLD); // Tokens starting with an uppercase letter. + syntax_define (TRUE, TRUE, "_", separators, '\0', COLOUR_GREY, EFFECT_BOLD); // Tokens starting with an underscore. +} + void preview_c_file (char * text_file, int width, int height, int x, int y) { char * text_data; int reset_x = x; int reset_y = y; + (void) width; // NOTE(review): presumably silences unused-parameter warnings, 'width' and 'height' are not used in the visible code. + (void) height; + + syntax_highlight_c (); // Define the C highlighting rules before the file is scanned. + text_data = file_record (text_file); for (curses_active = 1; curses_active != 0; ) { - int offset, colour, effect, string; + int offset, select, length; curses_render_background (' ', COLOUR_WHITE, EFFECT_NORMAL); - x = reset_x; - y = reset_y; - - string = 0; - colour = COLOUR_WHITE; - effect = EFFECT_NORMAL; - - for (offset = 0; offset != string_length (text_data); ++offset) { - switch (text_data [offset]) { - case '"': - string = ! string; - colour = (string != 0) ? COLOUR_RED : COLOUR_WHITE; - effect = (string != 0) ? 
EFFECT_BOLD : EFFECT_NORMAL; - break; - default: - break; - } + x = reset_x; + y = reset_y; + select = syntax_count; + length = 0; + + for (offset = 0; offset < string_length (text_data); offset += length) { // NOTE(review): only advances if 'syntax_select' always yields 'length' >= 1, confirm. + int suboffset, colour, effect; + + select = syntax_select (& text_data [offset], & length); // Rule index for the token starting at 'offset', its length goes to 'length'. - curses_render_character (text_data [offset], colour, effect, x, y); + if (select >= syntax_count) { // No rule matched, fall back to plain white text. + colour = COLOUR_WHITE; + effect = EFFECT_NORMAL; + } else { + colour = syntax_colour [select]; + effect = syntax_effect [select]; + } - switch (text_data [offset]) { - case '\t': x += 8; break; - case '\n': y += 1; x = reset_x; break; - default: x += 1; break; + for (suboffset = 0; suboffset < length; ++suboffset) { + if (text_data [offset + suboffset] == CHARACTER_LINE_FEED) { + x = reset_x; + y += 1; + } else if (text_data [offset + suboffset] == CHARACTER_TAB_HORIZONTAL) { + x += 8; // A tab is not rendered, it only moves 8 columns to the right. + } else { + curses_render_character (text_data [offset + suboffset], colour, effect, x, y); + x += 1; + } } + + // curses_render_string_limit (& text_data [offset], length, syntax_colour [select], syntax_effect [select], x, y); } curses_synchronize (); diff --git a/compile.sh b/compile.sh index 9dc3028..98f4464 100644 --- a/compile.sh +++ b/compile.sh @@ -26,6 +26,6 @@ gcc -o xhartae xhartae.o chapter/chapter_0.o chapter/chapter_1.o chapter/chapter #~splint -weak -warnposix -retvalother -syntax -type chapter/chapter_4.c #~splint -weak -warnposix -retvalother -syntax -type xhartae.c -valgrind --show-leak-kinds=all --leak-check=full ./xhartae +#~valgrind --show-leak-kinds=all --leak-check=full ./xhartae exit diff --git a/program/example.c b/program/example.c new file mode 100644 index 0000000..91442ad --- /dev/null +++ b/program/example.c @@ -0,0 +1,33 @@ +enum { + REGISTER_0_64, REGISTER_1_64, REGISTER_2_64, REGISTER_3_64, REGISTER_4_64, REGISTER_5_64, REGISTER_6_64, REGISTER_7_64, + REGISTER_8_64, REGISTER_9_64, REGISTER_A_64, REGISTER_B_64, REGISTER_C_64, REGISTER_D_64, 
REGISTER_E_64, REGISTER_F_64, + REGISTER_0_32, REGISTER_1_32, REGISTER_2_32, REGISTER_3_32, REGISTER_4_32, REGISTER_5_32, REGISTER_6_32, REGISTER_7_32, + REGISTER_8_32, REGISTER_9_32, REGISTER_A_32, REGISTER_B_32, REGISTER_C_32, REGISTER_D_32, REGISTER_E_32, REGISTER_F_32, + REGISTER_0_16, REGISTER_1_16, REGISTER_2_16, REGISTER_3_16, REGISTER_4_16, REGISTER_5_16, REGISTER_6_16, REGISTER_7_16, + REGISTER_8_16, REGISTER_9_16, REGISTER_A_16, REGISTER_B_16, REGISTER_C_16, REGISTER_D_16, REGISTER_E_16, REGISTER_F_16, + REGISTER_0_8, REGISTER_1_8, REGISTER_2_8, REGISTER_3_8, REGISTER_4_8, REGISTER_5_8, REGISTER_6_8, REGISTER_7_8, + REGISTER_8_8, REGISTER_9_8, REGISTER_A_8, REGISTER_B_8, REGISTER_C_8, REGISTER_D_8, REGISTER_E_8, REGISTER_F_8, + REGISTER_0_8X, REGISTER_1_8X, REGISTER_2_8X, REGISTER_3_8X, + REGISTER_COUNT +}; + +static void (* encode [FORMAT_COUNT] [INSTRUCTION_COUNT]) (int instruction, int left, int right); + +static int data [16] = { 0 }; // data [0] counts the stored bytes, the bytes themselves start at data [1]. + +static void data_push (int byte) { data [0]++; data [data [0]] = byte; } // NOTE(review): no bounds check, a 16th push would write past the end of 'data'. +static void data_pop (void) { data [data [0]] = 0X00; data [0]--; } // NOTE(review): no check for an empty buffer, popping at count 0 makes data [0] negative. +static int data_look (int look) { return (data [look]); } +static int data_size (void) { return (data [0]); } +static void data_free (void) { data [0] = 0; } + +static void data_echo (void) { + char byte [4] = " "; + int i; + + for (i = 0; i != data [0]; ++i) { // Format every stored byte as two uppercase hexadecimal digits and pass it to 'echo'. + byte [0] = "0123456789ABCDEF" [data [i + 1] / 16]; + byte [1] = "0123456789ABCDEF" [data [i + 1] % 16]; + echo (byte); + } +} diff --git a/xhartae.c b/xhartae.c index c6f2691..bbd013b 100644 --- a/xhartae.c +++ b/xhartae.c @@ -130,18 +130,7 @@ int main (int argc, char * * argv) { curses_configure (); - // We could simply write a while loop, but I prefer to be consistent with using for loops, and not to rely on default comparison to zero. - // while (curses_active) { ... 
- for (; curses_active != 0; ) { - curses_render_background ('.', COLOUR_GREY, EFFECT_BOLD); - curses_render_character ('@', COLOUR_RED, EFFECT_BOLD, 1, 1); - curses_render_string ("Heyo world!", COLOUR_GREEN, EFFECT_ITALIC, 2, 2); - curses_render_number_limit (-420, 6, COLOUR_YELLOW, EFFECT_UNDERLINE, 0, 3); - - curses_synchronize (); - } - - preview_c_file ("program/hello_world.c", curses_screen_width, curses_screen_height, 0, 0); + preview_c_file ("program/example.c", curses_screen_width, curses_screen_height, 0, 0); return (EXIT_SUCCESS); }