/* Copyright (c) 2023 : Ognjen 'xolatile' Milan Robovic Xhartae is free software! You will redistribute it or modify it under the terms of the GNU General Public License by Free Software Foundation. And when you do redistribute it or modify it, it will use either version 3 of the License, or (at yours truly opinion) any later version. It is distributed in the hope that it will be useful or harmful, it really depends... But no warranty what so ever, seriously. See GNU/GPLv3. */ #ifndef CHAPTER_4_SOURCE #define CHAPTER_4_SOURCE #include "chapter_4.h" /* Of course, we could just write something like 'preview_unhighlighted_text_file' function (name is obviously a joke), but this would apply the same style (colour and effect character attributes) to our entire text file. When we're writing programs, syntax highlighting makes a lot of difference to readability, the same way the code formatting does, and initial program design structure. If you use a lot of external variables everywhere, the entire program starts to be messy or difficult to maintain, write and debug (or both). However, if you use a lot of variables, and you pass each of them separately into functions, or if you have one huge monolithic structure (this time literal 'struct'), you aren't doing much better, except the compiler will have an easier time optimizing your code, even tho that kind of code becomes a pain to write. So, having few external functions, that do one thing well, and having few external variables, that won't be edited outside of that file is the best in my opinion. Your function calls won't be long, if you don't want to make those external ("global") variables visible to some other file, just move them to C source file instead of C header file, and redeclare them as 'static', making them internal variables. Keep in mind, you have to use brain more in that case, and think about what you're modifying, where and why. 
Well, if you want to write programs, and not to think about them, just close this and read Jin Ping Mei instead or something. There's no cheatsheet for making good programs, you choose your constraints, your program design structure, and you start working on it. @C void preview_unhighlighted_text_file (char * text_file, int x, int y) { char * text_data; text_data = file_record (text_file); for (curses_active = 1; curses_active != 0; ) { curses_render_background (' ', COLOUR_WHITE, EFFECT_NORMAL); curses_render_string (text_data, COLOUR_WHITE, EFFECT_NORMAL, x, y); curses_synchronize (); } text_data = deallocate (text_data); } @ So, let's write very basic C programming language syntax highlighting, explain how we can easily do it in little more than 100 lines of (scarily verbose and nicely aligned) code and why we don't need regular expressions for it. You can use these 'syntax_*' functions to tokenize some source code, highlight the syntax of it or something else that I didn't even think about if you're creative. Of course, we can use it to highlight syntax of some other programming language, not only C, and we'll use it later to highlight assembly, Ada, C++, and maybe a few more programming languages. */ static int syntax_count = 0; // Number of previously defined syntax rules. static int syntax_active = FALSE; // Syntax "library" or subprogram was initialized if this value is TRUE. static int * syntax_enrange = NULL; // Syntax rule can start with any character from 'syntax_begin' if this value is TRUE. static int * syntax_derange = NULL; // Syntax rule can end with any character from 'syntax_end' if this value is TRUE. static char * * syntax_begin = NULL; // Strings containing valid character (sub)sequence for beginning the scan. static char * * syntax_end = NULL; // Strings containing valid character (sub)sequence for ending the scan. static char * syntax_escape = NULL; // Escape character for the rule, useful for line-breaks in C macros and line-based languages. 
static int * syntax_colour = NULL; // Colour for our token, these two could be completely independent, but I like to keep them here. static int * syntax_effect = NULL; // Effect for our token. static void syntax_delete (void) { int offset; if (syntax_active == FALSE) { return; } // We could reverse-loop through this without a local variable 'offset' using this approach, but I consider this bad for readability. // --syntax_count; // do { // syntax_begin [syntax_count] = deallocate (syntax_begin [syntax_count]); // syntax_end [syntax_count] = deallocate (syntax_end [syntax_count]); // } while (--syntax_count != -1); for (offset = 0; offset < syntax_count; ++offset) { syntax_begin [offset] = deallocate (syntax_begin [offset]); syntax_end [offset] = deallocate (syntax_end [offset]); } syntax_enrange = deallocate (syntax_enrange); syntax_derange = deallocate (syntax_derange); syntax_begin = deallocate (syntax_begin); syntax_end = deallocate (syntax_end); syntax_escape = deallocate (syntax_escape); syntax_colour = deallocate (syntax_colour); syntax_effect = deallocate (syntax_effect); syntax_active = FALSE; syntax_count = 0; } static int syntax_define (int enrange, int derange, char * begin, char * end, char escape, int colour, int effect) { if (syntax_active == FALSE) { syntax_active = TRUE; atexit (syntax_delete); } fatal_failure (begin == NULL, "syntax_define: Begin string is null pointer."); fatal_failure (end == NULL, "syntax_define: End string is null pointer."); ++syntax_count; syntax_enrange = reallocate (syntax_enrange, syntax_count * (int) sizeof (* syntax_enrange)); syntax_derange = reallocate (syntax_derange, syntax_count * (int) sizeof (* syntax_derange)); syntax_begin = reallocate (syntax_begin, syntax_count * (int) sizeof (* syntax_begin)); syntax_end = reallocate (syntax_end, syntax_count * (int) sizeof (* syntax_end)); syntax_escape = reallocate (syntax_escape, syntax_count * (int) sizeof (* syntax_escape)); syntax_colour = reallocate (syntax_colour, 
syntax_count * (int) sizeof (* syntax_colour)); syntax_effect = reallocate (syntax_effect, syntax_count * (int) sizeof (* syntax_effect)); syntax_enrange [syntax_count - 1] = enrange; syntax_derange [syntax_count - 1] = derange; syntax_escape [syntax_count - 1] = escape; syntax_colour [syntax_count - 1] = colour; syntax_effect [syntax_count - 1] = effect; syntax_begin [syntax_count - 1] = allocate ((string_length (begin) + 1) * (int) sizeof (* * syntax_begin)); syntax_end [syntax_count - 1] = allocate ((string_length (end) + 1) * (int) sizeof (* * syntax_end)); string_copy (syntax_begin [syntax_count - 1], begin); string_copy (syntax_end [syntax_count - 1], end); return (syntax_count - 1); } static int syntax_select (char * string, int * length) { int offset, subset, select; fatal_failure (syntax_active == FALSE, "syntax_select: Syntax is not active."); fatal_failure (string == NULL, "syntax_select: String is null."); fatal_failure (length == NULL, "syntax_select: Length is null."); for (select = offset = 0; select != syntax_count; ++select) { if (syntax_enrange [select] == FALSE) { if (string_compare_limit (string, syntax_begin [select], string_length (syntax_begin [select])) == TRUE) { break; // We need to limit our string comparisson function. } else { continue; } } else { for (subset = 0; subset != string_length (syntax_begin [select]); ++subset) { if (string [offset] == syntax_begin [select] [subset]) { goto selected; // We can't use 'break' here, because it will exit only one loop, not both of them. 
} else { continue; } } } } selected: if (select >= syntax_count) { * length = 1; return (syntax_count); } for (offset = 1; string [offset - 1] != '\0'; ++offset) { if (string [offset] == syntax_escape [select]) { ++offset; continue; } if (syntax_derange [select] == FALSE) { if (string_compare_limit (& string [offset], syntax_end [select], string_length (syntax_end [select])) == TRUE) { * length = offset + string_length (syntax_end [select]); return (select); } } else { subset = 0; if (string_compare (syntax_end [select], "") == TRUE) { break; } do { if (string [offset] == syntax_end [select] [subset]) { * length = offset; goto finished; } } while (++subset != string_length (syntax_end [select])); } } finished: return (select); } static void syntax_highlight_c (void) { char * separators = ".,:;<=>+*-/%!&~^?|()[]{}'\" \t\r\n"; char * keywords [] = { "register", "volatile", "auto", "const", "static", "extern", "if", "else", "do", "while", "for", "continue", "switch", "case", "default", "break", "enum", "union", "struct", "typedef", "goto", "void", "return", "sizeof", "char", "short", "int", "long", "signed", "unsigned", "float", "double" }; int word; syntax_define (FALSE, FALSE, "/*", "*/", '\0', COLOUR_GREY, EFFECT_BOLD); syntax_define (FALSE, FALSE, "//", "\n", '\0', COLOUR_GREY, EFFECT_BOLD); syntax_define (FALSE, FALSE, "#", "\n", '\\', COLOUR_YELLOW, EFFECT_ITALIC); syntax_define (FALSE, FALSE, "'", "'", '\\', COLOUR_PINK, EFFECT_BOLD); syntax_define (FALSE, FALSE, "\"", "\"", '\\', COLOUR_PINK, EFFECT_NORMAL); for (word = 0; word != (int) (sizeof (keywords) / sizeof (keywords [0])); ++word) { syntax_define (FALSE, TRUE, keywords [word], separators, '\0', COLOUR_YELLOW, EFFECT_BOLD); } syntax_define (TRUE, FALSE, "()[]{}", "", '\0', COLOUR_BLUE, EFFECT_NORMAL); syntax_define (TRUE, FALSE, ".,:;<=>+*-/%!&~^?|", "", '\0', COLOUR_CYAN, EFFECT_NORMAL); syntax_define (TRUE, TRUE, "0123456789", separators, '\0', COLOUR_PINK, EFFECT_BOLD); syntax_define (TRUE, TRUE, 
"abcdefghijklmnopqrstuvwxyz", separators, '\0', COLOUR_WHITE, EFFECT_NORMAL); syntax_define (TRUE, TRUE, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", separators, '\0', COLOUR_WHITE, EFFECT_BOLD); syntax_define (TRUE, TRUE, "_", separators, '\0', COLOUR_WHITE, EFFECT_ITALIC); } void preview_c_file (char * text_file, int width, int height, int x, int y) { char * text_data; int reset_x = x; int reset_y = y; (void) width; (void) height; syntax_highlight_c (); text_data = file_record (text_file); for (curses_active = 1; curses_active != 0; ) { int offset, select, length; curses_render_background (' ', COLOUR_WHITE, EFFECT_NORMAL); x = reset_x; y = reset_y; select = syntax_count; length = 0; for (offset = 0; offset < string_length (text_data); offset += length) { int suboffset, colour, effect; select = syntax_select (& text_data [offset], & length); if (select >= syntax_count) { colour = COLOUR_WHITE; effect = EFFECT_NORMAL; } else { colour = syntax_colour [select]; effect = syntax_effect [select]; } for (suboffset = 0; suboffset < length; ++suboffset) { if (text_data [offset + suboffset] == CHARACTER_LINE_FEED) { x = reset_x; y += 1; } else if (text_data [offset + suboffset] == CHARACTER_TAB_HORIZONTAL) { x += 8; } else { curses_render_character (text_data [offset + suboffset], colour, effect, x, y); x += 1; } } // curses_render_string_limit (& text_data [offset], length, syntax_colour [select], syntax_effect [select], x, y); } curses_synchronize (); } text_data = deallocate (text_data); } #endif