anon prototype is (kinda) pretty now

2023-08-20 00:49:10 +02:00 · 2023-08-20 00:49:10 +02:00 · 966b90056e
commit 966b90056e
parent c284f3f283
7 changed files with 347 additions and 48 deletions
--- a/BUGS.md
+++ b/BUGS.md
@ -0,0 +1,5 @@
+## Bugs
+ + segfaults under -O2
+ + i cannot decipher the valgrind warnings
+ + a single character right before keywords is always highlighted; the bug is understood, the resolution design is under contemplation
+ + newlines are not yet given special treatment in regex_match()
--- a/2
+++ b/2
@ -1,6 +1,6 @@
 include chad.mk
 DEBUG:=1
-CFLAGS:=-std=c99 -O2 -Wvla -Wshadow -Wundef $(if ${DEBUG}, ${CHAD_DEBUG},'')
+CFLAGS:=-std=c99 -O2 $(if ${DEBUG}, ${CHAD_DEBUG},'')
 CPPFLAGS:=-D_FORTIFY_SOURCE=2

 SRC.dir:=source/
--- a/chad.mk
+++ b/chad.mk
@ -8,14 +8,15 @@ CHAD_DEBUG:=-Og -ggdb -pg -fno-inline

 # Programs to check warnings for as defined by the Chad standard
 GCC:=gcc
-GCC.warnings:=-Wall -Wextra -Wpedantic
+GCC.warnings:=-Wall -Wextra -Wpedantic -Wvla -Wshadow -Wundef 
 CLANG:=clang
 CLANG.warnings:=-Weverything
 VALGRIND:=valgrind
+VALGRIND.flags:=--track-origins=yes --leak-check=full --show-leak-kinds=all

 chad_test:
 	${GCC} ${GCC.warnings} ${SRC} -o ${OUT}
 	${CLANG} ${GCC.warnings} ${SRC} -o ${OUT}
-	${VALGRIND} ${OUT} ${OUTARGS}
+	${VALGRIND} ${VALGRIND.flags} ${OUT} ${OUTARGS}

 .DEFAULT_GOAL:=main
--- a/source/hl.h
+++ b/source/hl.h
@ -3,6 +3,7 @@
 #include <ctype.h>
 #include <string.h>
 #include "chad.h"
+#include "regex.h"

 typedef void (*attribute_callback_t)(const char * const string,
                                     const int          length,
@ -39,22 +40,93 @@ typedef struct {
 token_t * token_table[1000];
 int token_table_top = 0;

-token_t * new_token(const char       * const syntax,
-                  const token_type_t            t,
-                  const hl_group_t * const      g) {
+// Stores `token` in the next free slot of the global token_table.
+//
+// Returns 0 on success. Returns 1 without storing anything when the table
+// is already full; token_table has a fixed capacity, so writing past its
+// end would corrupt whatever follows it in memory.
+int append_token(token_t * token){
+	const int capacity = (int)(sizeof(token_table) / sizeof(token_table[0]));
+	if (token_table_top >= capacity) {
+		return 1;
+	}
+	token_table[token_table_top++] = token;
+	return 0;
+}
+
+// Creates a KEYSYMBOL token whose syntax is a private heap copy of `word`,
+// attaches highlight group `g` to it and registers it with append_token().
+//
+// Returns the new token, or NULL when memory could not be allocated (in
+// which case nothing is registered and nothing is leaked).
+token_t * new_symbol_token(const char         * const word,
+                                 hl_group_t   * const    g) {
+
+	// The token keeps its own copy, so callers may pass temporary buffers.
+	char * new_word = strdup(word);
+	if (!new_word) {
+		return NULL;
+	}
+
 	token_t * mt = (token_t*)malloc(sizeof(token_t));
+	if (!mt) {
+		free(new_word);
+		return NULL;
+	}
 	mt->hl = g;
-	mt->t = t;
-	mt->syntax = syntax;
-	token_table[token_table_top++] = mt;
+	mt->t = KEYSYMBOL;
+	mt->syntax = new_word;
+	append_token(mt);
+	return mt;
+
+}
+
+// Registers each string of the NULL-terminated array `symbols` through
+// new_symbol_token(), all of them sharing highlight group `g`.
+//
+// Returns the number of calls that produced a non-NULL token.
+int new_symbol_tokens(const char       * const *     symbols,
+                            hl_group_t * const             g) {
+
+	int created = 0;
+	for (; *symbols != NULL; ++symbols) {
+		if (new_symbol_token(*symbols, g) != NULL) {
+			created += 1;
+		}
+	}
+
+	return created;
+}
+
+// Registers every character of the string `characters` as its own
+// one-character symbol token (via new_symbol_token()) in group `g`.
+//
+// Returns the number of calls that produced a non-NULL token.
+int new_char_tokens(const char       *         characters,
+                          hl_group_t * const            g) {
+	// One-character scratch string; new_symbol_token() duplicates it,
+	// so reusing the same stack buffer for every character is fine.
+	char single[2] = { '\00', '\00' };
+	int registered = 0;
+	const char * cursor = characters;
+	while (*cursor != '\00') {
+		single[0] = *cursor;
+		if (new_symbol_token(single, g) != NULL) {
+			registered++;
+		}
+		cursor++;
+	}
+	return registered;
+}
+
+// Creates a KEYWORD token for `word`. The stored syntax is a heap string
+// of the form "\<word\>", i.e. the word wrapped in the word-boundary
+// escapes understood by regex_match(). The token gets highlight group `g`
+// and is registered with append_token().
+//
+// Returns the new token, or NULL when memory could not be allocated (in
+// which case nothing is registered and nothing is leaked).
+token_t * new_keyword_token(const char         * const word,
+                                  hl_group_t   * const    g) {
+
+	size_t word_length = strlen(word);
+	// 2 bytes for "\<", 2 bytes for "\>", 1 byte for the terminator.
+	char * new_word = (char*)malloc(word_length + 4 + 1);
+	if (!new_word) {
+		return NULL;
+	}
+	memcpy(new_word, "\\<", 2);
+	memcpy(new_word + 2, word, word_length);
+	strcpy(new_word + 2 + word_length, "\\>");
+
+	token_t * mt = (token_t*)malloc(sizeof(token_t));
+	if (!mt) {
+		free(new_word);
+		return NULL;
+	}
+	mt->hl = g;
+	mt->t = KEYWORD;
+	mt->syntax = new_word;
+	append_token(mt);
 	return mt;
 }

-void new_keyword_tokens(const char       * const *       words,
+// Generic token constructor: dispatches on the token type `t`.
+//
+// KEYSYMBOL and KEYWORD are forwarded to new_symbol_token() and
+// new_keyword_token() respectively. MATCH and REGION are not implemented
+// yet; for them (and for any other value) NULL is returned so that callers
+// never receive an indeterminate pointer.
+token_t * new_token(const char         * const word,
+                    const token_type_t            t,
+                          hl_group_t   * const    g) {
+	switch(t){
+		case KEYSYMBOL:
+			return new_symbol_token(word, g);
+		case KEYWORD:
+			return new_keyword_token(word, g);
+		case MATCH:
+		case REGION:
+			// XXX: implement the rest
+			break;
+	}
+	return NULL;
+}
+
+int new_keyword_tokens(const char       * const *       words,
                             hl_group_t * const             g) {
 	int i = 0;
 	while (*words) {
-		if(new_token(*words, KEYWORD, g)){
+		if(new_keyword_token(*words, g)){
 			++i;
 		}
 		++words;
@ -63,30 +135,16 @@ void new_keyword_tokens(const char       * const *       words,
 	return i;
 }

-int token_fits(const char* const pattern,
+int token_fits(const token_t* const token,
               const char*    const    to) {
+
+	const char * const pattern = token->syntax;
+
 	if (pattern == NULL) {
 		return true;
 	}
-	for (int i = 0;; i++) {
-		if (pattern[i] == '\00') {
-			return i;
-		}
-		if (to[i] == '\00'
-		||  pattern[i] != to[i]) {
-			return false;
-		}
-	}
-}

-bool is_word_separator(const char character) {
-	if (( isascii(character))
-	&&  (!isalnum(character))
-	&&  ( character != '_')) {
-		return 1;
-	} else {
-		return 0;
-	}
+	return regex_match(pattern, to);
 }

 void render_string(const char * const string,
@ -95,7 +153,7 @@ void render_string(const char * const string,
 		int f;
 		int i = 0;
 		for (; i < token_table_top; i++) {
-			f = token_fits(token_table[i]->syntax, s);
+			f = token_fits(token_table[i], s);
 			if(f){ break; }
 		}
 		//
--- a/source/main.c
+++ b/source/main.c
@ -1,3 +1,5 @@
+//register
+//putchar()
 #include <stdio.h>
 #include <stdlib.h>
 #include <unistd.h>
@ -10,21 +12,24 @@ static char * buffer      = NULL;
 static size_t buffer_size = 0;

 typedef struct {
-	int attribute;
-	int foreground_color;
-	int background_color;
+	char * attribute;
+	char * foreground_color;
+	char * background_color;
 } terminal_hl_t;

 void cterm_render_callback(const char * const string,
                           const int          length,
                           void       * const attributes) {
 	if(!length){
+		fputs(TERMINAL_STYLE_BOLD, stdout);
 		putchar(*string);
+		fputs(TERMINAL_RESET, stdout);
 		return;
 	}

-	UNUSED(attributes);
-	fputs(TERMINAL_STYLE_BOLD, stdout);
+	terminal_hl_t * term_hl = (terminal_hl_t*)attributes;
+	fputs(term_hl->attribute, stdout);
+	fputs(term_hl->foreground_color, stdout);
 	for (int i = 0; i < length; i++) {
 		putchar(*(string+i));
 	}
@ -68,25 +73,50 @@ int main(int      argc,
 	  NULL
 	};

-	terminal_hl_t my_hl = (terminal_hl_t) {
-		.attribute = 1
-	};
-
+	//
 	display_t * cterm = &(display_t) {
 		.key = "cterm",
 		.callback = cterm_render_callback
 	};
-	hl_group_t mygroup = (hl_group_t) {
-		.link = NULL
+	//
+	terminal_hl_t terminal_keyword_hl = (terminal_hl_t) {
+		.attribute = TERMINAL_STYLE_BOLD,
+		.foreground_color = TERMINAL_COLOR_FG_GREEN,
+		.background_color = NULL
 	};
+	hl_group_t keyword_hl = (hl_group_t) {
+		.link = NULL,
+		.attributes = (void*)&terminal_keyword_hl
+	};
+	//
+	terminal_hl_t terminal_preprocessor_hl = (terminal_hl_t) {
+		.attribute = TERMINAL_STYLE_BOLD,
+		.foreground_color = TERMINAL_COLOR_FG_BLUE,
+		.background_color = NULL
+	};
+	hl_group_t preprocessor_hl = (hl_group_t) {
+		.link = NULL,
+		.attributes = (void*)&terminal_preprocessor_hl
+	};
+	//
+	terminal_hl_t terminal_symbol_hl = (terminal_hl_t) {
+		.attribute = TERMINAL_STYLE_BOLD,
+		.foreground_color = TERMINAL_COLOR_FG_YELLOW,
+		.background_color = NULL
+	};
+	hl_group_t symbol_hl = (hl_group_t) {
+		.link = NULL,
+		.attributes = (void*)&terminal_symbol_hl
+	};
+	//
 	new_display_mode(cterm);
-	new_keyword_tokens(c_keywords, &mygroup);
-	new_keyword_tokens(preprocessor_keywords, &mygroup);
-
+	new_keyword_tokens(c_keywords, &keyword_hl);
+	new_keyword_tokens(preprocessor_keywords, &preprocessor_hl);
+	new_char_tokens("&|()[]{}*,", &symbol_hl);
 	//
 	render_string(buffer, "cterm");
 	putchar('\n');
-	free (buffer);
+	free(buffer);

 	return 0;
 }
--- a/source/regex.c
+++ b/source/regex.c
@ -0,0 +1,199 @@
+#include "regex.h"
+
+bool is_case_on = true;
+
+// Reports whether the character right after `s` is something other than
+// the string terminator.
+static bool is_next_valid(const char * const s) {
+	return s[1] != '\0';
+}
+
+// Tells whether `character` lies in the inclusive range [start, end].
+// An inverted range (start > end) contains nothing.
+static bool char_in_range(const char     start,
+                          const char       end,
+                          const char character) {
+	// Plain comparisons instead of a counting loop: the loop form stopped
+	// one short of `end` (so e.g. '9' was not in '0'..'9') and would have
+	// overflowed the counter had it been made inclusive with end == CHAR_MAX.
+	return (start <= character) && (character <= end);
+}
+
+// A word separator is an ASCII character that is neither alphanumeric
+// nor an underscore. Non-ASCII characters are never separators.
+static bool is_word_separator(const char character) {
+	if (!isascii(character)) {
+		return false;
+	}
+	if (isalnum(character)) {
+		return false;
+	}
+	return character != '_';
+}
+
+// Decides whether `to_enchant` belongs to the character class selected by
+// the escape letter `magic_char` (the letter following a backslash in a
+// pattern). A lowercase letter selects a class, the matching uppercase
+// letter selects its complement. Range membership is delegated to
+// char_in_range(). Any letter not listed below yields false.
+//
+// Classes that are not implemented yet:
+//	\i	identifier character (see 'isident' option)
+//	\I	like "\i", but excluding digits
+//	\k	keyword character (see 'iskeyword' option)
+//	\K	like "\k", but excluding digits
+//	\f	file name character (see 'isfname' option)
+//	\F	like "\f", but excluding digits
+//	\p	printable character (see 'isprint' option)
+//	\P	like "\p", but excluding digits
+static bool magic(const char magic_char, const char to_enchant) {
+	bool in_class;
+
+	switch (magic_char) {
+		case 's': case 'S':	// space or tab / anything else
+			in_class = (to_enchant == ' ') || (to_enchant == '\t');
+			break;
+		case 'd': case 'D':	// [0-9] / [^0-9]
+			in_class = char_in_range('0', '9', to_enchant);
+			break;
+		case 'x': case 'X':	// [0-9A-Fa-f] / [^0-9A-Fa-f]
+			in_class = char_in_range('0', '9', to_enchant)
+			        || char_in_range('A', 'F', to_enchant)
+			        || char_in_range('a', 'f', to_enchant);
+			break;
+		case 'o': case 'O':	// [0-7] / [^0-7]
+			in_class = char_in_range('0', '7', to_enchant);
+			break;
+		case 'w': case 'W':	// [0-9A-Za-z_] / [^0-9A-Za-z_]
+			in_class = char_in_range('0', '9', to_enchant)
+			        || char_in_range('A', 'Z', to_enchant)
+			        || char_in_range('a', 'z', to_enchant)
+			        || (to_enchant == '_');
+			break;
+		case 'h': case 'H':	// [A-Za-z_] / [^A-Za-z_]
+			in_class = char_in_range('A', 'Z', to_enchant)
+			        || char_in_range('a', 'z', to_enchant)
+			        || (to_enchant == '_');
+			break;
+		case 'a': case 'A':	// [A-Za-z] / [^A-Za-z]
+			in_class = char_in_range('A', 'Z', to_enchant)
+			        || char_in_range('a', 'z', to_enchant);
+			break;
+		case 'l': case 'L':	// [a-z] / [^a-z]
+			in_class = char_in_range('a', 'z', to_enchant);
+			break;
+		case 'u': case 'U':	// [A-Z] / [^A-Z]
+			in_class = char_in_range('A', 'Z', to_enchant);
+			break;
+		default:
+			return false;
+	}
+
+	// Only the letters handled above get here; an uppercase one asks for
+	// the complement of the class.
+	const bool inverted = (magic_char >= 'A') && (magic_char <= 'Z');
+	return inverted ? !in_class : in_class;
+}
+
+// Anchored match of `pattern` against the beginning of `string`.
+//
+// Supported pattern syntax:
+//	literal	any character other than a backslash matches itself
+//	\t \r \e \b	tab, carriage return, escape (033), backspace (010)
+//	\\	a literal backslash
+//	\<	start of word: consumes one word-separator character
+//	\>	end of word: zero-width, succeeds when the current
+//		character is a word separator or the end of `string`
+//	\s \d \x ...	any class letter understood by magic()
+//
+// Returns the number of characters of `string` covered by the match, or
+// 0 (false) when the pattern does not match. An empty pattern yields 0 too.
+//
+// Known limitation: \< consumes the separator in front of the word, so the
+// returned length includes that character and a word at the very start of
+// `string` cannot match.
+int regex_match(const char * const pattern,
+                   const char * const  string) {
+	const char * pattern_pointer = pattern;
+	const char * string_pointer = string;
+
+	while (*pattern_pointer) {
+		const bool is_escape = (*pattern_pointer == '\\');
+
+		// A backslash with nothing after it can never match.
+		if (is_escape && !is_next_valid(pattern_pointer)) {
+			return false;
+		}
+
+		// End-of-word anchor. Handled before the end-of-input test
+		// because it is the only element allowed to succeed there, and
+		// it looks at the current character without consuming it.
+		if (is_escape && *(pattern_pointer + 1) == '>') {
+			if (*string_pointer != '\00'
+			&& !is_word_separator(*string_pointer)) {
+				return false;
+			}
+			pattern_pointer += 2;
+			continue;
+		}
+
+		// Everything else needs a character to consume.
+		if (!(*string_pointer)) {
+			return false;
+		}
+
+		// Escape sequence: each one consumes exactly one character.
+		if (is_escape) {
+			const char escape = *(pattern_pointer + 1);
+			const char current = *string_pointer;
+			bool matched;
+
+			switch (escape) {
+				case 't':  matched = (current == '\t');   break;
+				case 'r':  matched = (current == '\r');   break;
+				case 'e':  matched = (current == '\033'); break;
+				case 'b':  matched = (current == '\010'); break;
+				case '\\': matched = (current == '\\');   break;
+				case '<':  matched = is_word_separator(current); break;
+				default:   matched = magic(escape, current);     break;
+			}
+
+			if (!matched) {
+				return false;
+			}
+			pattern_pointer += 2;
+			string_pointer += 1;
+			continue;
+		}
+
+		// Literal
+		if (*pattern_pointer != *string_pointer) {
+			return false;
+		}
+		++pattern_pointer;
+		++string_pointer;
+	}
+
+	return (int)(string_pointer - string);
+}
--- a/source/regex.h
+++ b/source/regex.h
@ -0,0 +1,6 @@
+#include "chad.h"
+#include <ctype.h>
+
+// Global flag; regex.c defines it with the initial value true.
+// NOTE(review): presumably a case-sensitivity switch, but regex_match()
+// does not read it in the implementation shown -- confirm intended use.
+extern bool is_case_on;
+
+// Matches `pattern` against the beginning of `string`.
+// Returns the number of characters of `string` consumed by the match,
+// or 0 (false) when the pattern does not match.
+int regex_match(const char * const pattern, const char * const  string);