libhl/source/hl.h

#include <stdio.h>
#include <uthash.h>
#include <ctype.h>
#include <string.h>
#include "chad.h"
#include "regex.h"

// -------------------
// ### Definitions ###
// -------------------

// Kind of a highlighting token; new_token() uses it to pick the constructor
// that builds the token's pattern.
typedef enum {
	KEYSYMBOL, // pattern is the given text copied as-is (see new_symbol_token)
	KEYWORD, // pattern is the given text wrapped in "\<" and "\>" (see new_keyword_token)
	MATCH, // no constructor in this file yet; new_token() returns NULL for it
	REGION // no constructor in this file yet; new_token() returns NULL for it
} token_type_t;

// Callback a display mode uses to emit one piece of the rendered string.
//   string     - pointer into the string being rendered, at the position to emit
//   length     - render_string() passes the token_fits() result for the start of
//                a match and 0 for every other position
//   attributes - the matching group's `attributes`, or NULL when nothing matched
// Declared ahead of display_t because that struct stores a callback by value and
// a typedef name has to be visible before it is used.
typedef void (*attribute_callback_t) (const char * const string,
                                      const int          length,
                                            void * const attributes);

// A named display mode, stored in the display_table hash.
typedef struct {
	char                 * key;      // mode name; the string key of the hash
	attribute_callback_t   callback; // called by render_string() for this mode
	UT_hash_handle         hh;       // uthash bookkeeping, makes the struct hashable
} display_t;

// A highlight group shared by any number of tokens.
// The struct carries a tag so that `link` really points at another hl_group_t;
// with an untagged struct, `struct hl_group_t` would name an unrelated,
// never-completed type and `group->link->attributes` would not compile.
typedef struct hl_group_t {
	void              * attributes; // opaque data handed to the display callback
	struct hl_group_t * link;       // another group; not read anywhere in this file
} hl_group_t;

// One entry of token_table: a pattern plus the group used to display it.
typedef struct {
	hl_group_t   * hl;     // group whose attributes are passed to the callback
	token_type_t   t;      // which constructor produced the token
	char         * syntax; // pattern string given to regex_match() by token_fits()
} token_t;

// GLOBALS

// Every token registered so far, in registration order; filled by append_token()
// and scanned front to back by render_string().
token_t * token_table[1000];
// Number of used slots in token_table, i.e. the index the next token is stored at.
int token_table_top = 0;

// uthash head pointer for the display modes added with new_display_mode().
display_t * display_table = NULL;

// --------------------------------
// ### Constructors/Destructors ###
// --------------------------------

// Registers `mode` in the global display_table hash, keyed by its `key` string.
// render_string() later looks the mode up by that same string.
void new_display_mode(display_t * mode) {
	HASH_ADD_STR(display_table, key, mode);
}

// Destroys a token created by new_symbol_token() / new_keyword_token().
//
// Frees the pattern string and the token struct itself, both of which the
// constructors allocate with malloc()/strdup(). The highlight group (token->hl)
// is NOT freed: the constructors only store the caller's pointer, and
// new_symbol_tokens(), new_char_tokens() and new_keyword_tokens() hand the same
// group to many tokens, so freeing it per token is a double free as soon as
// hl_deinit() walks the table. The group stays owned by whoever created it.
//
// A NULL token is accepted and ignored. Always returns 0.
int free_token(token_t * token) {
	if (! token) {
		return 0;
	}

	free(token->syntax);
	free(token);

	return 0;
}

// Stores `token` in the next free slot of token_table.
//
// Returns 0 on success, or -1 without touching the table when `token` is NULL
// or the table is already full (writing past the array would corrupt memory).
int append_token(token_t * token) {
	// Derived from the array itself so it follows any change to its declared size.
	const int capacity = (int)(sizeof(token_table) / sizeof(token_table[0]));

	if (! token || token_table_top >= capacity) {
		return -1;
	}

	token_table[token_table_top++] = token;

	return 0;
}

// Creates a KEYSYMBOL token whose pattern is a private copy of `word`, tags it
// with group `g` (stored as given, not copied) and appends it to token_table.
//
// Returns the new token, or NULL when `word` is NULL, an allocation fails or
// append_token() reports failure; nothing is leaked in those cases. The
// plural constructors already count only non-NULL results.
token_t * new_symbol_token(const char         * const word,
                                 hl_group_t   * const    g) {
	if (! word) {
		return NULL;
	}

	token_t * mt = (token_t*)malloc(sizeof(*mt));
	if (! mt) {
		return NULL;
	}

	mt->hl     = g;
	mt->t      = KEYSYMBOL;
	mt->syntax = strdup(word);

	// Undo everything if the copy could not be made or the token was not stored.
	if (! mt->syntax || append_token(mt) != 0) {
		free(mt->syntax);
		free(mt);
		return NULL;
	}

	return mt;
}

// Creates one symbol token per entry of `symbols`, all tagged with group `g`.
// `symbols` is walked until its first NULL entry, which therefore has to be
// present as the terminator.
// Returns how many calls to new_symbol_token() yielded a non-NULL token.
int new_symbol_tokens(const char       * const *     symbols,
                            hl_group_t * const             g) {
	int created = 0;

	for (const char * const * cursor = symbols; *cursor; ++cursor) {
		if (new_symbol_token(*cursor, g)) {
			created++;
		}
	}

	return created;
}

// Creates a separate symbol token for every character of `characters`, all
// tagged with group `g`.
// Returns how many calls to new_symbol_token() yielded a non-NULL token.
int new_char_tokens(const char       *       characters,
                          hl_group_t * const          g) {
	int created = 0;

	for (const char * cursor = characters; *cursor != '\0'; ++cursor) {
		// One-character, NUL-terminated string; new_symbol_token() copies it.
		const char single[2] = { *cursor, '\0' };

		if (new_symbol_token(single, g)) {
			created++;
		}
	}

	return created;
}

// Creates a KEYWORD token whose pattern is `word` wrapped in "\<" and "\>",
// tags it with group `g` (stored as given, not copied) and appends it to
// token_table.
//
// Returns the new token, or NULL when `word` is NULL, an allocation fails or
// append_token() reports failure; nothing is leaked in those cases.
token_t * new_keyword_token(const char         * const word,
                                  hl_group_t   * const    g) {
	static const char prefix[] = "\\<";
	static const char suffix[] = "\\>";

	if (! word) {
		return NULL;
	}

	const size_t prefix_length = sizeof(prefix) - 1;
	const size_t word_length   = strlen(word);

	// sizeof(suffix) already counts the terminating NUL.
	char * new_word = (char*)malloc(prefix_length + word_length + sizeof(suffix));
	if (! new_word) {
		return NULL;
	}

	memcpy(new_word, prefix, prefix_length);
	memcpy(new_word + prefix_length, word, word_length);
	memcpy(new_word + prefix_length + word_length, suffix, sizeof(suffix));

	token_t * mt = (token_t*)malloc(sizeof(*mt));
	if (! mt) {
		free(new_word);
		return NULL;
	}

	mt->hl     = g;
	mt->t      = KEYWORD;
	mt->syntax = new_word;

	// Undo everything if the token could not be stored in the table.
	if (append_token(mt) != 0) {
		free(new_word);
		free(mt);
		return NULL;
	}

	return mt;
}

// Creates one keyword token per entry of `words`, all tagged with group `g`.
// `words` is walked until its first NULL entry, which therefore has to be
// present as the terminator.
// Returns how many calls to new_keyword_token() yielded a non-NULL token.
int new_keyword_tokens(const char       * const * words,
                             hl_group_t * const   g) {
	int created = 0;

	for (const char * const * cursor = words; *cursor; ++cursor) {
		if (new_keyword_token(*cursor, g)) {
			created++;
		}
	}

	return created;
}

// Creates a token of kind `t` from `word`, tagged with group `g`, by handing
// off to the constructor for that kind.
// Returns that constructor's result; kinds without a constructor (MATCH,
// REGION, or any other value) yield NULL.
token_t * new_token(const char         * const word,
                    const token_type_t            t,
                          hl_group_t   * const    g) {
	if (t == KEYSYMBOL) {
		return new_symbol_token(word, g);
	}

	if (t == KEYWORD) {
		return new_keyword_token(word, g);
	}

	return NULL;
}

// --------------------
// ### Highlighting ###
// --------------------

// Tests whether `token` applies to the string `to` at `string_offset`.
//
// With a pattern, the answer is whatever regex_match() returns for it, and
// regex_match() is also the one that fills in `*match_offset`.
// render_string() treats a nonzero result as "matched" and passes it to the
// display callback as the length.
// Without a pattern (token->syntax is NULL) the token fits unconditionally; in
// that case `*match_offset` is set to 0 here, because render_string() reads it
// right after a nonzero result and would otherwise use an indeterminate value.
int token_fits(const token_t * const       token,
               const char    * const       to,
               const int                   string_offset,
			             int * match_offset) {
	const char * const pattern = token->syntax;

	if (! pattern) {
		if (match_offset) {
			*match_offset = 0;
		}
		return true;
	}

	return regex_match(pattern, to, string_offset, match_offset);
}

void render_string(const char * const string,
                   const char * const mode) {
	for (const char * s = string; *s != '\00';) {
		int f;
		int token_index = 0;
		int offset;

		for (; token_index < token_table_top; token_index++) {
			f = token_fits(token_table[token_index], string, s - string, &offset);
			if (f) {
				break;
			}
		}
		//
		display_t * display;
		HASH_FIND_STR(display_table,
		              mode,
		              display);
		//
		if (f) {
			for (int i = 0; i < offset; i++) {
				display->callback(s + i,
				                  0,
				                  token_table[token_index]->hl->attributes);
			}
			display->callback(s + offset,
			                  f,
			                  token_table[token_index]->hl->attributes);
			s += f + offset;
		} else {
			display->callback(s,
			                  0,
			                  NULL);
			++s;
		}
	}
}

// --------------------------
// ### Library Management ###
// --------------------------

// Library start-up hook. There is currently nothing to set up, so it only
// reports success by returning 0.
int hl_init(void) {
	return 0;
}

// Library shutdown hook: calls free_token() on every registered token and
// empties token_table.
//
// Each slot is cleared and token_table_top is reset to 0 so that a second call,
// or registering and rendering tokens afterwards, never touches memory that
// was already released. Always returns 0.
int hl_deinit(void) {
	for (int i = 0; i < token_table_top; i++) {
		free_token(token_table[i]);
		token_table[i] = NULL;
	}

	token_table_top = 0;

	return 0;
}
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00			`#include <stdio.h>`
			`#include <uthash.h>`
			`#include <ctype.h>`
			`#include <string.h>`
			`#include "chad.h"`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`#include "regex.h"`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00
code ordering 2023-08-21 10:13:24 -04:00			`// -------------------`
			`// ### Definitions ###`
			`// -------------------`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00
			`typedef enum {`
renamed token structs, hid uxhash into hl.h 2023-08-19 14:47:42 -04:00			`KEYSYMBOL,`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00			`KEYWORD,`
			`MATCH,`
			`REGION`
renamed token structs, hid uxhash into hl.h 2023-08-19 14:47:42 -04:00			`} token_type_t;`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00
			`typedef struct {`
Standard fix... 2023-08-23 19:58:38 -04:00			`char * key;`
			`attribute_callback_t callback;`
			`UT_hash_handle hh;`
			`} display_t;`

			`typedef struct {`
			`void * attributes;`
			`struct hl_group_t * link;`
			`} hl_group_t;`

			`typedef struct {`
			`hl_group_t * hl;`
			`token_type_t t;`
			`char * syntax;`
renamed token structs, hid uxhash into hl.h 2023-08-19 14:47:42 -04:00			`} token_t;`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00
Standard fix... 2023-08-23 19:58:38 -04:00			`typedef void (attribute_callback_t) (const char const string,`
			`const int length,`
			`void * const attributes);`

			`// GLOBALS`

renamed token structs, hid uxhash into hl.h 2023-08-19 14:47:42 -04:00			`token_t * token_table[1000];`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00			`int token_table_top = 0;`

Standard fix... 2023-08-23 19:58:38 -04:00			`display_t * display_table = NULL;`
code ordering 2023-08-21 10:13:24 -04:00
			`// --------------------------------`
			`// ### Constructors/Destructors ###`
			`// --------------------------------`
Standard fix... 2023-08-23 19:58:38 -04:00
code ordering 2023-08-21 10:13:24 -04:00			`void new_display_mode(display_t * mode) {`
			`HASH_ADD_STR(display_table,`
			`key,`
			`mode);`
			`}`

Standard fix... 2023-08-23 19:58:38 -04:00			`int free_token(token_t * token) {`
code ordering 2023-08-21 10:13:24 -04:00			`free(token->hl);`
			`free(token->syntax);`
Standard fix... 2023-08-23 19:58:38 -04:00
code ordering 2023-08-21 10:13:24 -04:00			`return 0;`
			`}`

Standard fix... 2023-08-23 19:58:38 -04:00			`int append_token(token_t * token) {`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`token_table[token_table_top++] = token;`
Standard fix... 2023-08-23 19:58:38 -04:00
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`return 0;`
			`}`

			`token_t * new_symbol_token(const char * const word,`
			`hl_group_t * const g) {`
			`char * new_word = strdup(word);`

renamed token structs, hid uxhash into hl.h 2023-08-19 14:47:42 -04:00			`token_t * mt = (token_t*)malloc(sizeof(token_t));`
Standard fix... 2023-08-23 19:58:38 -04:00
			`mt->hl = g;`
			`mt->t = KEYSYMBOL;`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`mt->syntax = new_word;`
Standard fix... 2023-08-23 19:58:38 -04:00
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`append_token(mt);`

Standard fix... 2023-08-23 19:58:38 -04:00			`return mt;`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00			`}`

anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`int new_symbol_tokens(const char * const * symbols,`
			`hl_group_t * const g) {`
renamed token structs, hid uxhash into hl.h 2023-08-19 14:47:42 -04:00			`int i = 0;`
Standard fix... 2023-08-23 19:58:38 -04:00
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`while (*symbols) {`
Standard fix... 2023-08-23 19:58:38 -04:00			`if(new_symbol_token(*symbols, g)) {`
renamed token structs, hid uxhash into hl.h 2023-08-19 14:47:42 -04:00			`++i;`
			`}`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`++symbols;`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00			`}`
renamed token structs, hid uxhash into hl.h 2023-08-19 14:47:42 -04:00
			`return i;`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00			`}`

the implemented parts of regex werk 2023-08-21 14:07:39 -04:00			`int new_char_tokens(const char * characters,`
			`hl_group_t * const g) {`
Standard fix... 2023-08-23 19:58:38 -04:00			`int i = 0;`
			`char buffer[2] = "";`

anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`buffer[1] = '\00';`
Standard fix... 2023-08-23 19:58:38 -04:00
			`for(const char * s = characters; *s != '\0'; s++) {`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`buffer[0] = *s;`
Standard fix... 2023-08-23 19:58:38 -04:00			`if(new_symbol_token(buffer, g)) {`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`++i;`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00			`}`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`}`
Standard fix... 2023-08-23 19:58:38 -04:00
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`return i;`
			`}`

			`token_t * new_keyword_token(const char * const word,`
			`hl_group_t * const g) {`
Standard fix... 2023-08-23 19:58:38 -04:00			`size_t word_length = strlen(word);`
			`char * new_word = (char*)malloc(word_length + 4 + 1);`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00
			`memcpy(new_word, "\\<", 2);`
			`memcpy(new_word + 2, word, word_length);`
			`strcpy(new_word + 2 + word_length, "\\>");`

			`token_t * mt = (token_t*)malloc(sizeof(token_t));`
Standard fix... 2023-08-23 19:58:38 -04:00
			`mt->hl = g;`
			`mt->t = KEYWORD;`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`mt->syntax = new_word;`
Standard fix... 2023-08-23 19:58:38 -04:00
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`append_token(mt);`
Standard fix... 2023-08-23 19:58:38 -04:00
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`return mt;`
			`}`

Standard fix... 2023-08-23 19:58:38 -04:00			`int new_keyword_tokens(const char * const * words,`
			`hl_group_t * const g) {`
code ordering 2023-08-21 10:13:24 -04:00			`int i = 0;`
Standard fix... 2023-08-23 19:58:38 -04:00
code ordering 2023-08-21 10:13:24 -04:00			`while (*words) {`
Standard fix... 2023-08-23 19:58:38 -04:00			`if(new_keyword_token(*words, g)) {`
code ordering 2023-08-21 10:13:24 -04:00			`++i;`
			`}`
			`++words;`
			`}`

			`return i;`
			`}`

anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`token_t * new_token(const char * const word,`
			`const token_type_t t,`
			`hl_group_t * const g) {`
Standard fix... 2023-08-23 19:58:38 -04:00			`switch (t) {`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`case KEYSYMBOL: {`
			`return new_symbol_token(word, g);`
Standard fix... 2023-08-23 19:58:38 -04:00			`}`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`case KEYWORD: {`
			`return new_keyword_token(word, g);`
Standard fix... 2023-08-23 19:58:38 -04:00			`}`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`case MATCH: {`
			`} break;`
			`case REGION: {`
			`} break;`
			`}`
Standard fix... 2023-08-23 19:58:38 -04:00
code ordering 2023-08-21 10:13:24 -04:00			`return NULL;`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`}`

code ordering 2023-08-21 10:13:24 -04:00			`// --------------------`
			`// ### Highlighting ###`
			`// --------------------`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00
Standard fix... 2023-08-23 19:58:38 -04:00			`int token_fits(const token_t * const token,`
			`const char * const to,`
			`const int string_offset,`
			`int * match_offset) {`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`const char * const pattern = token->syntax;`

Standard fix... 2023-08-23 19:58:38 -04:00			`if (! pattern) {`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00			`return true;`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00			`}`
anon prototype is (kinda) pretty now 2023-08-19 18:49:10 -04:00
the implemented parts of regex werk 2023-08-21 14:07:39 -04:00			`return regex_match(pattern, to, string_offset, match_offset);`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00			`}`

			`void render_string(const char * const string,`
Standard fix... 2023-08-23 19:58:38 -04:00			`const char * const mode) {`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00			`for (const char * s = string; *s != '\00';) {`
			`int f;`
the implemented parts of regex werk 2023-08-21 14:07:39 -04:00			`int token_index = 0;`
			`int offset;`
Standard fix... 2023-08-23 19:58:38 -04:00
the implemented parts of regex werk 2023-08-21 14:07:39 -04:00			`for (; token_index < token_table_top; token_index++) {`
			`f = token_fits(token_table[token_index], string, s - string, &offset);`
Standard fix... 2023-08-23 19:58:38 -04:00			`if (f) {`
			`break;`
			`}`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00			`}`
			`//`
			`display_t * display;`
renamed token structs, hid uxhash into hl.h 2023-08-19 14:47:42 -04:00			`HASH_FIND_STR(display_table,`
			`mode,`
			`display);`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00			`//`
syntax fixing 2023-08-19 07:21:43 -04:00			`if (f) {`
Standard fix... 2023-08-23 19:58:38 -04:00			`for (int i = 0; i < offset; i++) {`
the implemented parts of regex werk 2023-08-21 14:07:39 -04:00			`display->callback(s + i,`
Standard fix... 2023-08-23 19:58:38 -04:00			`0,`
			`token_table[token_index]->hl->attributes);`
the implemented parts of regex werk 2023-08-21 14:07:39 -04:00			`}`
			`display->callback(s + offset,`
renamed token structs, hid uxhash into hl.h 2023-08-19 14:47:42 -04:00			`f,`
the implemented parts of regex werk 2023-08-21 14:07:39 -04:00			`token_table[token_index]->hl->attributes);`
			`s += f + offset;`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00			`} else {`
renamed token structs, hid uxhash into hl.h 2023-08-19 14:47:42 -04:00			`display->callback(s,`
			`0,`
			`NULL);`
clean up, merged prototypes and anon's prototype rework 2023-08-19 07:18:34 -04:00			`++s;`
			`}`
			`}`
			`}`
renamed token structs, hid uxhash into hl.h 2023-08-19 14:47:42 -04:00
code ordering 2023-08-21 10:13:24 -04:00			`// -------------------------`
			`// ### Library Mangement ###`
			`// -------------------------`
Standard fix... 2023-08-23 19:58:38 -04:00
code ordering 2023-08-21 10:13:24 -04:00			`int hl_init(void) {`
			`return 0;`
			`}`

			`int hl_deinit(void) {`
Standard fix... 2023-08-23 19:58:38 -04:00			`for (int i = 0; i < token_table_top; i++) {`
code ordering 2023-08-21 10:13:24 -04:00			`free_token(token_table[i]);`
			`}`
Standard fix... 2023-08-23 19:58:38 -04:00
code ordering 2023-08-21 10:13:24 -04:00			`return 0;`
renamed token structs, hid uxhash into hl.h 2023-08-19 14:47:42 -04:00			`}`