libhl/include/hl.h

/* hl.h
 * Copyright 2023 Anon Anonson, Ognjen 'xolatile' Milan Robovic, Emil Williams
 * SPDX Identifier: GPL-3.0-only / NO WARRANTY / NO GUARANTEE */

#include <stdio.h>
#include <uthash.h>
#include <ctype.h>
#include <string.h>
#include <stdbool.h>
#include "chad.h"
#include "regex.h"

// -------------------
// ### Definitions ###
// -------------------

/* Kind of a highlighting token; stored in token_t.t and used by new_token()
 * to pick which constructor to run. */
typedef enum {
	KEYSYMBOL, // pattern compiled as given (see new_symbol_token())
	KEYWORD,   // pattern wrapped in \< ... \> (see new_keyword_token())
	MATCH,     // pattern compiled as given by new_token() itself
	REGION     // accepted by new_token(), which creates no token for it
} token_type_t;

/* Output callback of a display mode, invoked by render_string().
 *  string     - pointer into the text being rendered
 *  length     - the value token_fits() reported for a match, or 0 when the
 *               callback is invoked for a position without a reported match
 *  attributes - the `attributes` of the matching token's group, or NULL when
 *               no token matched at this position
 */
typedef void (*attribute_callback_t) (const char * string,
                                      const int    length,
                                            void * attributes);

/* A display mode: a named output backend stored in the display_table hash. */
typedef struct {
	char                 * key;      // mode name; the string key used for hashing
	attribute_callback_t   callback; // called by render_string() to emit output
	UT_hash_handle         hh;       // uthash bookkeeping handle
} display_t;

/* A highlight group: the attributes handed to a display callback for every
 * token that belongs to the group.
 *
 * The struct carries the tag `hl_group_t` so that `link` really points to
 * this type. Without the tag, `struct hl_group_t` would name a separate,
 * never-completed type and `link` could neither be dereferenced nor assigned
 * from an `hl_group_t *` without a cast.
 */
typedef struct hl_group_t {
	void              * attributes; // opaque; passed through to the display callback
	struct hl_group_t * link;       // another group; not read by the visible code
	                                // (presumably a fallback/alias — confirm)
} hl_group_t;

/* One highlighting rule: a compiled pattern plus the group it is shown with. */
typedef struct {
	hl_group_t   * hl;      // group supplied by the caller of the constructor
	regex_t      * syntax;  // result of regex_compile() on the token's pattern
	token_type_t   t;       // kind assigned by the constructor
	char           _pad[4]; // never accessed; presumably explicit tail padding — confirm
} token_t;

// Registry of tokens added through append_token(); holds token_t pointers.
extern vector_t    token_table;
// uthash table of display modes, keyed by display_t.key.
extern display_t * display_table;

// Shared highlight groups.
// NOTE(review): no definition of preprocessor_hl or symbol_hl is visible in
// this source — confirm they are defined elsewhere.
extern hl_group_t * keyword_hl;
extern hl_group_t * preprocessor_hl;
extern hl_group_t * symbol_hl;

// Adds `mode` to display_table under mode->key.
extern void new_display_mode(display_t * mode);
// Releases resources held by `token`; always returns 0.
extern int free_token(token_t * token);
// Pushes the token pointer onto token_table; always returns 0.
extern int append_token(token_t * token);

// TODO: ALIGN PROPERLY...

// Compiles `c` as a pattern, registers the resulting KEYSYMBOL token in
// token_table and returns it.
extern token_t * new_symbol_token(const char         * const c,
                                        hl_group_t   * const g);

// Calls new_symbol_token() for each entry of the NULL-terminated array
// `symbols`; returns how many calls produced a token.
extern int       new_symbol_tokens(const char       * const *     symbols,
                                         hl_group_t * const             g);

// Creates one symbol token per character of `str`; characters for which
// is_magic() is true are prefixed with a backslash first. Returns how many
// tokens were produced.
extern int       new_char_tokens(const char       *              str,
                                       hl_group_t * const          g);

// Wraps `word` in \< ... \>, compiles it, registers the resulting KEYWORD
// token in token_table and returns it.
extern token_t * new_keyword_token(const char         * const word,
                                         hl_group_t   * const    g);

// Calls new_keyword_token() for each entry of the NULL-terminated array
// `words`; returns how many calls produced a token.
extern int       new_keyword_tokens(const char       * const * words,
                                          hl_group_t * const   g);

// Dispatches on `t` to the matching constructor. Returns the token that
// constructor produced, or NULL when none is returned for that type.
extern token_t * new_token(const char         * const word,
                           const token_type_t            t,
                                 hl_group_t   * const    g);

// TODO: ALIGN PROPERLY...

// Matches token->syntax against `to` via regex_match(), passing along
// `string_offset` and `is_start_of_line`, and returns regex_match()'s result.
// `match_offset` is currently unused and never written.
extern int token_fits(const token_t * const          token,
                      const char    * const             to,
                      const int              string_offset,
                      const bool            is_start_of_line,
                            int     *         match_offset);

// Walks `string`, tries the tokens of token_table in order at each position,
// and reports the text through the callback of the display mode named `mode`.
extern void render_string(const char * const string,
                          const char * const mode);

// hl_init() performs no work and returns 0.
// hl_deinit() calls free_token() on every entry of token_table and returns 0.
extern int hl_init(void);
extern int hl_deinit(void);

// GLOBALS

// Token registry filled by append_token(). Each element is a token_t pointer
// (element_size is the size of a pointer), not a token_t value.
vector_t token_table = {
	.data          = NULL,
	.element_size  = sizeof(token_t *),
	.element_count = 0UL
};

// Head of the uthash table of display modes. Starts out NULL; entries are
// added by new_display_mode() and looked up by render_string().
display_t * display_table = NULL;

// --------------------------------
// ### Constructors/Destructors ###
// --------------------------------

/* Registers a display mode in display_table.
 *
 * The entry is hashed by the string mode->key points to. `mode` itself is
 * linked into the table (not copied), so it must stay alive while registered.
 */
void new_display_mode(display_t * mode) {
	const unsigned key_length = (unsigned)strlen(mode->key);

	// Spelled-out form of HASH_ADD_STR for a `char *` key field.
	HASH_ADD_KEYPTR(hh, display_table, mode->key, key_length, mode);
}

/* Releases the resources a token owns.
 *
 * Only the compiled pattern is owned by the token. token->hl is the group the
 * caller handed to the constructor and is shared by every token created with
 * that group (new_symbol_tokens() and friends reuse one group for a whole
 * list), so freeing it here would free the same pointer once per token.
 * The group's creator remains responsible for it.
 *
 * The token struct itself is not freed here.
 *
 * token - token to clean up; NULL is accepted and ignored.
 * Returns 0.
 */
int free_token(token_t * token) {
	if (token == NULL) {
		return 0;
	}

	if (token->syntax != NULL) {
		regex_free(token->syntax);
		token->syntax = NULL; // makes a repeated call harmless
	}

	return 0;
}

/* Adds a token to the global token_table.
 *
 * The table stores token_t pointers, so what gets pushed is the pointer
 * value, not a copy of the token.
 *
 * Returns 0.
 */
int append_token(token_t * token) {
	token_t * entry = token;

	vector_push(&token_table, &entry);
	return 0;
}

/* Creates a KEYSYMBOL token whose pattern is `c` compiled as given and
 * registers it in token_table.
 *
 * c - pattern handed to regex_compile()
 * g - highlight group of the token; borrowed, not copied
 *
 * Returns the new token, or NULL if the token could not be allocated
 * (nothing is registered in that case).
 */
token_t * new_symbol_token(const char         * const c,
                                 hl_group_t   * const g) {

	token_t * mt = (token_t*)malloc(sizeof(token_t));
	if (mt == NULL) {
		return NULL;
	}

	mt->hl     = g;
	mt->t      = KEYSYMBOL;
	mt->syntax = regex_compile(c);

	append_token(mt);

	return mt;
}

/* Creates a symbol token for every string of a NULL-terminated array.
 *
 * symbols - array of patterns, terminated by a NULL entry
 * g       - highlight group shared by all created tokens
 *
 * Returns the number of tokens created. A failed creation trips an assert
 * and is not counted.
 */
int new_symbol_tokens(const char       * const *     symbols,
                            hl_group_t * const             g) {
	int created = 0;

	for (const char * const * cursor = symbols; *cursor != NULL; ++cursor) {
		if (new_symbol_token(*cursor, g) == NULL) {
			assert(!(bool)"Kinda failed to new symbol token thing.");
			continue;
		}
		++created;
	}

	return created;
}

/* Creates one symbol token per character of `str`.
 *
 * Each character becomes a one-character pattern. When is_magic() reports the
 * character as special, the pattern is the character preceded by a backslash.
 *
 * str - NUL-terminated list of characters
 * g   - highlight group shared by all created tokens
 *
 * Returns the number of tokens created. A failed creation trips an assert
 * and is not counted.
 */
int new_char_tokens(const char       *              str,
                          hl_group_t * const          g) {
	int  created    = 0;
	// Layout: backslash, the current character, terminator.
	char escaped[3] = { '\\', '\0', '\0' };

	for (size_t k = 0; str[k] != '\0'; ++k) {
		escaped[1] = str[k];

		// Skip the leading backslash for ordinary characters.
		const char * pattern = is_magic(str[k]) ? escaped : &escaped[1];

		if (new_symbol_token(pattern, g) != NULL) {
			++created;
		} else {
			assert(!(bool)"Kinda failed to new char token thing.");
		}
	}

	return created;
}

/* Creates a KEYWORD token for `word` and registers it in token_table.
 *
 * The compiled pattern is `word` wrapped in \< and \>.
 *
 * word - keyword text, inserted into the pattern verbatim
 * g    - highlight group of the token; borrowed, not copied
 *
 * Returns the new token, or NULL if an allocation failed (nothing is
 * registered in that case).
 */
token_t * new_keyword_token(const char         * const word,
                                  hl_group_t   * const    g) {
	static const char prefix[] = "\\<";
	static const char suffix[] = "\\>";

	const size_t prefix_length = sizeof(prefix) - 1;
	const size_t word_length   = strlen(word);

	// sizeof(suffix) already accounts for the terminating NUL.
	char * pattern = (char*)malloc(prefix_length + word_length + sizeof(suffix));
	if (pattern == NULL) {
		return NULL;
	}

	memcpy(pattern, prefix, prefix_length);
	memcpy(pattern + prefix_length, word, word_length);
	memcpy(pattern + prefix_length + word_length, suffix, sizeof(suffix));

	token_t * mt = (token_t*)malloc(sizeof(token_t));
	if (mt == NULL) {
		free(pattern);
		return NULL;
	}

	mt->hl     = g;
	mt->t      = KEYWORD;
	mt->syntax = regex_compile(pattern);

	// The pattern text is only needed during compilation: the sibling
	// constructors hand regex_compile() short-lived stack buffers, so it
	// cannot be keeping the pointer. Releasing it here plugs a per-keyword leak.
	free(pattern);

	append_token(mt);

	return mt;
}

/* Creates a keyword token for every string of a NULL-terminated array.
 *
 * words - array of keywords, terminated by a NULL entry
 * g     - highlight group shared by all created tokens
 *
 * Returns the number of tokens created; failed creations are skipped.
 */
int new_keyword_tokens(const char       * const * words,
                             hl_group_t * const   g) {
	int created = 0;

	for (const char * const * cursor = words; *cursor != NULL; ++cursor) {
		if (new_keyword_token(*cursor, g) != NULL) {
			++created;
		}
	}

	return created;
}

/* Creates a token that matches from `start` up to `end` and registers it in
 * token_table.
 *
 * The compiled pattern is `start`, followed by [\d\D]*, followed by `end`.
 * The pattern is assembled in a heap buffer sized from the inputs, so
 * delimiters of any length are safe (a fixed 100-byte stack buffer would
 * overflow on long ones).
 *
 * start - pattern opening the region
 * end   - pattern closing the region
 * g     - highlight group of the token; borrowed, not copied
 *
 * Returns the new token, or NULL if an allocation failed (nothing is
 * registered in that case).
 */
token_t * new_region_token(const char       * start,
                           const char       *   end,
                                 hl_group_t *       g) {
	static const char filler[] = "[\\d\\D]*";

	const size_t start_length  = strlen(start);
	const size_t filler_length = sizeof(filler) - 1;
	const size_t end_length    = strlen(end);

	char * pattern = (char*)malloc(start_length + filler_length + end_length + 1);
	if (pattern == NULL) {
		return NULL;
	}

	memcpy(pattern, start, start_length);
	memcpy(pattern + start_length, filler, filler_length);
	// end_length + 1 copies the terminating NUL as well.
	memcpy(pattern + start_length + filler_length, end, end_length + 1);

	token_t * mt = (token_t*)malloc(sizeof(token_t));
	if (mt == NULL) {
		free(pattern);
		return NULL;
	}

	mt->hl     = g;
	// NOTE(review): the token is tagged KEYSYMBOL rather than REGION; kept
	// as-is because nothing visible here reads the tag — confirm the intent.
	mt->t      = KEYSYMBOL;
	mt->syntax = regex_compile(pattern);

	free(pattern);

	append_token(mt);

	return mt;
}

/* Creates and registers a token of the requested type.
 *
 * word - pattern text; its interpretation depends on `t`
 * t    - KEYSYMBOL and KEYWORD delegate to new_symbol_token() and
 *        new_keyword_token(); MATCH compiles `word` as given; REGION creates
 *        nothing, as a single pattern cannot carry both delimiters that
 *        new_region_token() needs
 * g    - highlight group of the token; borrowed, not copied
 *
 * Returns the created token, or NULL when nothing was created (REGION, an
 * unknown type, or an allocation failure).
 */
token_t * new_token(const char         * const word,
                    const token_type_t            t,
                          hl_group_t   * const    g) {
	switch (t) {
		case KEYSYMBOL: {
			return new_symbol_token(word, g);
		}
		case KEYWORD: {
			return new_keyword_token(word, g);
		}
		case MATCH: {
			token_t * mt = (token_t*)malloc(sizeof(token_t));
			if (mt == NULL) {
				return NULL;
			}

			mt->hl     = g;
			mt->t      = MATCH;
			mt->syntax = regex_compile(word);

			append_token(mt);

			// Hand the token back like the other constructors do; falling
			// through to the NULL below would report a failure for a token
			// that was in fact created and registered.
			return mt;
		}
		case REGION: {
		} break;
		default: {
		} break;
	}

	return NULL;
}

// --------------------
// ### Highlighting ###
// --------------------

int token_fits(const token_t * const            token,
               const char    * const               to,
               const int                string_offset,
               const bool            is_start_of_line,
                     int     *       match_offset) {
  UNUSED(match_offset);
	//return regex_match(pattern, to, string_offset, match_offset);
	return regex_match(token->syntax, to, is_start_of_line, string_offset);
}

void render_string(const char * const string,
                   const char * const mode) {
	for (const char * s = string; *s != '\00';) {
		int    f           = 0;
		size_t token_index = 0;
		int    offset      = 0;

		for (; token_index < token_table.element_count; token_index++) {
			token_t * t = *(token_t**)vector_get(&token_table,
			                                     token_index);
			const bool is_start_of_line = (s == string) || (*s == '\n');
			f = token_fits(t, string, (int)(s - string), is_start_of_line, &offset);
			if (f) {
				break;
			}
		}
		//
		display_t * display;
		HASH_FIND_STR(display_table,
		              mode,
		              display);
		//
		if (f) {
			for (int i = 0; i < offset; i++) {
				token_t * t = *(token_t**)vector_get(&token_table,
				                                     token_index);
				display->callback(s + i,
				                  0,
				                  t->hl->attributes);
			}
			token_t * t = *(token_t**)vector_get(&token_table,
			                                     token_index);
			display->callback(s + offset,
			                  f,
			                  t->hl->attributes);
			s += f + offset;
		} else {
			display->callback(s,
			                  0,
			                  NULL);
			++s;
		}
	}
}

// --------------------------
// ### Library Management ###
// --------------------------
// Highlight group handles, all initially NULL. Nothing in the visible source
// assigns them — presumably the language-specific setup code does (confirm).
hl_group_t * special_hl          = NULL;
hl_group_t * control_hl          = NULL;
hl_group_t * keyword_hl          = NULL;
hl_group_t * block_hl            = NULL;
hl_group_t * separator_hl        = NULL;
hl_group_t * operator_hl         = NULL;
hl_group_t * comment_hl          = NULL;
hl_group_t * string_literal_hl   = NULL;

/* Library start-up hook. There is currently nothing to set up, so it only
 * reports success.
 *
 * Returns 0.
 */
int hl_init(void) {
	enum { HL_INIT_OK = 0 };

	return HL_INIT_OK;
}

int hl_deinit(void) {
	for (size_t i = 0; i < token_table.element_count; i++) {
		free_token(*(token_t**)vector_get(&token_table, i));
	}

	return 0;
}