Compare commits

...

2 Commits

Author SHA1 Message Date
256c1b0279 renamed token structs, hid uxhash into hl.h 2023-08-19 20:47:42 +02:00
b47f01079c README draft 2023-08-19 20:46:57 +02:00
4 changed files with 105 additions and 17 deletions

0
README
View File

72
README.md Normal file
View File

@ -0,0 +1,72 @@
# hl
General purpose highlighter.
// it would be lovely to have a different name for the "library" part and the CLI
# Usage
hl will read from stdin and write to stdout.
hl < source/main.c
### Cli Options
-h : display help message
-F <dir> : syntax file look up directory
-s <syntax> : specify syntax to load
### Environment variables
HL_HOME : default directory to load syntax files from
# API
void render_string(const char * const string, const char * const mode);
This function matches _string_ against all known highlighting rules and dispatches the appropriate callback depending on mode.
typedef void (*attribute_callback_t)(const char * const string, const int length, void * const attributes);
The type used for defining appropriate callbacks for render_string().
string - string to be output
length - number of characters that matched a highlighting rule;
0 if no rule matched; in such a case the user is still expected to output 1 character
attributes - arbitrary data associated with the matched rule; intended to hold color/font information for example
typedef struct {
char * key;
attribute_callback_t callback;
} display_t;
The type for defining display modes.
void new_display_mode(display_t * mode);
This is how you append a display mode that render_string() will search based on _.key_.
typedef enum {
KEYSYMBOL,
KEYWORD,
MATCH,
REGION
} token_type_t;
These are the valid token types.
KEYSYMBOL - a string which is contextless, the surrounding text is ignored
"mysymbol" will match inside all of these:
"something mysymbol something"
"somethingmysymbolsomething"
it is intended to match such things as programming language operators,
so both "var a = 'a'" and "var a='a'" are recognized
KEYWORD - a string which is recognized when surrounded by word boundaries such as ' ' or '\t'
MATCH - a Vim style regular expression to be recognized
REGION - a Vim style regular expression where the starting and ending patterns are to be distinguished from the contents
The universal way to add a new pattern to be recognized is with:
token * new_token(const char * const syntax, const token_type_t t, const hl_group_t * const g);
This wraps one of the following:
// ?!
There are also convenience functions:
// NOTE: the return value is the number of tokens successfully inserted
int new_keyword_tokens(const char * const * words, hl_group_t * const g);
int new_syntax_character_tokens(const char * const chars, hl_group_t * const g);
# Scripting
hl can parse a small subset of VimScript: the few instructions related to highlighting, and it ignores everything else.
All Vim highlighting scripts should be valid hl scripts.
The instructions in particular are:
sy[ntax] keyword <hl_group> <word>+
sy[ntax] match <hl_group> <regex>
sy[ntax] region <hl_group> start=<string|match> end=<string|match>
hi[ghlight] link <from_group> <to_group>
hi[ghlight] def <group> <display_t>=<data>+
Additionally hl recognizes:
sy[ntax] keysymbol <char>+

View File

@ -21,26 +21,28 @@ typedef struct {
} hl_group_t;
typedef enum {
KEYSYMBOL,
KEYWORD,
MATCH,
REGION
} token_t;
} token_type_t;
typedef struct {
hl_group_t * hl;
token_t t;
token_type_t t;
char* syntax;
} token; // XXX: this will have to be renamed
} token_t;
/* Temp solution
* this should be dynamic
*/
token * token_table[1000];
token_t * token_table[1000];
int token_table_top = 0;
token * new_token(const char * const syntax,
const token_t t,
token_t * new_token(const char * const syntax,
const token_type_t t,
const hl_group_t * const g) {
token * mt = (token*)malloc(sizeof(token));
token_t * mt = (token_t*)malloc(sizeof(token_t));
mt->hl = g;
mt->t = t;
mt->syntax = syntax;
@ -50,10 +52,15 @@ token * new_token(const char * const syntax,
void new_keyword_tokens(const char * const * words,
hl_group_t * const g) {
int i = 0;
while (*words) {
new_token(*words, KEYWORD, g);
words = words + 1;
if(new_token(*words, KEYWORD, g)){
++i;
}
++words;
}
return i;
}
int token_fits(const char* const pattern,
@ -89,18 +96,30 @@ void render_string(const char * const string,
int i = 0;
for (; i < token_table_top; i++) {
f = token_fits(token_table[i]->syntax, s);
if(f){ break; };
if(f){ break; }
}
//
display_t * display;
HASH_FIND_STR(display_table, mode, display);
HASH_FIND_STR(display_table,
mode,
display);
//
if (f) {
display->callback(s, f, token_table[i]->hl->attributes);
display->callback(s,
f,
token_table[i]->hl->attributes);
s += f;
} else {
display->callback(s, 0, NULL);
display->callback(s,
0,
NULL);
++s;
}
}
}
void new_display_mode(display_t * mode) {
HASH_ADD_STR(display_table,
key,
mode);
}

View File

@ -79,10 +79,7 @@ int main(int argc,
hl_group_t mygroup = (hl_group_t) {
.link = NULL
};
HASH_ADD_STR(display_table,
key,
cterm);
new_display_mode(cterm);
new_keyword_tokens(c_keywords, &mygroup);
new_keyword_tokens(preprocessor_keywords, &mygroup);