239 lines
11 KiB
C
Executable File
239 lines
11 KiB
C
Executable File
/// _
|
|
/// __ ___ _ _ __ | |_ __ ___ __
|
|
/// \ \/ / | | | '_ \| __/ _` \ \/ /
|
|
/// > <| |_| | | | | || (_| |> <
|
|
/// /_/\_\\__, |_| |_|\__\__,_/_/\_\
|
|
/// |___/
|
|
///
|
|
/// Copyright (c) 1997 - Ognjen 'xolatile' Milan Robovic
|
|
///
|
|
/// xolatile@chud.cyou - xyntax - Tiny, unsafe and somewhat insane unity header for generic syntax definition.
|
|
///
|
|
/// This program is free software, free as in freedom and as in free beer, you can redistribute it and/or modify it under the terms of the GNU
|
|
/// General Public License as published by the Free Software Foundation, either version 3 of the License, or any later version if you wish...
|
|
///
|
|
/// This program is distributed in the hope that it will be useful, but it is probably not, and without any warranty, without even the implied
|
|
/// warranty of merchantability or fitness for a particular purpose, because it is pointless. Please see the GNU (Geenoo) General Public License
|
|
/// for more details, if you dare, it is a lot of text that nobody wants to read...
|
|
|
|
/// Description
|
|
///
|
|
/// Xyntax, the most minimal text parser that can deal with syntax highlighting that I could've come up with, and the most generic name for it
|
|
/// also, it's rather slow, but if you're creating heavy duty program, you'd use heavy duty library for it. This library has only one header, so
|
|
/// it's easy to incorporate it into existing projects. If you want to see how it is used, check out simple examples below, if you want more
|
|
/// robust example, check out my other programs, Xarbon and Xighlight.
|
|
///
|
|
/// For start, you want to include this header file, there's no macro for including implementation (like stb libraries), this is for projects
|
|
/// that have only one C source file, and one or more C header files. Then make global or local variable 'syntax_structure * whatever_syntax'
|
|
/// defined below, initialize it, define the rules, then in main loop select rule, do whatever you wanted, then deinitialize the structure. It's
|
|
/// simple, I'll provide minimal examples below.
|
|
|
|
/// Structure for single syntax definition (array of rules), if you want to parse multiple languages simultaneously, use an array.
|
|
///
|
|
/// syntax_structure * syntax = null;
|
|
|
|
typedef struct {
|
|
natural count; /// Count of syntax rules used, maximum is set with limit below, if limit is 0, it'll allocate it dynamically.
|
|
natural limit; /// Preallocation limit for syntax rules, hardcode it if you don't want this to allocate memory dynamically.
|
|
boolean * enrange; /// Enrange rule, set to true if you want to begin matching by any character from 'begin' string below.
|
|
boolean * derange; /// Derange rule, set to true if you want to end matching by any character from 'end' string below.
|
|
character * * begin; /// String containing set of characters or full string for start of matching, correlating to 'enrange' above.
|
|
character * * end; /// String containing set of characters or full string for end of matching, correlating to 'derange' above.
|
|
character * escape; /// Escape character, which will skip one cycle in selection loop, then continue matching for 'end' string.
|
|
natural * colour; /// Colour for matched array of characters, can be anything, enumerated, literal, hardcoded...
|
|
natural * effect; /// Effect for matched array of characters, can be anything, enumerated, literal, hardcoded...
|
|
} syntax_structure;
|
|
|
|
/// Initialize syntax structure before calling other functions that take it as an argument, set 'limit' to 0 if you want dynamic array of rules.
|
|
///
|
|
/// syntax = syntax_initialize (0);
|
|
|
|
/// Create a syntax structure, and preallocate every rule array for 'limit' rules when 'limit' is not 0.
///
/// With 'limit' equal to 0 no rule array is allocated here, only the structure itself, and 'limit' is stored as 0.
///
/// NOTE(review): 'count' and, for 'limit' of 0, the array pointers are never assigned in this function, presumably 'allocate' returns
/// zero-filled memory, confirm against its definition.
static syntax_structure * syntax_initialize (natural limit) {
	syntax_structure * result = allocate (sizeof (* result));

	result->limit = limit;

	/// Dynamic mode, nothing to preallocate.
	if (limit == 0) {
		return (result);
	}

	result->enrange = allocate (limit * sizeof (* result->enrange));
	result->derange = allocate (limit * sizeof (* result->derange));
	result->begin   = allocate (limit * sizeof (* result->begin));
	result->end     = allocate (limit * sizeof (* result->end));
	result->escape  = allocate (limit * sizeof (* result->escape));
	result->colour  = allocate (limit * sizeof (* result->colour));
	result->effect  = allocate (limit * sizeof (* result->effect));

	return (result);
}
|
|
|
|
/// Deinitialize syntax structure after using it, in order to avoid memory leaks.
|
|
///
|
|
/// syntax = syntax_deinitialize (syntax);
|
|
|
|
/// Release everything owned by the syntax structure: the 'begin' and 'end' string of every defined rule, then the rule arrays, then the
/// structure itself. Return value is whatever 'deallocate' returns for the structure, so the caller can overwrite its own pointer with it.
static syntax_structure * syntax_deinitialize (syntax_structure * syntax) {
	natural rule = 0;

	/// Only the first 'count' slots hold strings, they were allocated one by one in 'syntax_define'.
	while (rule < syntax->count) {
		syntax->begin [rule] = deallocate (syntax->begin [rule]);
		syntax->end   [rule] = deallocate (syntax->end   [rule]);

		++rule;
	}

	syntax->enrange = deallocate (syntax->enrange);
	syntax->derange = deallocate (syntax->derange);
	syntax->begin   = deallocate (syntax->begin);
	syntax->end     = deallocate (syntax->end);
	syntax->escape  = deallocate (syntax->escape);
	syntax->colour  = deallocate (syntax->colour);
	syntax->effect  = deallocate (syntax->effect);

	syntax = deallocate (syntax);

	return (syntax);
}
|
|
|
|
/// Define single syntax rule, which will be added into array part of syntax structure, return value is index into that array.
|
|
///
|
|
/// Take a look into few simple examples of defining some simplified rules of C programming language.
|
|
/// Two examples below show how to define multiline comments and strings, since these have priority, both enrange and derange are false.
|
|
///
|
|
/// syntax_define (syntax, false, false, "/*", "*/", '\\', 1, 0);
|
|
/// syntax_define (syntax, false, false, "\"", "\"", '\\', 2, 0);
|
|
///
|
|
/// Now we're defining syntax rule for one keyword, static, notice that end string contains separator characters because derange is true.
|
|
///
|
|
/// syntax_define (syntax, false, true, "static", "()[]{}.,:;<=>+*-/%!&~^?| \t\r\n", '\0', 3, 0);
|
|
///
|
|
/// You can define brackets and operator characters separately, or if you want to, you can define some of them separately again.
|
|
///
|
|
/// syntax_define (syntax, true, false, "()[]{}", "", '\0', 4, 0);
|
|
/// syntax_define (syntax, true, false, ".,:;<=>+*-/%!&~^?|", "", '\0', 5, 0);
|
|
///
|
|
/// And lastly, we can define number selection like this below, by setting both enrange and derange to true.
|
|
///
|
|
/// syntax_define (syntax, true, true, "0123456789", "()[]{}.,:;<=>+*-/%!&~^?| \t\r\n", '\0', 6, 0);
|
|
///
|
|
/// I hope this is pretty clear, if you want to select a number, you start by matching any of digits provided above, and you end matching that
|
|
/// number by any character from 'end' string, if you want to support floating point numbers, you'd exclude '.' character, or alternatively add
|
|
/// letters f, u, l and whatever else your language supports (like in C/C++). However, this approach is too weak for detecting syntax errors,
|
|
/// you shouldn't use this library for robust linter or parser.
|
|
|
|
/// Append one rule to the syntax structure and return its index.
///
/// 'begin' and 'end' must not be null pointers, both are copied into freshly allocated buffers, so the caller keeps ownership of its arguments.
/// When 'limit' is 0 every rule array is grown by one element, otherwise the preallocated arrays are used and defining more than 'limit' rules
/// is reported through 'fatal_failure'.
static natural syntax_define (syntax_structure * syntax, boolean enrange, boolean derange, character * begin, character * end, character escape,
                              natural colour, natural effect) {
	fatal_failure (begin == null, "syntax_define: Begin string is null pointer.");
	fatal_failure (end   == null, "syntax_define: End string is null pointer.");

	/// The limit only applies to preallocated storage, limit of 0 means dynamic growth and must never trip this check. The comparison is
	/// done before incrementing 'count', so that exactly 'limit' rules fit into the preallocated arrays.
	fatal_failure ((syntax->limit != 0) && (syntax->count >= syntax->limit), "syntax_define: Reached the hardcoded limit.");

	natural current = syntax->count;

	++syntax->count;

	if (syntax->limit == 0) {
		syntax->enrange = reallocate (syntax->enrange, syntax->count * sizeof (* syntax->enrange));
		syntax->derange = reallocate (syntax->derange, syntax->count * sizeof (* syntax->derange));
		syntax->begin   = reallocate (syntax->begin,   syntax->count * sizeof (* syntax->begin));
		syntax->end     = reallocate (syntax->end,     syntax->count * sizeof (* syntax->end));
		syntax->escape  = reallocate (syntax->escape,  syntax->count * sizeof (* syntax->escape));
		syntax->colour  = reallocate (syntax->colour,  syntax->count * sizeof (* syntax->colour));
		syntax->effect  = reallocate (syntax->effect,  syntax->count * sizeof (* syntax->effect));
	}

	/// One extra character per string for the null terminator.
	syntax->begin [current] = allocate ((string_length (begin) + 1) * sizeof (* * syntax->begin));
	syntax->end   [current] = allocate ((string_length (end)   + 1) * sizeof (* * syntax->end));

	syntax->enrange [current] = enrange;
	syntax->derange [current] = derange;
	syntax->escape  [current] = escape;
	syntax->colour  [current] = colour;
	syntax->effect  [current] = effect;

	string_copy (syntax->begin [current], begin);
	string_copy (syntax->end   [current], end);

	return (current);
}
|
|
|
|
/// After all syntax definitions have been defined, call this function inside your main loop, return value is index of selected rule.
|
|
///
|
|
/// Now, imagine that 'buffer' is file you've loaded into memory, you have declared natural numbers 'offset', 'length' and 'select', and you've
|
|
/// properly initialized syntax structure 'syntax', defined its rules for wanted language(s), simple main loop would look like this:
|
|
///
|
|
/// for (offset = 0; buffer [offset] != '\0'; offset += length) {
|
|
/// /// Notice that we're not incrementing 'offset', we're increasing it by 'length'.
|
|
/// select = syntax_select (syntax, & buffer [offset], & length);
|
|
/// if (select >= syntax->count) {
|
|
/// /// Syntax definition is incomplete, unknown sequence has been detected, either print nothing, or print default.
|
|
/// } else {
|
|
/// /// Print string of 'length', at '& buffer [offset]', using 'syntax->colour [select]' and 'syntax->effect [select]'.
|
|
/// /// Strings here aren't null terminated, you want to print sized string.
|
|
/// }
|
|
/// }
|
|
|
|
/// Find the first rule, in order of definition, that matches at the start of 'string', and measure how many characters it covers.
///
/// Return value is the index of the selected rule, or 'syntax->count' if no rule matches, in which case '* length' is 1. On every return path
/// '* length' is written, so the caller can always advance by it:
///
/// - Rule with empty 'end' string covers exactly 1 character.
/// - Rule with derange set covers everything up to, but not including, the first character found in its 'end' set.
/// - Rule with derange unset covers everything up to and including its 'end' string, the search starts after the 'begin' string, so the two
///   can't overlap (the input "/*/" is not a finished comment).
/// - If the input ends before the rule is finished, the rule covers the rest of the input, the null terminator is never stepped over.
///
/// Character after an escape character is skipped while searching, escape character equal to the null terminator never matches anything.
static natural syntax_select (syntax_structure * syntax, character * string, natural * length) {
	natural offset = 1;
	natural subset = 0;
	natural select = 0;

	/// First phase, decide which rule begins at the first character of the string.
	for (; select != syntax->count; ++select) {
		caliber begin_length = string_length (syntax->begin [select]);

		if (syntax->enrange [select] == true) {
			/// Any single character out of the 'begin' set starts this rule.
			for (subset = 0; subset != begin_length; ++subset) {
				if (string [0] == syntax->begin [select] [subset]) {
					goto selected;
				}
			}
		} else if (string_compare_limit (string, syntax->begin [select], begin_length) == true) {
			/// Full 'begin' string is present, deranged rule also needs one of its 'end' characters right after it.
			if ((syntax->derange [select] == false)
			||  (character_compare_array (string [begin_length], syntax->end [select]) == true)) {
				break;
			}
		}
	}

	selected:

	if (select >= syntax->count) {
		* length = 1;

		return (syntax->count);
	}

	caliber head_length = string_length (syntax->begin [select]);
	caliber end_length  = string_length (syntax->end   [select]);

	/// Nothing to search for, the rule covers single character.
	if (end_length == 0) {
		* length = 1;

		return (select);
	}

	/// Full string rules continue after their 'begin' string, everything else continues after the first character.
	if ((syntax->enrange [select] == false) && (head_length > 1)) {
		offset = (natural) head_length;
	}

	/// Second phase, search for the end of the rule, the left side of the condition guards the read on the right side for empty input.
	for (; (string [offset - 1] != character_null) && (string [offset] != character_null); ++offset) {
		if (string [offset] == syntax->escape [select]) {
			++offset;

			/// Escape character was the last one in the input, there's nothing to skip.
			if (string [offset] == character_null) {
				break;
			}

			continue;
		}

		if (syntax->derange [select] == true) {
			for (subset = 0; subset != end_length; ++subset) {
				if (string [offset] == syntax->end [select] [subset]) {
					* length = offset;

					return (select);
				}
			}
		} else if (string_compare_limit (& string [offset], syntax->end [select], end_length)) {
			* length = offset + end_length;

			return (select);
		}
	}

	/// Input ended before the rule did, report everything that was consumed.
	* length = offset;

	return (select);
}
|