xolatilization/xyntax.h

239 lines
11 KiB
C
Executable File

/// _
/// __ ___ _ _ __ | |_ __ ___ __
/// \ \/ / | | | '_ \| __/ _` \ \/ /
/// > <| |_| | | | | || (_| |> <
/// /_/\_\\__, |_| |_|\__\__,_/_/\_\
/// |___/
///
/// Copyright (c) 1997 - Ognjen 'xolatile' Milan Robovic
///
/// xolatile@chud.cyou - xyntax - Tiny, unsafe and somewhat insane unity header for generic syntax definition.
///
/// This program is free software, free as in freedom and as in free beer, you can redistribute it and/or modify it under the terms of the GNU
/// General Public License as published by the Free Software Foundation, either version 3 of the License, or any later version if you wish...
///
/// This program is distributed in the hope that it will be useful, but it is probably not, and without any warranty, without even the implied
/// warranty of merchantability or fitness for a particular purpose, because it is pointless. Please see the GNU (Geenoo) General Public License
/// for more details, if you dare, it is a lot of text that nobody wants to read...
/// Description
///
/// Xyntax is the most minimal text parser for syntax highlighting that I could come up with, and it has the most generic name for it too.
/// It's rather slow, but if you're creating a heavy duty program, you'd use a heavy duty library for it. This library has only one header, so
/// it's easy to incorporate it into existing projects. If you want to see how it is used, check out the simple examples below; if you want a
/// more robust example, check out my other programs, Xarbon and Xighlight.
///
/// For start, you want to include this header file, there's no macro for including implementation (like stb libraries), this is for projects
/// that have only one C source file, and one or more C header files. Then make global or local variable 'syntax_structure * whatever_syntax'
/// defined below, initialize it, define the rules, then in main loop select rule, do whatever you wanted, then deinitialize the structure. It's
/// simple, I'll provide minimal examples below.
/// Structure for single syntax definition (array of rules), if you want to parse multiple languages simultaneously, use an array.
///
/// syntax_structure * syntax = null;
/// All pointer members are parallel arrays indexed by rule, element 'i' of each array describes rule 'i'.
typedef struct {
natural count; /// Number of rules currently defined, 'syntax_define' increases it by one for every rule it stores.
natural limit; /// Number of rule slots preallocated by 'syntax_initialize', 0 makes 'syntax_define' resize the arrays on every call.
boolean * enrange; /// Per rule: true if any single character from 'begin' starts a match, false if 'begin' is compared as a whole string.
boolean * derange; /// Per rule: true if any single character from 'end' ends a match, false if 'end' is compared as a whole string.
character * * begin; /// Per rule: copy of the 'begin' string made by 'syntax_define', released in 'syntax_deinitialize'.
character * * end; /// Per rule: copy of the 'end' string made by 'syntax_define', released in 'syntax_deinitialize'.
character * escape; /// Per rule: escape character, 'syntax_select' skips it together with the character that follows it.
natural * colour; /// Per rule: colour value, only stored here, never interpreted by functions in this file.
natural * effect; /// Per rule: effect value, only stored here, never interpreted by functions in this file.
} syntax_structure;
/// Initialize syntax structure before calling other functions that take it as an argument, set 'limit' to 0 if you want dynamic array of rules.
///
/// syntax = syntax_initialize (0);
/// Parameters: limit - number of rule slots to preallocate, with 0 none of the rule arrays are allocated by this function.
/// Returns:    pointer to the new structure obtained from 'allocate', with 'limit' stored in it.
///
/// NOTE(review): 'count' and, for limit 0, the array pointers are never assigned here, so this presumably relies on 'allocate' returning
/// zeroed memory - confirm against its definition.
static syntax_structure * syntax_initialize (natural limit) {
	syntax_structure * created = allocate (sizeof (* created));

	created->limit = limit;

	/// Nothing to preallocate, 'syntax_define' resizes the arrays itself when the limit is 0.
	if (limit == 0) {
		return (created);
	}

	/// One slot per rule in each of the parallel arrays.
	created->enrange = allocate (limit * sizeof (* created->enrange));
	created->derange = allocate (limit * sizeof (* created->derange));
	created->begin   = allocate (limit * sizeof (* created->begin));
	created->end     = allocate (limit * sizeof (* created->end));
	created->escape  = allocate (limit * sizeof (* created->escape));
	created->colour  = allocate (limit * sizeof (* created->colour));
	created->effect  = allocate (limit * sizeof (* created->effect));

	return (created);
}
/// Deinitialize syntax structure after using it, in order to avoid memory leaks.
///
/// syntax = syntax_deinitialize (syntax);
/// Release everything owned by the syntax structure: copied 'begin' and 'end' strings of every rule, the rule arrays, then the structure.
///
/// Parameters: syntax - structure created by 'syntax_initialize', null pointer is accepted and ignored.
/// Returns:    null pointer for null input, otherwise whatever 'deallocate' returns for the structure (presumably null pointer, confirm
///             against its definition), meant to be assigned back to the caller's variable.
static syntax_structure * syntax_deinitialize (syntax_structure * syntax) {
	/// Usage shown for this function assigns the result back to the variable, so calling it a second time must not dereference it.
	if (syntax == null) {
		return (null);
	}

	/// Only the first 'count' slots hold strings allocated by 'syntax_define'.
	for (natural index = 0; index < syntax->count; ++index) {
		syntax->begin [index] = deallocate (syntax->begin [index]);
		syntax->end   [index] = deallocate (syntax->end   [index]);
	}

	syntax->enrange = deallocate (syntax->enrange);
	syntax->derange = deallocate (syntax->derange);
	syntax->begin   = deallocate (syntax->begin);
	syntax->end     = deallocate (syntax->end);
	syntax->escape  = deallocate (syntax->escape);
	syntax->colour  = deallocate (syntax->colour);
	syntax->effect  = deallocate (syntax->effect);

	return (deallocate (syntax));
}
/// Define single syntax rule, which will be added into array part of syntax structure, return value is index into that array.
///
/// Take a look into few simple examples of defining some simplified rules of C programming language.
/// Two examples below show how to define multiline comments and strings, since these have priority, both enrange and derange are false.
///
/// syntax_define (syntax, false, false, "/*", "*/", '\\', 1, 0);
/// syntax_define (syntax, false, false, "\"", "\"", '\\', 2, 0);
///
/// Now we're defining a syntax rule for one keyword, 'static'; notice that the end string contains separator characters because derange is true.
///
/// syntax_define (syntax, false, true, "static", "()[]{}.,:;<=>+*-/%!&~^?| \t\r\n", '\0', 3, 0);
///
/// You can define brackets and operator characters separately, or if you want to, you can define some of them separately again.
///
/// syntax_define (syntax, true, false, "()[]{}", "", '\0', 4, 0);
/// syntax_define (syntax, true, false, ".,:;<=>+*-/%!&~^?|", "", '\0', 5, 0);
///
/// And lastly, we can define number selection like this below, by setting both enrange and derange to true.
///
/// syntax_define (syntax, true, true, "0123456789", "()[]{}.,:;<=>+*-/%!&~^?| \t\r\n", '\0', 6, 0);
///
/// I hope this is pretty clear, if you want to select a number, you start by matching any of digits provided above, and you end matching that
/// number by any character from 'end' string, if you want to support floating point numbers, you'd exclude '.' character, or alternatively add
/// letters f, u, l and whatever else your language supports (like in C/C++). However, this approach is too weak for detecting syntax errors,
/// you shouldn't use this library for robust linter or parser.
/// Append one rule to the syntax structure.
///
/// Parameters: syntax  - structure to extend, arrays are resized here when its 'limit' is 0, otherwise preallocated slots are used.
///             enrange - true if any single character from 'begin' starts a match, false if 'begin' is compared as a whole string.
///             derange - true if any single character from 'end' ends a match, false if 'end' is compared as a whole string.
///             begin   - string or character set that starts a match, must not be null, it is copied so the caller keeps its own string.
///             end     - string or character set that ends a match, must not be null, it is copied so the caller keeps its own string.
///             escape  - escape character stored for the rule.
///             colour  - value stored for the rule, not interpreted here.
///             effect  - value stored for the rule, not interpreted here.
/// Returns:    index of the new rule inside the rule arrays.
static natural syntax_define (syntax_structure * syntax, boolean enrange, boolean derange, character * begin, character * end, character escape,
                              natural colour, natural effect) {
	/// New rule goes into the first unused slot.
	natural current = syntax->count;

	fatal_failure (begin == null, "syntax_define: Begin string is null pointer.");
	fatal_failure (end == null, "syntax_define: End string is null pointer.");

	/// Limit of 0 selects dynamic growth, so the limit only applies when it's non-zero, and all 'limit' preallocated slots are usable.
	fatal_failure ((syntax->limit != 0) && (current >= syntax->limit), "syntax_define: Reached the hardcoded limit.");

	++syntax->count;

	/// Dynamic mode, make room for exactly one more rule in each of the parallel arrays.
	if (syntax->limit == 0) {
		syntax->enrange = reallocate (syntax->enrange, syntax->count * sizeof (* syntax->enrange));
		syntax->derange = reallocate (syntax->derange, syntax->count * sizeof (* syntax->derange));
		syntax->begin   = reallocate (syntax->begin,   syntax->count * sizeof (* syntax->begin));
		syntax->end     = reallocate (syntax->end,     syntax->count * sizeof (* syntax->end));
		syntax->escape  = reallocate (syntax->escape,  syntax->count * sizeof (* syntax->escape));
		syntax->colour  = reallocate (syntax->colour,  syntax->count * sizeof (* syntax->colour));
		syntax->effect  = reallocate (syntax->effect,  syntax->count * sizeof (* syntax->effect));
	}

	/// Strings are stored as private copies, one extra character is reserved for the terminator.
	syntax->begin [current] = allocate ((string_length (begin) + 1) * sizeof (* * syntax->begin));
	syntax->end   [current] = allocate ((string_length (end)   + 1) * sizeof (* * syntax->end));

	syntax->enrange [current] = enrange;
	syntax->derange [current] = derange;
	syntax->escape  [current] = escape;
	syntax->colour  [current] = colour;
	syntax->effect  [current] = effect;

	string_copy (syntax->begin [current], begin);
	string_copy (syntax->end   [current], end);

	return (current);
}
/// After all syntax definitions have been defined, call this function inside your main loop, return value is index of selected rule.
///
/// Now, imagine that 'buffer' is file you've loaded into memory, you have declared natural numbers 'offset', 'length' and 'select', and you've
/// properly initialized syntax structure 'syntax', defined its rules for wanted language(s), simple main loop would look like this:
///
/// for (offset = 0; buffer [offset] != '\0'; offset += length) {
/// /// Notice that we're not incrementing 'offset', we're increasing it by 'length'.
/// select = syntax_select (syntax, & buffer [offset], & length);
/// if (select >= syntax->count) {
/// /// Syntax definition is incomplete, unknown sequence has been detected, either print nothing, or print default.
/// } else {
/// /// Print string of 'length', at '& buffer [offset]', using 'syntax->colour [select]' and 'syntax->effect [select]'.
/// /// Strings here aren't null terminated, you want to print sized string.
/// }
/// }
/// Select the rule that matches at the start of 'string' and report how many characters the match covers.
///
/// Parameters: syntax - structure holding the rules, they are tried in the order they were defined, first match wins.
///             string - null terminated input, matching always starts at its first character, it is expected to be non-empty.
///             length - output, number of characters selected, always written before returning, never reaches past the terminator
///                      when a rule is selected.
/// Returns:    index of the selected rule, or 'syntax->count' if no rule matches, in which case '* length' is set to 1.
///
/// NOTE(review): Search for the end of a match starts at the second character, not after the whole 'begin' string, so with rule "/*" to
/// "*/" the input "/*/" is selected as a complete match. Left as is, since changing it would change what existing rules select.
static natural syntax_select (syntax_structure * syntax, character * string, natural * length) {
	natural offset = 0;
	natural subset = 0;
	natural select = 0;

	/// Phase one, find the first rule whose 'begin' matches at the start of the input, 'offset' stays 0 during this phase.
	for (; select != syntax->count; ++select) {
		caliber begin_length = string_length (syntax->begin [select]);

		if (syntax->enrange [select] == false) {
			if (syntax->derange [select] == false) {
				/// Delimited rule, 'begin' is compared against the input over 'begin_length' characters.
				if (string_compare_limit (string, syntax->begin [select], begin_length) == true) {
					break;
				}
			} else {
				/// Keyword-like rule, same comparison, and the character right after it is checked against the 'end' set.
				if ((string_compare_limit (string, syntax->begin [select], begin_length) == true)
				&&  (character_compare_array (string [offset + begin_length], syntax->end [select]) == true)) {
					break;
				}
			}
		} else {
			/// Character set rule, first character of the input has to be equal to any character from 'begin'.
			for (subset = 0; subset != begin_length; ++subset) {
				if (string [offset] == syntax->begin [select] [subset]) {
					goto selected;
				}
			}
		}
	}

selected:
	/// No rule matched, tell the caller to step over one character.
	if (select >= syntax->count) {
		* length = 1;
		return (syntax->count);
	}

	caliber end_length = string_length (syntax->end [select]);

	/// Rule without any terminator can't be extended, it selects exactly one character.
	if (end_length == 0) {
		* length = 1;
		return (select);
	}

	/// Phase two, scan forward from the second character until the terminator of the selected rule is found.
	for (offset = 1; string [offset - 1] != character_null; ++offset) {
		/// Input ended first, this is checked before the escape character because the escape may be '\0' for rules without one.
		if (string [offset] == character_null) {
			break;
		}

		/// Skip the escape character here, and the character after it through the loop increment.
		if (string [offset] == syntax->escape [select]) {
			++offset;
			continue;
		}

		if (syntax->derange [select] == true) {
			/// Any character from 'end' stops the match, that character is not part of the selection.
			for (subset = 0; subset != end_length; ++subset) {
				if (string [offset] == syntax->end [select] [subset]) {
					* length = offset;
					return (select);
				}
			}
		} else {
			/// Whole 'end' string stops the match, and it is included in the selection.
			if (string_compare_limit (& string [offset], syntax->end [select], end_length)) {
				* length = offset + end_length;
				return (select);
			}
		}
	}

	/// Terminator of the rule was never found, selection covers the rest of the input. When the loop condition ended the scan, which
	/// happens after an escape character at the very end, 'offset' is one past the null terminator and has to be pulled back.
	if (string [offset - 1] == character_null) {
		* length = offset - 1;
	} else {
		* length = offset;
	}

	return (select);
}