xolatilization/xcript.h

455 lines
20 KiB
C
Executable File

///                _       _
/// __  _____ _ __(_)_ __ | |_
/// \ \/ / __| '__| | '_ \| __|
///  >  < (__| |  | | |_) | |_
/// /_/\_\___|_|  |_| .__/ \__|
///                 |_|
///
/// Copyright (c) 1997 - Ognjen 'xolatile' Milan Robovic
///
/// xolatile@chud.cyou - xcript - Whitespace insignificant INI/CFG-like script parser.
///
/// This program is free software, free as in freedom and as in free beer, you can redistribute it and/or modify it under the terms of the GNU
/// General Public License as published by the Free Software Foundation, either version 3 of the License, or any later version if you wish...
///
/// This program is distributed in the hope that it will be useful, but it is probably not, and without any warranty, without even the implied
/// warranty of merchantability or fitness for a particular purpose, because it is pointless. Please see the GNU (Geenoo) General Public License
/// for more details, if you dare, it is a lot of text that nobody wants to read...
/// Description
///
/// Xcript, autistic whitespace insignificant INI/CFG-like script parser. What else to say? Usage is more complex, this library is good for
/// defining data in configuration files that corresponds with hardcoded variables or structure fields. More elaborate examples are in my other
/// programs such as Xhads and Xhallenge for now; this will be used in a few more projects that I have planned.
///
/// Since this is a single header library, just include it in your C source file; there is no need to use any stb-like macros for it, as it was
/// intended to be used in projects with only one source file. After that, you want to initialize some structures and open some files; it'll
/// become clearer as you continue reading the "documentation" below. I still think it's better to check out the two projects mentioned above though.
/// Kinds of words (syntax elements) that the script parser reports, one value per call of the parser.
typedef enum {
	script_unknown, /// Initial value, no word was recognized.
	script_comment, /// Comment, starts with ';' or '#'.
	script_string,  /// String literal, enclosed in double or single quotes.
	script_number,  /// Number literal, run of digit characters.
	script_marker,  /// Marker, run of identifier characters.
	script_header,  /// Header, enclosed in '[' and ']'.
	script_assign,  /// Assignment operator '='.
	script_end,     /// Null terminator of the script source was reached.
	script_from,    /// Range begin operator '('.
	script_to,      /// Range end operator ')'.
	script_next     /// Range separator ','.
} script_word_enumeration;
/// Script data structure, you want to define one per file you open, usage will be explained below. The parser works one string at a time,
/// and the fields here describe the string that was selected last.
///
/// script_data_structure * script = null;
typedef struct {
	/// File path for the script, used in warnings and failures.
	character * path;
	/// Raw character data imported from that file.
	character * source;
	/// Amount of ignored characters before last string.
	natural prefix;
	/// Amount of detected characters in last string.
	natural length;
	/// Amount of ignored characters after last string.
	natural suffix;
	/// Offset inside script file data, where the next parse begins.
	natural offset;
	/// Currently active line in script file, used in warnings and failures.
	natural line;
	/// Length of last selected string.
	natural last_length;
	/// Address of first character of last selected string, it points inside 'source' and is not null terminated.
	character * last_string;
	/// Reserved for future use...
	boolean force;
	/// Boolean value for ranged expressions, true between '(' and ')'.
	boolean range;
} script_data_structure;
/// Script structure holding the global identifier table, you want to have one per parsing unit, often one per entire project. Arrays
/// 'identifier' and 'index' are parallel, both hold 'counter' elements.
///
/// script_structure * information = null;
typedef struct {
	/// Identifier count.
	natural counter;
	/// Array of identifier strings.
	character * * identifier;
	/// Array of identifier indices, element N is the index reported for identifier N.
	natural * index;
} script_structure;
/// Report a minor problem in the script. When condition is true, a "/w path: line: message" line is printed and parsing continues, when it
/// is not true, nothing happens at all.
///
/// script_warning (script, name_defined == true, "Marker 'name' already defined.");
static procedure script_warning (script_data_structure * script, boolean condition, character * message) {
	/// Nothing to report, keep quiet.
	if (condition != true) {
		return;
	}

	print ("/w %s: %i: %s\n", script->path, script->line, message);
}
/// Report a major problem in the script. When condition is true, a "/f path: line: message" line is printed and the program exits with
/// 'log_failure', so this function does not return in that case. When condition is not true, nothing happens and parsing continues.
///
/// script_failure (script, name_defined == true, "Marker 'name' already defined.");
static procedure script_failure (script_data_structure * script, boolean condition, character * message) {
	/// Nothing to report, keep parsing.
	if (condition != true) {
		return;
	}

	print ("/f %s: %i: %s\n", script->path, script->line, message);

	exit (log_failure);
}
/// Create script data structure for one file: allocate it, keep a duplicate of the file path, import raw file data, start line counting
/// from 1 and let the last selected string point at the beginning of imported data. Release the result with 'script_close'.
///
/// NOTE(review): fields 'offset' and 'range' are read by the parser but never set here, so this presumably relies on 'allocate' returning
/// zero-filled memory, confirm against its definition.
///
/// script = script_open ("script.cfg");
static script_data_structure * script_open (character * path) {
	script_data_structure * opened = allocate (sizeof (* opened));

	opened->path        = string_duplicate (path);
	opened->source      = file_import (path);
	opened->last_string = opened->source;
	opened->line        = 1;

	return (opened);
}
/// Destroy script data structure created by 'script_open': deallocate imported file data, duplicated file path and the structure itself.
/// Return value is whatever 'deallocate' returns for the structure, so it can be assigned back to the variable.
///
/// script = script_close (script);
static script_data_structure * script_close (script_data_structure * script) {
	script->source = deallocate (script->source);
	script->path   = deallocate (script->path);

	script = deallocate (script);

	return (script);
}
/// Compare given null terminated string against last selected string from script, comparison is limited by length of last selected string.
///
/// if (script_compare (script, "name") == true) { ... }
static boolean script_compare (script_data_structure * script, character * string) {
	boolean same = string_compare_limit (string, script->last_string, script->last_length);

	return (same);
}
/// Compare given null terminated string against identifier stored at position 'index' of identifier array in script structure. There is
/// no range check here, 'index' has to be lower than identifier count.
static boolean script_check (script_structure * information, natural index, character * identifier) {
	character * stored = information->identifier [index];

	return (string_compare (identifier, stored));
}
/// Return duplicate of last selected string from script. This will allocate memory that you have to free later.
///
/// name = script_export_string (script);
static character * script_export_string (script_data_structure * script) {
	character * copy = string_duplicate_limit (script->last_string, script->last_length);

	return (copy);
}
/// Return last selected string from script converted to number, conversion is limited by length of last selected string.
///
/// size = script_export_number (script);
static natural script_export_number (script_data_structure * script) {
	natural value = string_limit_to_number (script->last_string, script->last_length);

	return (value);
}
/// Return identifier index of last selected string from script. Identifiers are searched in order they were added and the index stored
/// for the first match is returned. If nothing matches, this is a fatal failure, message is printed and program exits, error checking is
/// strict on purpose, since a misspelled marker is otherwise annoying to debug.
///
/// kind = script_export_marker (information, script);
static natural script_export_marker (script_structure * information, script_data_structure * script) {
	natural slot = 0;

	while (slot < information->counter) {
		if (script_compare (script, information->identifier [slot]) == true) {
			return (information->index [slot]);
		}

		++slot;
	}

	script_failure (script, true, "No such identifier defined so far in any of the headers!");

	/// Not reached, failure above exits the program, this only keeps the compiler quiet.
	return (~ 0u);
}
/// Function that does all the parsing, it selects the next word from script source and returns what kind of word that was. Blank
/// characters in front of the word are skipped, new lines among them increment 'script->line', then the first character decides:
///
/// - null terminator ...... script_end, nothing is consumed.
/// - '(' .................. script_from, begins a range, failure if a range is already open.
/// - ',' .................. script_next, failure outside of a range.
/// - ')' .................. script_to, ends a range, failure if no range is open.
/// - ';' or '#' ........... script_comment, up to, but not including, end of line or end of source.
/// - '=' .................. script_assign.
/// - double quote ......... script_string, up to next double quote, quotes are not part of selected string, failure inside a range.
/// - single quote ......... script_string, up to next single quote, quotes are not part of selected string, failure inside a range.
/// - '[' .................. script_header, up to next ']', brackets are not part of selected string, failure inside a range.
/// - digit ................ script_number, run of digit characters.
/// - identifier character . script_marker, run of identifier characters.
/// - anything else ........ fatal failure, illegal character.
///
/// After the call, 'last_string' and 'last_length' describe selected word, and 'offset' is moved behind it (prefix + length + suffix), so
/// calling this function in a loop walks through the whole source. New lines inside of strings and headers are not counted.
///
/// word = script_parser (script);
static script_word_enumeration script_parser (script_data_structure * script) {
	script_word_enumeration word = script_unknown;

	script->prefix = 0;
	script->length = 0;
	script->suffix = 0;

	/// Skip blank characters in front of the word, new lines are counted for warning and failure messages.
	for (; character_is_blank (script->source [script->offset + script->prefix]) == true; ++script->prefix) {
		if (script->source [script->offset + script->prefix] == '\n') {
			++script->line;
		}
	}

	if (script->source [script->offset + script->prefix] == '\0') {
		word = script_end;
	} else if (script->source [script->offset + script->prefix] == '(') {
		script_failure (script, script->range == true, "You are already defining a range, only one pair of () is allowed.");
		script->range = true;
		++script->length;
		word = script_from;
	} else if (script->source [script->offset + script->prefix] == ',') {
		script_failure (script, script->range == false, "You can't use ',' outside of a range.");
		++script->length;
		word = script_next;
	} else if (script->source [script->offset + script->prefix] == ')') {
		script_failure (script, script->range == false, "You already defined a range, only one pair of () is allowed.");
		script->range = false;
		++script->length;
		word = script_to;
	} else if ((script->source [script->offset + script->prefix] == ';')
	       ||  (script->source [script->offset + script->prefix] == '#')) {
		/// Comment lasts until end of line, but scanning must also stop at null terminator, otherwise comment on the last line
		/// without trailing new line would make this loop read past the end of source data.
		while ((script->source [script->offset + script->prefix + script->length] != '\n')
		&&     (script->source [script->offset + script->prefix + script->length] != '\0')) {
			++script->length;
		}
		script_warning (script, script->source [script->offset + script->prefix + script->length] == '\0',
		                "Expected at least a trailing new line or some blank character after a comment!");
		word = script_comment;
	} else if (script->source [script->offset + script->prefix] == '=') {
		++script->length;
		word = script_assign;
	} else if (script->source [script->offset + script->prefix] == '"') {
		script_failure (script, script->range == true, "You can't use string inside of a range.");
		/// Start behind opening quote, stop on closing quote, failure exits the program on unterminated string.
		for (script->length = 1; script->source [script->offset + script->prefix + script->length] != '"'; ++script->length) {
			script_failure (script, script->source [script->offset + script->prefix + script->length] == '\0',
			                "Unterminated string literal, missing '\"' character.");
		}
		/// Opening quote becomes part of prefix, closing quote becomes suffix, so selected string excludes both.
		++script->prefix;
		--script->length;
		++script->suffix;
		word = script_string;
	} else if (script->source [script->offset + script->prefix] == '\'') {
		script_failure (script, script->range == true, "You can't use string inside of a range.");
		for (script->length = 1; script->source [script->offset + script->prefix + script->length] != '\''; ++script->length) {
			script_failure (script, script->source [script->offset + script->prefix + script->length] == '\0',
			                "Unterminated string literal, missing ''' character.");
		}
		++script->prefix;
		--script->length;
		++script->suffix;
		word = script_string;
	} else if (script->source [script->offset + script->prefix] == '[') {
		script_failure (script, script->range == true, "You can't use header inside of a range.");
		/// Length starts at 0 here, so '[' itself is counted and then removed again by the adjustment below.
		for (; script->source [script->offset + script->prefix + script->length] != ']'; ++script->length) {
			script_failure (script, script->source [script->offset + script->prefix + script->length] == '\0',
			                "Unterminated header element, missing ']' character.");
		}
		++script->prefix;
		--script->length;
		++script->suffix;
		word = script_header;
	} else if (character_is_digit (script->source [script->offset + script->prefix]) == true) {
		while (character_is_digit (script->source [script->offset + script->prefix + script->length]) == true) {
			++script->length;
		}
		/// Checked behind the number, inside of the loop above current character is always a digit, so it could never fire.
		script_warning (script, script->source [script->offset + script->prefix + script->length] == '\0',
		                "Expected at least a trailing new line or some blank character after a number!");
		word = script_number;
	} else if (character_is_identifier (script->source [script->offset + script->prefix]) == true) {
		while (character_is_identifier (script->source [script->offset + script->prefix + script->length]) == true) {
			++script->length;
		}
		/// Checked behind the marker, for the same reason as with numbers.
		script_warning (script, script->source [script->offset + script->prefix + script->length] == '\0',
		                "Expected at least a trailing new line or some blank character after a marker!");
		word = script_marker;
	} else {
		script_failure (script, true, format ("Illegal character '%c' in script.", script->source [script->offset + script->prefix]));
	}

	/// Remember selected word and move behind it, next call continues from there.
	script->last_string = & script->source [script->offset + script->prefix];
	script->last_length = script->length;

	script->offset += script->prefix + script->length + script->suffix;

	return (word);
}
/// Handle a header, that's the small part of file containing "[foo_bar]" text. This function does not parse anything on its own, it works
/// with last selected string, so call it right after the parser returned a header. If variable 'accept' is true, duplicate of that string
/// is appended to identifiers in script structure and paired with given 'index', otherwise script structure is left untouched. In both
/// cases another duplicate of that string is returned, which can be ignored or processed further, but it's allocated, so free it later.
static character * script_expect_header (script_structure * information, script_data_structure * script, natural index, boolean accept) {
	if (accept == true) {
		natural slot = 0;

		/// Grow both parallel arrays by one element, new identifier goes into the last slot.
		information->counter += 1;

		information->identifier = reallocate (information->identifier, information->counter * sizeof (* information->identifier));
		information->index      = reallocate (information->index,      information->counter * sizeof (* information->index));

		slot = information->counter - 1;

		information->identifier [slot] = string_duplicate_limit (script->last_string, script->last_length);
		information->index      [slot] = index;
	}

	return (script_export_string (script));
}
/// Parse "= 'text'" part of an assignment, if you want your configuration data to be string. Next two words have to be assignment
/// operator and string literal, anything else is a fatal failure. Returned string is an allocated duplicate, free it later.
static character * script_expect_string (script_data_structure * script) {
	script_word_enumeration word = script_parser (script);

	script_failure (script, word != script_assign, "Expected '=', assignment operator.");

	word = script_parser (script);

	script_failure (script, word != script_string, "Expected string literal.");

	return (script_export_string (script));
}
/// Parse "= 123" part of an assignment, if you want your configuration data to be number. Next two words have to be assignment operator
/// and number literal, anything else is a fatal failure. No floating point numbers!
static natural script_expect_number (script_data_structure * script) {
	script_word_enumeration word = script_parser (script);

	script_failure (script, word != script_assign, "Expected '=', assignment operator.");

	word = script_parser (script);

	script_failure (script, word != script_number, "Expected number literal.");

	return (script_export_number (script));
}
/// Parse "= name" part of an assignment, where name should be previously defined identifier, also known as header string. Next two words
/// have to be assignment operator and marker, anything else, including marker that was never defined, is a fatal failure. Returns index
/// that is stored for that identifier.
static natural script_expect_marker (script_structure * information, script_data_structure * script) {
	script_word_enumeration word = script_parser (script);

	script_failure (script, word != script_assign, "Expected '=', assignment operator.");

	word = script_parser (script);

	script_failure (script, word != script_marker, "Expected marker literal.");

	return (script_export_marker (information, script));
}
/// Parse "= 123" or "= name" part of an assignment, but not string, sometimes you want to use hardcoded enumerations in configuration
/// files. Number literal is returned as number, marker is returned as index stored for that identifier, anything else is a fatal failure.
static natural script_expect_number_or_marker (script_structure * information, script_data_structure * script) {
	script_word_enumeration word = script_parser (script);

	script_failure (script, word != script_assign, "Expected '=', assignment operator.");

	word = script_parser (script);

	if (word == script_number) {
		return (script_export_number (script));
	}

	if (word == script_marker) {
		return (script_export_marker (information, script));
	}

	script_failure (script, true, "Expected number or marker literal.");

	/// Not reached, failure above exits the program.
	return (~ 0u);
}
/// Parse "= (a, b, c)" part of an assignment into newly allocated array. Every element is either number literal or previously defined
/// marker, elements are stored in order they appear in the script, and are separated with ','. Trailing ',' in front of ')' is accepted,
/// and empty range "()" is accepted too. Anything else is a fatal failure.
///
/// Amount of elements is written to 'count', it must not be null. Returned array has to be freed later, it's null for an empty range.
///
/// array = script_expect_ordered_array (information, script, & count);
static natural * script_expect_ordered_array (script_structure * information, script_data_structure * script, natural * count) {
	script_word_enumeration word = script_unknown;

	natural   found = 0;
	natural * array = null;

	script_failure (script, (word = script_parser (script)) != script_assign, "Expected '=', assignment operator.");
	script_failure (script, (word = script_parser (script)) != script_from, "Expected '(', begin range operator.");

	for (word = script_parser (script); word != script_to; word = script_parser (script)) {
		++found;

		array = reallocate (array, found * sizeof (* array));

		if (word == script_number) {
			array [found - 1] = script_export_number (script);
		} else if (word == script_marker) {
			array [found - 1] = script_export_marker (information, script);
		} else {
			script_failure (script, true, "Expected number or marker!");
		}

		/// Element has to be followed by ')' to finish the range, or by ',' to continue with next element.
		word = script_parser (script);

		if (word == script_to) {
			break;
		}

		/// End of file is checked first, otherwise the more general check below would hide this message.
		script_failure (script, word == script_end, "Expected ranged to ')'.");
		script_failure (script, word != script_next, "Expected ranged next ','.");
	}

	(* count) = found;

	return (array);
}
/// Parse "= (name = value, other = value)" part of an assignment into newly allocated array of 'count' elements. Every entry begins with
/// previously defined marker, index stored for that marker selects the array element, then comes '=' and the value, which is either number
/// literal or another previously defined marker. Entries are separated with ',', trailing ',' in front of ')' is accepted, and so is
/// empty range "()". Anything else is a fatal failure, including marker whose index doesn't fit into the array.
///
/// Elements that are not mentioned in the script keep the value they got from 'allocate'. Returned array has to be freed later.
///
/// array = script_expect_unordered_array (information, script, count);
static natural * script_expect_unordered_array (script_structure * information, script_data_structure * script, natural count) {
	script_word_enumeration word = script_unknown;

	natural * array = allocate (count * sizeof (* array));

	script_failure (script, (word = script_parser (script)) != script_assign, "Expected '=', assignment operator.");
	script_failure (script, (word = script_parser (script)) != script_from, "Expected '(', begin range operator.");

	for (word = script_parser (script); word != script_to; word = script_parser (script)) {
		natural index = 0;

		/// Kind of the word is verified before the lookup, so wrong word is reported as such, not as unknown identifier.
		script_failure (script, word != script_marker, "Expected ranged marker.");

		index = script_export_marker (information, script);

		/// Index comes from script data, refuse to write outside of the array.
		script_failure (script, index >= count, "Ranged marker index is out of bounds for this array.");

		script_failure (script, (word = script_parser (script)) != script_assign, "Expected '=', assignment operator.");

		word = script_parser (script);

		if (word == script_number) {
			array [index] = script_export_number (script);
		} else if (word == script_marker) {
			array [index] = script_export_marker (information, script);
		} else {
			script_failure (script, true, "Expected number or marker!");
		}

		/// Entry has to be followed by ')' to finish the range, or by ',' to continue with next entry.
		word = script_parser (script);

		if (word == script_to) {
			break;
		}

		/// End of file is checked first, otherwise the more general check below would hide this message.
		script_failure (script, word == script_end, "Expected ranged to ')'.");
		script_failure (script, word != script_next, "Expected ranged next ','.");
	}

	return (array);
}
/// Initialize script structure by importing basic headers from general script file. Consider that file an entry point of all scripts,
/// since this script parser assumes there'll be thousands of tiny configuration files (scripts!), but you can also use only one script if
/// you wanted. General script may contain only headers and comments, anything else is a fatal failure. Every header becomes an
/// identifier, and its index is its position among the headers, starting from 0.
///
/// NOTE(review): identifier count and both arrays are used without being set first, so this presumably relies on 'allocate' returning
/// zero-filled memory, confirm against its definition.
///
/// script = script_initialize ("general.cfg");
static script_structure * script_initialize (character * general_script_file_path) {
	script_structure * script = allocate (sizeof (* script));

	script_data_structure * general = script_open (general_script_file_path);

	script_word_enumeration word = script_parser (general);

	while (word != script_end) {
		if (word == script_header) {
			/// Grow both parallel arrays by one element and store the header in the last slot.
			script->counter += 1;

			script->identifier = reallocate (script->identifier, script->counter * sizeof (* script->identifier));
			script->index      = reallocate (script->index,      script->counter * sizeof (* script->index));

			script->identifier [script->counter - 1] = string_duplicate_limit (general->last_string, general->last_length);
			script->index      [script->counter - 1] = script->counter - 1;
		} else if (word != script_comment) {
			/// Comments are skipped silently, everything else is not allowed here.
			script_failure (general, true, "Expected header in general script.");
		}

		word = script_parser (general);
	}

	general = script_close (general);

	return (script);
}
/// Deinitialize script structure by deallocating every identifier string, both arrays and the structure itself. You can clean up stuff at
/// program exit point, or after reading the script. Return value is whatever 'deallocate' returns for the structure, so it can be
/// assigned back to the variable.
///
/// script = script_deinitialize (script);
static script_structure * script_deinitialize (script_structure * script) {
	natural index = 0;

	while (index < script->counter) {
		script->identifier [index] = deallocate (script->identifier [index]);

		++index;
	}

	script->identifier = deallocate (script->identifier);
	script->index      = deallocate (script->index);

	script = deallocate (script);

	return (script);
}
/// This function is similar to 'script_export_marker', but it looks up given null terminated identifier string instead of last selected
/// string from script. Index stored for the first matching identifier is returned, if nothing matches, 'fatal_failure' is called.
///
/// kind = script_indexer (information, "name");
static natural script_indexer (script_structure * information, character * identifier) {
	natural slot = 0;

	while (slot < information->counter) {
		if (string_compare (identifier, information->identifier [slot]) == true) {
			return (information->index [slot]);
		}

		++slot;
	}

	fatal_failure (true, "script_indexer: No such identifier defined so far in any of the headers!");

	return (~ 0u);
}