/// xolatilization/xcript.h

///                _       _
/// __  _____ _ __(_)_ __ | |_
/// \ \/ / __| '__| | '_ \| __|
///  >  < (__| |  | | |_) | |_
/// /_/\_\___|_|  |_| .__/ \__|
///                 |_|
///
/// Copyright (c) 1997 - Ognjen 'xolatile' Milan Robovic
///
/// xolatile@chud.cyou - xcript - Whitespace insignificant INI/CFG-like script parser.
///
/// This program is free software, free as in freedom and as in free beer, you can redistribute it and/or modify it under the terms of the GNU
/// General Public License as published by the Free Software Foundation, either version 3 of the License, or any later version if you wish...
///
/// This program is distributed in the hope that it will be useful, but it is probably not, and without any warranty, without even the implied
/// warranty of merchantability or fitness for a particular purpose, because it is pointless. Please see the GNU (Geenoo) General Public License
/// for more details, if you dare, it is a lot of text that nobody wants to read...

/// Description
///
/// Xcript, autistic whitespace insignificant INI/CFG-like script parser. What else to say? Usage is more complex, this library is good for
/// defining data in configuration files that corresponds with hardcoded variables or structure fields. More elaborate examples are in my other
/// programs such as Xhads and Xhallenge for now, this will be used in few more projects that I planned.
///
/// Since this is single header library, just include it in your C source file, no need to use any stb-like macros for it as it was intended to
/// be used in projects with only one source file. After that, you want to initialize some structures and open some files, it'll be more clear as
/// you continue reading "documentation" below. I still think it's better to check out two projects mentioned above tho.

/// Enumeration of scripts syntax elements.

/// Kinds of words that 'script_parser' reports, one value per recognized syntax element.
typedef enum {
	script_unknown, /// Initial value, no word has been classified.
	script_comment, /// Text from ';' or '#' up to the end of the line.
	script_string,  /// Text enclosed in double or single quotes.
	script_number,  /// Run of digit characters.
	script_marker,  /// Run of identifier characters.
	script_header,  /// Text enclosed in '[' and ']'.
	script_assign,  /// The '=' character.
	script_end,     /// Null terminator, end of the script source.
	script_from,    /// The '(' character, opens a range.
	script_to,      /// The ')' character, closes a range.
	script_next     /// The ',' character, separates elements of a range.
} script_word_enumeration;

/// Script data structure, you want to define one per file you open, usage will be explained below, they work one string at a time.
///
/// script_data_structure * script = null;

/// Parsing state of one opened script file. Filled by 'script_open', advanced by every call of 'script_parser'.
typedef struct {
	character * path;        /// Duplicate of the file path, printed in warnings and failures.
	character * source;      /// Raw character data imported from that file, parser treats '\0' as the end of it.
	natural     prefix;      /// Amount of characters skipped before last word (blanks, plus opening quote or bracket).
	natural     length;      /// Amount of characters in last word, without enclosing quotes or brackets.
	natural     suffix;      /// Amount of characters skipped after last word (closing quote or bracket).
	natural     offset;      /// Offset inside 'source' where the next call of parser starts.
	natural     line;        /// Currently active line, starts at 1 and grows on every new line skipped as blank.
	natural     last_length; /// Length of last selected word, copy of 'length' made by parser.
	character * last_string; /// Address of first character of last selected word inside 'source', not null terminated.
	boolean     force;       /// Reserved for future use, nothing in this file reads or writes it.
	boolean     range;       /// True while parser is between '(' and ')' of a ranged expression.
} script_data_structure;

/// Script structure containing global data, you want to have one per parsing unit, often one per entire project.
///
/// script_structure * information = null;

/// Table of known identifiers (header names), two parallel arrays with 'counter' elements each.
typedef struct {
	natural       counter;    /// Amount of elements stored in both arrays below.
	character * * identifier; /// Array of separately allocated, null terminated identifier strings.
	natural     * index;      /// Array of values reported for identifiers, element N belongs to identifier N.
} script_structure;

/// Print a warning message if condition was true, otherwise continue parsing the script, minor errors should be warnings.
///
/// script_warning (script, name_defined == true, "Marker 'name' already defined.");

/// Report a non-fatal problem at the current line of the script, only when 'condition' is true. Parsing continues either way.
static procedure script_warning (script_data_structure * script, boolean condition, character * message) {
	/// Nothing to report, leave quietly.
	if (condition != true) {
		return;
	}

	print ("/w %s: %i: %s\n", script->path, script->line, message);
}

/// Print a failure message and exit program if condition was true, otherwise continue parsing the script, major errors should be failures.
///
/// script_failure (script, name_defined == true, "Marker 'name' already defined.");

/// Report a fatal problem at the current line of the script and exit the program, only when 'condition' is true.
static procedure script_failure (script_data_structure * script, boolean condition, character * message) {
	/// Nothing went wrong, caller continues parsing.
	if (condition != true) {
		return;
	}

	print ("/f %s: %i: %s\n", script->path, script->line, message);

	exit (log_failure);
}

/// Initialize script data structure, by allocating memory for it, importing raw data from file path, and set other important fields.
///
/// script = script_open ("script.cfg");

/// Allocate script data structure for file at 'path', keep a duplicate of the path and import content of the file as parser source.
/// Returned structure has to be released with 'script_close'.
///
/// NOTE(review): fields other than path, source, line and last_string are not set here, presumably 'allocate' returns zeroed memory, confirm.
static script_data_structure * script_open (character * path) {
	script_data_structure * opened = allocate (sizeof (* opened));

	opened->path   = string_duplicate (path);
	opened->source = file_import      (path);

	/// No word was selected yet, so last string points at the very beginning of the source, and counting of lines starts from one.
	opened->last_string = opened->source;
	opened->line        = 1;

	return (opened);
}

/// Deinitialize script data structure, by deallocating memory used in it.
///
/// script = script_close (script);

/// Release everything owned by script data structure, then the structure itself. Result of the final 'deallocate' is returned, so caller
/// can overwrite its own pointer with it.
static script_data_structure * script_close (script_data_structure * script) {
	script->source = deallocate (script->source);
	script->path   = deallocate (script->path);

	script = deallocate (script);

	return (script);
}

/// Compare certain null terminated string with last selected string from script.

/// Check whether last selected word of the script is exactly equal to null terminated 'string'.
///
/// Limited comparison alone only looks at first 'last_length' characters, so word "foo" would also be accepted for string "foobar".
/// That's why 'string' is additionally required to end right after those characters. Second check runs only if limited comparison
/// succeeded, in that case 'string' is known to hold at least 'last_length' characters, so the access stays inside of it.
///
/// NOTE(review): this assumes 'string_compare_limit' compares exactly 'limit' characters and nothing else, confirm against its definition.
static boolean script_compare (script_data_structure * script, character * string) {
	return (string_compare_limit (string, script->last_string, script->last_length)
	     && (string [script->last_length] == '\0'));
}

/// Compare certain null terminated string with identifier from script structure selected by index.

/// Compare null terminated 'identifier' with identifier stored at position 'index' of script structure. Index isn't validated here.
static boolean script_check (script_structure * information, natural index, character * identifier) {
	character * stored = information->identifier [index];

	return (string_compare (identifier, stored));
}

/// Return duplicate of last selected string from script. This will allocate memory that you have to free later.

/// Duplicate last selected word of the script, limited to its length. Result comes from 'string_duplicate_limit', caller owns it.
static character * script_export_string (script_data_structure * script) {
	character * duplicate = string_duplicate_limit (script->last_string, script->last_length);

	return (duplicate);
}

/// Return duplicate of last selected number from script.

/// Convert last selected word of the script, limited to its length, into a number using 'string_limit_to_number'.
static natural script_export_number (script_data_structure * script) {
	natural number = string_limit_to_number (script->last_string, script->last_length);

	return (number);
}

/// Return identifier index of last selected string from script, if it's not identified, this function will exit program (fatal failure). I want
/// strict error checking in my programs, only in parts where I encountered tiny mistakes that were more difficult to debug (took more than 30
/// seconds). This code is under GNU/GPLv3 license, you can modify it as long as you're in the frame of the license.

/// Look up last selected word of the script among known identifiers, first match wins and its stored index value is returned.
/// If nothing matches, failure is reported through 'script_failure', which exits the program.
static natural script_export_marker (script_structure * information, script_data_structure * script) {
	natural counter = 0;

	while (counter < information->counter) {
		if (script_compare (script, information->identifier [counter]) == true) {
			return (information->index [counter]);
		}

		++counter;
	}

	script_failure (script, true, "No such identifier defined so far in any of the headers!");

	/// Not reached, failure above exits, this only keeps the function well formed.
	return (~ 0u);
}

/// Big big badass bug, function that does all the parsing, I don't even want to explain how it works, it's obvious from reading. Good luck.

/// Scan next word of the script and report its kind.
///
/// Leading blank characters are skipped first, every new line among them advances 'script->line'. First character after them decides
/// the kind of the word:
///
///   '\0' end   '(' from   ',' next   ')' to   ';' or '#' comment   '=' assign   '"' or ''' string   '[' header   digit number
///   identifier character marker, anything else is fatal failure.
///
/// On return, 'last_string' and 'last_length' select the word inside of 'source' (without quotes or brackets for strings and headers),
/// and 'offset' is moved right after the word (and its closing quote or bracket), so next call continues from there. Range state is
/// tracked in 'script->range', nested or unbalanced parentheses, as well as strings and headers inside of a range, are fatal failures.
static script_word_enumeration script_parser (script_data_structure * script) {
	script_word_enumeration word = script_unknown;

	script->prefix = 0;
	script->length = 0;
	script->suffix = 0;

	for (; character_is_blank (script->source [script->offset + script->prefix]) == true; ++script->prefix) {
		if (script->source [script->offset + script->prefix] == '\n') {
			++script->line;
		}
	}

	if (script->source [script->offset + script->prefix] == '\0') {
		word = script_end;
	} else if (script->source [script->offset + script->prefix] == '(') {
		script_failure (script, script->range == true, "You are already defining a range, only one pair of () is allowed.");
		script->range = true;
		++script->length;
		word = script_from;
	} else if (script->source [script->offset + script->prefix] == ',') {
		script_failure (script, script->range == false, "You can't use ',' outside of a range.");
		++script->length;
		word = script_next;
	} else if (script->source [script->offset + script->prefix] == ')') {
		script_failure (script, script->range == false, "You already defined a range, only one pair of () is allowed.");
		script->range = false;
		++script->length;
		word = script_to;
	} else if ((script->source [script->offset + script->prefix] == ';')
	       ||  (script->source [script->offset + script->prefix] == '#')) {
		/// Comment lasts until the new line, which is left for blank skipping of the next call. Scanning also has to stop at the null
		/// terminator, warning doesn't end the program, so without this the loop would run past the end of the source.
		while ((script->source [script->offset + script->prefix + script->length] != '\n')
		   &&  (script->source [script->offset + script->prefix + script->length] != '\0')) {
			++script->length;
		}
		script_warning (script, script->source [script->offset + script->prefix + script->length] == '\0',
		                "Expected at least a trailing new line or some blank character after a comment!");
		word = script_comment;
	} else if (script->source [script->offset + script->prefix] == '=') {
		++script->length;
		word = script_assign;
	} else if (script->source [script->offset + script->prefix] == '"') {
		script_failure (script, script->range == true, "You can't use string inside of a range.");
		/// Start from 1 to step over the opening quote, otherwise it would be taken for the closing one.
		for (script->length = 1; script->source [script->offset + script->prefix + script->length] != '"'; ++script->length) {
			script_failure (script, script->source [script->offset + script->prefix + script->length] == '\0',
			                "Unterminated string literal, missing '\"' character.");
		}
		/// Exclude both quotes from the selected word, but still step over them.
		++script->prefix;
		--script->length;
		++script->suffix;
		word = script_string;
	} else if (script->source [script->offset + script->prefix] == '\'') {
		script_failure (script, script->range == true, "You can't use string inside of a range.");
		/// Start from 1 to step over the opening quote, otherwise it would be taken for the closing one.
		for (script->length = 1; script->source [script->offset + script->prefix + script->length] != '\''; ++script->length) {
			script_failure (script, script->source [script->offset + script->prefix + script->length] == '\0',
			                "Unterminated string literal, missing ''' character.");
		}
		/// Exclude both quotes from the selected word, but still step over them.
		++script->prefix;
		--script->length;
		++script->suffix;
		word = script_string;
	} else if (script->source [script->offset + script->prefix] == '[') {
		script_failure (script, script->range == true, "You can't use header inside of a range.");
		for (; script->source [script->offset + script->prefix + script->length] != ']'; ++script->length) {
			script_failure (script, script->source [script->offset + script->prefix + script->length] == '\0',
			                "Unterminated header element, missing ']' character.");
		}
		/// Exclude both brackets from the selected word, but still step over them.
		++script->prefix;
		--script->length;
		++script->suffix;
		word = script_header;
	} else if (character_is_digit (script->source [script->offset + script->prefix]) == true) {
		while (character_is_digit (script->source [script->offset + script->prefix + script->length]) == true) {
			++script->length;
		}
		/// This has to be checked after the loop, null terminator is never a digit, so inside of the loop it could never be seen.
		script_warning (script, script->source [script->offset + script->prefix + script->length] == '\0',
		                "Expected at least a trailing new line or some blank character after a number!");
		word = script_number;
	} else if (character_is_identifier (script->source [script->offset + script->prefix]) == true) {
		while (character_is_identifier (script->source [script->offset + script->prefix + script->length]) == true) {
			++script->length;
		}
		/// Same as for numbers, end of the source can only be noticed once the loop has stopped.
		script_warning (script, script->source [script->offset + script->prefix + script->length] == '\0',
		                "Expected at least a trailing new line or some blank character after a marker!");
		word = script_marker;
	} else {
		script_failure (script, true, format ("Illegal character '%c' in script.", script->source [script->offset + script->prefix]));
	}

	script->last_string = & script->source [script->offset + script->prefix];
	script->last_length =   script->length;

	script->offset += script->prefix + script->length + script->suffix;

	return (word);
}

/// Okay, if you're not scared by the abomination written above, lets continue. This function checks for header in script file. That's the small
/// part of file containing "[foo_bar]" text. If variable 'accept' is true, it'll add that identifier into script structure, otherwise it'll just
/// return that string, which can be ignored or processed further.

/// Handle last selected word of the script as a header name. When 'accept' is true, the name is duplicated and appended to the table
/// of identifiers together with 'index'. In both cases a separate duplicate of the name is returned, obtained from
/// 'script_export_string'. This function doesn't call the parser, it works on the word that was selected before.
static character * script_expect_header (script_structure * information, script_data_structure * script, natural index, boolean accept) {
	if (accept == true) {
		/// Position of the new element, it's the old amount of elements.
		natural slot = information->counter;

		information->counter = slot + 1;

		information->identifier = reallocate (information->identifier, information->counter * sizeof (* information->identifier));
		information->index      = reallocate (information->index,      information->counter * sizeof (* information->index));

		information->identifier [slot] = string_duplicate_limit (script->last_string, script->last_length);
		information->index      [slot] = index;
	}

	return (script_export_string (script));
}

/// This function checks for string, if you want your configuration data to be string, this will check for it.

/// Parse next two words of the script, they have to be '=' and a string literal, anything else is fatal failure. Duplicate of the
/// string is returned, obtained from 'script_export_string'.
static character * script_expect_string (script_data_structure * script) {
	script_failure (script, script_parser (script) != script_assign, "Expected '=', assignment operator.");
	script_failure (script, script_parser (script) != script_string, "Expected string literal.");

	return (script_export_string (script));
}

/// This function checks for number, if you want your configuration data to be number, this will check for it again. No floating point numbers!

/// Parse next two words of the script, they have to be '=' and a number literal, anything else is fatal failure. Value of the number
/// is returned, obtained from 'script_export_number'.
static natural script_expect_number (script_data_structure * script) {
	script_failure (script, script_parser (script) != script_assign, "Expected '=', assignment operator.");
	script_failure (script, script_parser (script) != script_number, "Expected number literal.");

	return (script_export_number (script));
}

/// This function checks for marker, this should be previously defined identifier, also known as header string.

/// Parse next two words of the script, they have to be '=' and a marker, anything else is fatal failure. Index value of the marker is
/// returned, obtained from 'script_export_marker', which itself fails if the marker isn't a known identifier.
static natural script_expect_marker (script_structure * information, script_data_structure * script) {
	script_failure (script, script_parser (script) != script_assign, "Expected '=', assignment operator.");
	script_failure (script, script_parser (script) != script_marker, "Expected marker literal.");

	return (script_export_marker (information, script));
}

/// This function checks for number or marker, but not string, sometimes you want to use hardcoded enumerations in configuration files.

/// Parse next two words of the script, they have to be '=' followed by either a number literal or a marker. Number is returned as its
/// value, marker as its index value from the table of identifiers. Any other word is fatal failure.
static natural script_expect_number_or_marker (script_structure * information, script_data_structure * script) {
	script_failure (script, script_parser (script) != script_assign, "Expected '=', assignment operator.");

	switch (script_parser (script)) {
		case script_number: return (script_export_number (script));
		case script_marker: return (script_export_marker (information, script));
		default:            break;
	}

	script_failure (script, true, "Expected number or marker literal.");

	/// Not reached, failure above exits, this only keeps the function well formed.
	return (~ 0u);
}

/// Warning: I don't know how to really explain this...

/// Parse '= ( a, b, c )' from the script, where every element is either a number literal or a marker. Elements are stored in the order
/// they were written, numbers as their value, markers as their index value from the table of identifiers.
///
/// Amount of elements is written to 'count', and array grown by 'reallocate' is returned, it's null for an empty range. Element
/// directly followed by ')' ends the range, and so does ')' after a separator, so trailing ',' is tolerated. Malformed input is
/// fatal failure.
static natural * script_expect_ordered_array (script_structure * information, script_data_structure * script, natural * count) {
	script_word_enumeration word = script_unknown;

	natural   found = 0;
	natural * array = null;

	script_failure (script, (word = script_parser (script)) != script_assign, "Expected '=', assignment operator.");
	script_failure (script, (word = script_parser (script)) != script_from,   "Expected '(', begin range operator.");

	for (word = script_parser (script); word != script_to; word = script_parser (script)) {
		++found;

		array = reallocate (array, found * sizeof (* array));

		if (word == script_number) {
			array [found - 1] = script_export_number (script);
		} else if (word == script_marker) {
			array [found - 1] = script_export_marker (information, script);
		} else {
			script_failure (script, true, "Expected number or marker!");
		}

		if ((word = script_parser (script)) == script_to) break;

		/// End of the script has to be checked before the separator, end isn't a separator either, so in the opposite order this
		/// message could never be printed.
		script_failure (script, word == script_end,  "Expected ranged to ')'.");
		script_failure (script, word != script_next, "Expected ranged next ','.");
	}

	(* count) = found;

	return (array);
}

/// Warning: I don't know how to really explain this...

/// Parse '= ( marker = value, marker = value )' from the script. Every marker selects an element of the array by its index value from
/// the table of identifiers, and value (number literal or another marker) is stored into that element, so pairs can be written in any
/// order. Array of 'count' elements is allocated up front and returned, malformed input is fatal failure.
///
/// NOTE(review): elements that aren't mentioned in the script keep whatever 'allocate' returned, presumably zero, confirm.
static natural * script_expect_unordered_array (script_structure * information, script_data_structure * script, natural count) {
	script_word_enumeration word = script_unknown;

	natural * array = allocate (count * sizeof (* array));

	script_failure (script, (word = script_parser (script)) != script_assign, "Expected '=', assignment operator.");
	script_failure (script, (word = script_parser (script)) != script_from,   "Expected '(', begin range operator.");

	for (word = script_parser (script); word != script_to; word = script_parser (script)) {
		natural index = 0;

		/// Kind of the word has to be verified before it's looked up, otherwise wrong word is reported as unknown identifier.
		script_failure (script, word != script_marker, "Expected ranged marker.");

		index = script_export_marker (information, script);

		/// Index comes from script data, it must never select an element outside of the array.
		script_failure (script, index >= count, "Ranged marker index is out of array bounds.");

		script_failure (script, (word = script_parser (script)) != script_assign, "Expected '=', assignment operator.");

		word = script_parser (script);

		if (word == script_number) {
			array [index] = script_export_number (script);
		} else if (word == script_marker) {
			array [index] = script_export_marker (information, script);
		} else {
			script_failure (script, true, "Expected number or marker!");
		}

		if ((word = script_parser (script)) == script_to) break;

		/// End of the script has to be checked before the separator, end isn't a separator either, so in the opposite order this
		/// message could never be printed.
		script_failure (script, word == script_end,  "Expected ranged to ')'.");
		script_failure (script, word != script_next, "Expected ranged next ','.");
	}

	return (array);
}

/// Initialize script structure by importing basic headers from general script file. This is where things get complex. Every script parser needs
/// to have main, general script, that's not an empty file! Consider this an entry point of all scripts, since this script parser assumes
/// there'll be thousands of tiny configuration files (scripts!), but you can also use only one script if you wanted.
///
/// script = script_initialize ("general.cfg");

/// Build the table of identifiers from general script file. The file may only contain headers and comments, every header is stored as
/// identifier whose index value is its position in the table, any other word is fatal failure. The file is closed again before return.
///
/// NOTE(review): table starts from freshly allocated structure without explicit setup, presumably 'allocate' returns zeroed memory, confirm.
static script_structure * script_initialize (character * general_script_file_path) {
	script_structure      * information = allocate (sizeof (* information));
	script_data_structure * general     = script_open (general_script_file_path);

	script_word_enumeration word = script_parser (general);

	while (word != script_end) {
		if (word == script_header) {
			++information->counter;

			information->identifier = reallocate (information->identifier, information->counter * sizeof (* information->identifier));
			information->index      = reallocate (information->index,      information->counter * sizeof (* information->index));

			information->identifier [information->counter - 1] = string_duplicate_limit (general->last_string, general->last_length);
			information->index      [information->counter - 1] = information->counter - 1;
		} else if (word != script_comment) {
			/// Comments are silently skipped, everything else has no place in general script.
			script_failure (general, true, "Expected header in general script.");
		}

		word = script_parser (general);
	}

	general = script_close (general);

	return (information);
}

/// Deinitialize script structure by deallocating all identifier data. You can clean up stuff at program exit point, or after reading the script.
///
/// script = script_deinitialize (script);

/// Release every identifier string, then both arrays, then script structure itself. Result of the final 'deallocate' is returned, so
/// caller can overwrite its own pointer with it.
static script_structure * script_deinitialize (script_structure * script) {
	natural index = 0;

	while (index < script->counter) {
		script->identifier [index] = deallocate (script->identifier [index]);

		++index;
	}

	script->identifier = deallocate (script->identifier);
	script->index      = deallocate (script->index);

	script = deallocate (script);

	return (script);
}

/// This function is similar to 'script_export_marker', but it compares identifier string, then returns index if it was found.

/// Look up null terminated 'identifier' in the table of identifiers, first match wins and its stored index value is returned. If nothing
/// matches, 'fatal_failure' is called with a message naming this function.
static natural script_indexer (script_structure * information, character * identifier) {
	natural counter = 0;

	while (counter < information->counter) {
		if (string_compare (identifier, information->identifier [counter]) == true) {
			return (information->index [counter]);
		}

		++counter;
	}

	fatal_failure (true, "script_indexer: No such identifier defined so far in any of the headers!");

	return (~ 0u);
}