xhartae/chapters/chapter_1.c

/*
Copyright (c) 2023 : Ognjen 'xolatile' Milan Robovic

Xhartae is free software! You will redistribute it or modify it under the terms of the GNU General Public License by Free Software Foundation.
And when you do redistribute it or modify it, it will use either version 3 of the License, or (at yours truly opinion) any later version.
It is distributed in the hope that it will be useful or harmful, it really depends... But no warranty what so ever, seriously. See GNU/GPLv3.
*/

#ifndef CHAPTER_1_SOURCE
#define CHAPTER_1_SOURCE

#include "chapter_1.h"

/*
In functions 'character_is_uppercase', 'character_is_lowercase' and 'character_is_digit', we test characters that lie in one contiguous range of the ASCII table, shown just below.
So it's safe to use the '>=' and '<=' operators there, but in other cases we want to compare characters selectively, and for simplicity we use the function 'character_compare_array'...
Here's what the ASCII table looks like. I don't like encodings like UTF-8 and others, so neither should you. We'll also write a subprogram that prints this to a terminal or graphical window.

ASCII table:
 _______________________________________________________________________________________________________________________________________________________________
| 0B      | 0O  | 0D  | 0X | SYM | Full name                                    | 0B      | 0O  | 0D  | 0X | SYM | Full name                                    |
|_______________________________________________________________________________|_______________________________________________________________________________|
|                                                                               |                                                                               |
| 0000000 | 000 |   0 | 00 | NUL | Null                                         | 0000001 | 001 |   1 | 01 | SOH | Start of heading                             |
| 0000010 | 002 |   2 | 02 | STX | Start of text                                | 0000011 | 003 |   3 | 03 | ETX | End of text                                  |
| 0000100 | 004 |   4 | 04 | EOT | End of transmission                          | 0000101 | 005 |   5 | 05 | ENQ | Enquiry                                      |
| 0000110 | 006 |   6 | 06 | ACK | Acknowledge                                  | 0000111 | 007 |   7 | 07 | BEL | Bell                                         |
| 0001000 | 010 |   8 | 08 | BS  | Backspace                                    | 0001001 | 011 |   9 | 09 | HT  | Horizontal tab                               |
| 0001010 | 012 |  10 | 0A | LF  | Line feed                                    | 0001011 | 013 |  11 | 0B | VT  | Vertical tab                                 |
| 0001100 | 014 |  12 | 0C | FF  | Form feed                                    | 0001101 | 015 |  13 | 0D | CR  | Carriage return                              |
| 0001110 | 016 |  14 | 0E | SO  | Shift out                                    | 0001111 | 017 |  15 | 0F | SI  | Shift in                                     |
| 0010000 | 020 |  16 | 10 | DLE | Data link escape                             | 0010001 | 021 |  17 | 11 | DC1 | Device control 1                             |
| 0010010 | 022 |  18 | 12 | DC2 | Device control 2                             | 0010011 | 023 |  19 | 13 | DC3 | Device control 3                             |
| 0010100 | 024 |  20 | 14 | DC4 | Device control 4                             | 0010101 | 025 |  21 | 15 | NAK | Negative acknowledge                         |
| 0010110 | 026 |  22 | 16 | SYN | Synchronous idle                             | 0010111 | 027 |  23 | 17 | ETB | End transmission block                       |
| 0011000 | 030 |  24 | 18 | CAN | Cancel                                       | 0011001 | 031 |  25 | 19 | EM  | End of medium                                |
| 0011010 | 032 |  26 | 1A | SUB | Substitute                                   | 0011011 | 033 |  27 | 1B | ESC | Escape                                       |
| 0011100 | 034 |  28 | 1C | FS  | File separator                               | 0011101 | 035 |  29 | 1D | GS  | Group separator                              |
| 0011110 | 036 |  30 | 1E | RS  | Record separator                             | 0011111 | 037 |  31 | 1F | US  | Unit separator                               |
| 0100000 | 040 |  32 | 20 |     | Space                                        | 0100001 | 041 |  33 | 21 | !   | Exclamation mark                             |
| 0100010 | 042 |  34 | 22 | "   | Speech mark                                  | 0100011 | 043 |  35 | 23 | #   | Number sign                                  |
| 0100100 | 044 |  36 | 24 | $   | Dollar sign                                  | 0100101 | 045 |  37 | 25 | %   | Percent                                      |
| 0100110 | 046 |  38 | 26 | &   | Ampersand                                    | 0100111 | 047 |  39 | 27 | '   | Quote                                        |
| 0101000 | 050 |  40 | 28 | (   | Open parenthesis                             | 0101001 | 051 |  41 | 29 | )   | Close parenthesis                            |
| 0101010 | 052 |  42 | 2A | *   | Asterisk                                     | 0101011 | 053 |  43 | 2B | +   | Plus                                         |
| 0101100 | 054 |  44 | 2C | ,   | Comma                                        | 0101101 | 055 |  45 | 2D | -   | Minus                                        |
| 0101110 | 056 |  46 | 2E | .   | Period                                       | 0101111 | 057 |  47 | 2F | /   | Slash                                        |
| 0110000 | 060 |  48 | 30 | 0   | Zero                                         | 0110001 | 061 |  49 | 31 | 1   | One                                          |
| 0110010 | 062 |  50 | 32 | 2   | Two                                          | 0110011 | 063 |  51 | 33 | 3   | Three                                        |
| 0110100 | 064 |  52 | 34 | 4   | Four                                         | 0110101 | 065 |  53 | 35 | 5   | Five                                         |
| 0110110 | 066 |  54 | 36 | 6   | Six                                          | 0110111 | 067 |  55 | 37 | 7   | Seven                                        |
| 0111000 | 070 |  56 | 38 | 8   | Eight                                        | 0111001 | 071 |  57 | 39 | 9   | Nine                                         |
| 0111010 | 072 |  58 | 3A | :   | Colon                                        | 0111011 | 073 |  59 | 3B | ;   | Semicolon                                    |
| 0111100 | 074 |  60 | 3C | <   | Open angled bracket                          | 0111101 | 075 |  61 | 3D | =   | Equal                                        |
| 0111110 | 076 |  62 | 3E | >   | Close angled bracket                         | 0111111 | 077 |  63 | 3F | ?   | Question mark                                |
| 1000000 | 100 |  64 | 40 | @   | At sign                                      | 1000001 | 101 |  65 | 41 | A   | Uppercase A                                  |
| 1000010 | 102 |  66 | 42 | B   | Uppercase B                                  | 1000011 | 103 |  67 | 43 | C   | Uppercase C                                  |
| 1000100 | 104 |  68 | 44 | D   | Uppercase D                                  | 1000101 | 105 |  69 | 45 | E   | Uppercase E                                  |
| 1000110 | 106 |  70 | 46 | F   | Uppercase F                                  | 1000111 | 107 |  71 | 47 | G   | Uppercase G                                  |
| 1001000 | 110 |  72 | 48 | H   | Uppercase H                                  | 1001001 | 111 |  73 | 49 | I   | Uppercase I                                  |
| 1001010 | 112 |  74 | 4A | J   | Uppercase J                                  | 1001011 | 113 |  75 | 4B | K   | Uppercase K                                  |
| 1001100 | 114 |  76 | 4C | L   | Uppercase L                                  | 1001101 | 115 |  77 | 4D | M   | Uppercase M                                  |
| 1001110 | 116 |  78 | 4E | N   | Uppercase N                                  | 1001111 | 117 |  79 | 4F | O   | Uppercase O                                  |
| 1010000 | 120 |  80 | 50 | P   | Uppercase P                                  | 1010001 | 121 |  81 | 51 | Q   | Uppercase Q                                  |
| 1010010 | 122 |  82 | 52 | R   | Uppercase R                                  | 1010011 | 123 |  83 | 53 | S   | Uppercase S                                  |
| 1010100 | 124 |  84 | 54 | T   | Uppercase T                                  | 1010101 | 125 |  85 | 55 | U   | Uppercase U                                  |
| 1010110 | 126 |  86 | 56 | V   | Uppercase V                                  | 1010111 | 127 |  87 | 57 | W   | Uppercase W                                  |
| 1011000 | 130 |  88 | 58 | X   | Uppercase X                                  | 1011001 | 131 |  89 | 59 | Y   | Uppercase Y                                  |
| 1011010 | 132 |  90 | 5A | Z   | Uppercase Z                                  | 1011011 | 133 |  91 | 5B | [   | Opening bracket                              |
| 1011100 | 134 |  92 | 5C | \   | Backslash                                    | 1011101 | 135 |  93 | 5D | ]   | Closing bracket                              |
| 1011110 | 136 |  94 | 5E | ^   | Caret                                        | 1011111 | 137 |  95 | 5F | _   | Underscore                                   |
| 1100000 | 140 |  96 | 60 | `   | Grave                                        | 1100001 | 141 |  97 | 61 | a   | Lowercase a                                  |
| 1100010 | 142 |  98 | 62 | b   | Lowercase b                                  | 1100011 | 143 |  99 | 63 | c   | Lowercase c                                  |
| 1100100 | 144 | 100 | 64 | d   | Lowercase d                                  | 1100101 | 145 | 101 | 65 | e   | Lowercase e                                  |
| 1100110 | 146 | 102 | 66 | f   | Lowercase f                                  | 1100111 | 147 | 103 | 67 | g   | Lowercase g                                  |
| 1101000 | 150 | 104 | 68 | h   | Lowercase h                                  | 1101001 | 151 | 105 | 69 | i   | Lowercase i                                  |
| 1101010 | 152 | 106 | 6A | j   | Lowercase j                                  | 1101011 | 153 | 107 | 6B | k   | Lowercase k                                  |
| 1101100 | 154 | 108 | 6C | l   | Lowercase l                                  | 1101101 | 155 | 109 | 6D | m   | Lowercase m                                  |
| 1101110 | 156 | 110 | 6E | n   | Lowercase n                                  | 1101111 | 157 | 111 | 6F | o   | Lowercase o                                  |
| 1110000 | 160 | 112 | 70 | p   | Lowercase p                                  | 1110001 | 161 | 113 | 71 | q   | Lowercase q                                  |
| 1110010 | 162 | 114 | 72 | r   | Lowercase r                                  | 1110011 | 163 | 115 | 73 | s   | Lowercase s                                  |
| 1110100 | 164 | 116 | 74 | t   | Lowercase t                                  | 1110101 | 165 | 117 | 75 | u   | Lowercase u                                  |
| 1110110 | 166 | 118 | 76 | v   | Lowercase v                                  | 1110111 | 167 | 119 | 77 | w   | Lowercase w                                  |
| 1111000 | 170 | 120 | 78 | x   | Lowercase x                                  | 1111001 | 171 | 121 | 79 | y   | Lowercase y                                  |
| 1111010 | 172 | 122 | 7A | z   | Lowercase z                                  | 1111011 | 173 | 123 | 7B | {   | Opening brace                                |
| 1111100 | 174 | 124 | 7C | |   | Vertical bar                                 | 1111101 | 175 | 125 | 7D | }   | Closing brace                                |
| 1111110 | 176 | 126 | 7E | ~   | Tilde                                        | 1111111 | 177 | 127 | 7F | DEL | Delete                                       |
|_______________________________________________________________________________|_______________________________________________________________________________|

You can see that the values of 'A' ... 'Z', 'a' ... 'z' and '0' ... '9' are sequential, but symbols and "system" characters are mixed up.
*/

// Returns 1 when 'character' is one of 'A' ... 'Z', otherwise 0.
int character_is_uppercase (char character) {
	if (character < 'A') { // Everything below 'A' is out of range, no need to look at the upper bound.
		return (0);
	}

	return ((int) (character <= 'Z'));
}

// Returns 1 when 'character' is one of 'a' ... 'z', otherwise 0.
int character_is_lowercase (char character) {
	// A character is lowercase exactly when it is neither below 'a' nor above 'z'.
	return (! ((character < 'a') || (character > 'z')));
}

// Returns 1 when 'character' is one of '0' ... '9', otherwise 0.
int character_is_digit (char character) {
	int at_least_zero = (character >= '0');
	int at_most_nine  = (character <= '9');

	return (at_least_zero && at_most_nine);
}

// Returns 1 when 'character' is space, horizontal tab, carriage return or line feed, otherwise 0.
// Standard implementation also considers vertical tab and form feed as blank, we deliberately don't.
int character_is_blank (char character) {
	if (character == ' ') {
		return (1);
	}

	if (character == CHARACTER_TAB_HORIZONTAL) {
		return (1);
	}

	if (character == CHARACTER_CARRIAGE_RETURN) {
		return (1);
	}

	return ((int) (character == CHARACTER_LINE_FEED));
	// The same test can be written as one expression joined with '||', with each comparison on its own line
	// (operators leading or trailing, whichever you prefer), or with the literals '\t', '\r' and '\n' in
	// place of the named constants.
}

// Returns TRUE / 1 when 'character' is a letter (uppercase or lowercase), otherwise FALSE / 0.
int character_is_alpha (char character) {
	if (character_is_uppercase (character) != 0) { // Uppercase is checked first, lowercase only if that fails.
		return (1);
	}

	return (character_is_lowercase (character) != 0);
}

// Returns whatever 'character_compare_array' reports for 'character' against the list of symbols below,
// which is TRUE / 1 if the character is in that list and FALSE / 0 if it isn't.
// Note that underscore is not in the list, it has its own function 'character_is_underscore'.
int character_is_symbol (char character) {
	char * symbols = "~!@#$%^&*()+{}|:\"<>?`-=[]\\;',./";

	return (character_compare_array (character, symbols));
}

// Returns 1 when 'character' is in the visible (printable) range ' ' ... '~', space included, otherwise 0.
int character_is_visible (char character) {
	if ((character < ' ') || (character > '~')) { // Outside of the printable range on either side.
		return (0);
	}

	return (1);
}

// Exact opposite of 'character_is_visible': returns 1 when that function reports FALSE, otherwise 0.
int character_is_invisible (char character) {
	int visible = character_is_visible (character);

	if (visible == FALSE) {
		return (1);
	}

	return (0);
}

// Returns 1 when 'character' equals CHARACTER_ESCAPE, otherwise 0.
int character_is_escape (char character) {
	return ((character == CHARACTER_ESCAPE) ? 1 : 0);
}

// Returns 1 when 'character' is the underscore '_', otherwise 0.
int character_is_underscore (char character) {
	if (character != '_') {
		return (0);
	}

	return (1);
}

// Works the same way as 'character_is_symbol', but the list holds hexadecimal digits.
// Only '0' ... '9' and uppercase 'A' ... 'F' are in the list, lowercase 'a' ... 'f' are not.
int character_is_hexadecimal (char character) {
	char * digits = "0123456789ABCDEF";

	return (character_compare_array (character, digits));
}

// Returns TRUE if 'character' occurs anywhere in 'character_array', otherwise FALSE.
// I didn't use name "string", but "character_array", to explicitly show the intention of argument.
// The terminating null character is never compared, only the characters counted by 'string_length'.
int character_compare_array (char character, char * character_array) {
	int offset;
	int length = string_length (character_array); // Measured once, instead of walking the whole array again on every iteration.

	for (offset = 0; offset != length; ++offset) {          // We iterate through string (character array!) and return TRUE / 1 if we found it.
		if (character == character_array [offset]) {    // If we don't find it in that string, we return FALSE / 0 since it's not there.
			return (TRUE);                          // Note that we could do this without the variable 'offset', similar to string functions.
		}
	}

	return (FALSE);
}

// Opens file 'name' with flags 'mode' (O_RDONLY, O_WRONLY | O_CREAT, ...) and returns its descriptor.
// Null 'name' or a failed 'open' is reported through 'fatal_failure', so a returned descriptor is never -1
// as long as 'fatal_failure' doesn't return (NOTE: it is defined elsewhere, confirm).
int file_open (char * name, int mode) {
	int descriptor = -1;

	fatal_failure (name == NULL, "file_open: Failed to open file, name is null pointer.");

	// Function 'open' reads a third argument (permission bits) whenever flags ask it to create the file.
	// Leaving it out gives the new file indeterminate permissions, so we always pass one, it's ignored otherwise.
	// 0644 means owner can read and write, everyone else can only read, further limited by process umask.
	descriptor = open (name, mode, 0644);

	fatal_failure (descriptor == -1, "file_open: Failed to open file, function open returned invalid descriptor.");

	return (descriptor);
}

// Closes descriptor 'file' and always returns -1, so the caller can write 'file = file_close (file);'
// and end up with an invalid descriptor in its variable.
int file_close (int file) {
	int code = 0;

	fatal_failure (file == -1, "file_close: Failed to close file, invalid file descriptor.");

	code = close (file);

	fatal_failure (code == -1, "file_close: Failed to close file, function close returned invalid code.");

	return (-1);
}

// Reads up to 'size' bytes from descriptor 'file' into 'data' with a single call to 'read'.
// Invalid arguments are reported through 'fatal_failure'. The result of 'read' itself is deliberately
// ignored, so the caller is not told about a short read or a read error.
void file_read (int file, void * data, int size) {
	fatal_failure (file == -1,   "file_read: Failed to read from file, invalid descriptor.");
	fatal_failure (data == NULL, "file_read: Failed to read from file, data is null pointer.");
	fatal_failure (size == 0,    "file_read: Failed to read from file, size is zero.");
	fatal_failure (size <  0,    "file_read: Failed to read from file, size is negative."); // Cast below would turn it into a huge byte count.

	(void) read (file, data, (unsigned long int) size);
}

// Writes up to 'size' bytes from 'data' to descriptor 'file' with a single call to 'write'.
// Invalid arguments are reported through 'fatal_failure'. The result of 'write' itself is deliberately
// ignored, so the caller is not told about a short write or a write error.
void file_write (int file, void * data, int size) {
	fatal_failure (file == -1,   "file_write: Failed to write to file, invalid descriptor.");
	fatal_failure (data == NULL, "file_write: Failed to write to file, data is null pointer.");
	fatal_failure (size == 0,    "file_write: Failed to write to file, size is zero.");
	fatal_failure (size <  0,    "file_write: Failed to write to file, size is negative."); // Cast below would turn it into a huge byte count.

	(void) write (file, data, (unsigned long int) size);
}

// Moves the offset of descriptor 'file' to position 0 relative to 'whence' (SEEK_SET, SEEK_CUR or SEEK_END)
// and returns the resulting offset as reported by 'lseek', converted to int.
int file_seek (int file, int whence) {
	int position = -1;

	fatal_failure (file == -1, "file_seek: Failed to seek in file, invalid descriptor.");

	position = (int) lseek (file, 0, whence);

	return (position);
}

// Returns the size of file 'name', taken as the offset that 'lseek' reports for the end of the file.
// The file is opened read-only just for this measurement and closed again before returning.
int file_size (char * name) {
	int descriptor = file_open (name, O_RDONLY);
	int length     = (int) lseek (descriptor, 0, SEEK_END);

	fatal_failure (length == -1, "file_size: Failed to get size of file, invalid file size.");

	(void) file_close (descriptor); // Always gives back -1, there's nothing to keep.

	return (length);
}

// Returns index of the extension of file 'name' in the table below, or -1 if the extension isn't in it.
// Extension is everything from the last '.' in the name to its end, so "archive.tar.c" and "./main.c" both count as ".c".
// Name without any '.' has no extension and gives -1. Null 'name' is reported through 'fatal_failure'.
int file_type (char * name) {
	char * file_type_data [FILE_TYPE_COUNT] = {
		".txt",     ".s",       ".fasm",    ".gasm",    ".nasm",    ".yasm",    ".c",       ".h",
		".adb",     ".ads",     ".cpp",     ".hpp"
	};

	char * extension = NULL;
	int    type      = 0;

	fatal_failure (name == NULL, "file_type: Failed to get file type, name is null pointer.");

	for (; * name != '\0'; ++name) { // We stop at the terminator, name without '.' must not run us off the end of the string.
		if (* name == '.') {
			extension = name; // Remember the most recent one, the last '.' is where the extension starts.
		}
	}

	if (extension == NULL) {
		return (-1);
	}

	for (type = 0; type != FILE_TYPE_COUNT; ++type) {
		if (string_compare (extension, file_type_data [type]) != 0) { // Non-zero is treated as "equal" here, 'string_compare' is defined elsewhere.
			return (type);
		}
	}

	return (-1);
}

// Loads content of file 'name' into newly allocated memory and returns pointer to it.
// Memory comes from 'allocate' with size reported by 'file_size', and this function writes no terminator
// after the data. Caller presumably owns the memory and has to release it (confirm with 'allocate').
void * file_record (char * name) {
	int    file = -1;
	int    size = -1;
	char * data = NULL;

	fatal_failure (name == NULL, "file_record: Failed to record file, name is null pointer.");

	file = file_open (name, O_RDONLY);
	size = file_size (name); // Opens the file a second time on its own descriptor, so our offset stays at the start.
	data = allocate  (size);

	file_read (file, data, size);

	file = file_close (file);

	return (data);
}

#endif