458 lines
22 KiB
C
458 lines
22 KiB
C
/*
|
|
Copyright (c) 2023 : Ognjen 'xolatile' Milan Robovic
|
|
|
|
Xhartae is free software! You will redistribute it or modify it under the terms of the GNU General Public License by Free Software Foundation.
|
|
And when you do redistribute it or modify it, it will use either version 3 of the License, or (at yours truly opinion) any later version.
|
|
It is distributed in the hope that it will be useful or harmful, it really depends... But no warranty what so ever, seriously. See GNU/GPLv3.
|
|
*/
|
|
|
|
#ifndef CHAPTER_0_SOURCE // These two, and "#endif" at the end of the file are header guards, we'll talk about them more when the time comes!
|
|
#define CHAPTER_0_SOURCE
|
|
|
|
#include "chapter_0.h" // We're pasting macros, enumerations and function declarations from header file "chapter_0.h" into this file, at this location.
|
|
|
|
/*
|
|
Function 'in' will perform read system call, literally reading 'size' bytes from standard input, which is terminal in most kernels unless it's redirected to some other file
|
|
descriptor, and store it at 'data' memory address, if there's enough space in it. Since this is one of the core functions, if it fails, we want to abort the program and see what
|
|
we did wrong... Maybe there wasn't enough space in 'data', maybe 'size' was negative and it overflowed because read system call internally uses 'size_t / unsigned long int', which
|
|
is 64 bits wide, maybe we made some other mistake? Just abort, find the error and fix it.
|
|
*/
|
|
|
|
/*
Read 'size' bytes from standard input into memory at address 'data'.

data : Destination memory, it must not be null pointer and it needs to have room for at least 'size' bytes.
size : Amount of bytes to request from read system call, it must be positive.

Program is aborted through 'fatal_failure' if 'data' is null pointer or if 'size' is zero or negative. Return value of read system call is deliberately thrown away, so short reads,
end of input and read errors are not reported to the caller.
*/

void in (void * data, int size) {
	fatal_failure (data == NULL, "in: Failed to read from standard input, data is null pointer.");
	fatal_failure (size == 0,    "in: Failed to read from standard input, size is zero.");
	fatal_failure (size <  0,    "in: Failed to read from standard input, size is negative."); // Negative size would wrap around to a huge value in the cast below.

	(void) read (STDIN_FILENO, data, (unsigned long int) size); // The cast is safe now, since 'size' is known to be positive at this point.
}
|
|
|
|
/*
|
|
Similar to 'in' function and read system call, write system call will store 'size' bytes from 'data' memory address into standard output, which is usually what you see in your
|
|
terminal. Now, I won't talk much about teletypes, virtual terminals, file descriptor redirections and similar stuff, because I'm not very knowledgeable about them. What matters is,
|
|
you have a working operating system, terminal and compiler, and you can make things happen. Once you learn C better than me, and start writing your own multi-threaded kernel, core
|
|
libraries, compilers and what not, you'll care about those things. I'll briefly talk about function structure for 'reallocate' function soon.
|
|
*/
|
|
|
|
/*
Write 'size' bytes from memory at address 'data' to standard output.

data : Source memory, it must not be null pointer and it needs to hold at least 'size' readable bytes.
size : Amount of bytes to pass to write system call, it must be positive.

Program is aborted through 'fatal_failure' if 'data' is null pointer or if 'size' is zero or negative. Return value of write system call is deliberately thrown away, so partial
writes and write errors are not reported to the caller.
*/

void out (void * data, int size) {
	fatal_failure (data == NULL, "out: Failed to write to standard output, data is null pointer.");
	fatal_failure (size == 0,    "out: Failed to write to standard output, size is zero.");
	fatal_failure (size <  0,    "out: Failed to write to standard output, size is negative."); // Negative size would wrap around to a huge value in the cast below.

	(void) write (STDOUT_FILENO, data, (unsigned long int) size); // The cast is safe now, since 'size' is known to be positive at this point.
}
|
|
|
|
/*
|
|
Function 'echo' is just a simplification of function 'out', because we'll be using it a lot, notice that it must accept null terminated strings, which are sort of C-style thing. I
|
|
really like them, because you don't need to always know size of the string in order to iterate it, but it requires some mental overhead in order to use them without creating hard
|
|
to find bugs, which is why newer programming languages consider them unsafe. They're not unsafe, they need to be used like intended.
|
|
*/
|
|
|
|
/*
Print null terminated 'string' to standard output, by measuring it with 'string_length' and handing it to 'out'.

string : Null terminated text to print. Nothing here verifies that the terminator exists, so a string without it makes 'string_length' read past the end of the memory.
*/

void echo (char * string) {
	int length = string_length (string); // Amount of characters before the null terminator.

	out (string, length);
}
|
|
|
|
/*
Abort the program with an error message if 'condition' is met, otherwise do nothing and let the execution continue.

condition : Any value that is not 0 is treated as true, not only the exact value of 'TRUE'.
message   : Null terminated text that's printed after the red "Exiting" tag, followed by a new line.

When 'condition' is met, this function doesn't return, it ends the program with 'EXIT_FAILURE' return code.
*/

void fatal_failure (int condition, char * message) {
	if (condition != 0) {                         // Comparing against 0 instead of TRUE, so results like 2 or -1 also count as failure.
		echo ("[\033[1;31mExiting\033[0m] "); // Escape sequences around the word only change its colour in the terminal.
		echo (message);
		echo ("\n");

		exit (EXIT_FAILURE);                  // Ends the program with a failure return code, nothing below this line is executed.
	}
}
|
|
|
|
/*
Limit the integer that 'value' points to into range from 'minimum' to 'maximum', inclusively.

value   : Address of the integer to limit, if it's null pointer, function returns without doing anything.
minimum : Lower bound, it's applied first.
maximum : Upper bound, it's applied second, so it wins if 'minimum' is bigger than 'maximum'.
*/

void limit (int * value, int minimum, int maximum) {
	int clamped;

	if (value == NULL) { // We shouldn't dereference null pointer, and it's not worth aborting the program for it.
		return;
	}

	clamped = * value;

	if (clamped < minimum) { clamped = minimum; }
	if (clamped > maximum) { clamped = maximum; }

	* value = clamped;
}
|
|
|
|
/*
|
|
Memory management is a whole new topic that's too complex to cover it now in details, and it's the source of most security vulnerabilities and hidden bugs. For now, just remember
|
|
that every program can read, write or execute only parts of the memory that the kernel allows it. Program can request new memory or release old memory, so some other programs can
|
|
use it. We'll learn to use program called Valgrind to find and fix memory related bugs in later chapters. We rely on functions 'calloc', 'realloc' and 'free' from <stdlib.h>
|
|
header file, and we'll avoid 'malloc', and use 'realloc' carefully, because they leave new memory uninitialized.
|
|
|
|
We're internally using function 'calloc' to request new memory, function 'realloc' to enlarge existing memory and function 'free' to release old memory, data that won't be used
|
|
later in the program. It's important to "free" all "malloc/calloc/realloc"-ed memory when program finishes successfully, and in some special cases, even when program fails and
|
|
aborts. It's important for safety to do so, think of it like open and close braces, if you have some allocations, you should deallocate them later.
|
|
|
|
Some examples of using them directly (not wrapping them like I like to do) are:
|
|
|
|
@C
|
|
char * data = NULL;
|
|
|
|
data = malloc (20 * sizeof (* data)); // Allocates 20 bytes of memory for 'data'.
|
|
data = calloc (20, sizeof (* data)); // Allocates 20 bytes also, but initializes them to 0 value.
|
|
data = realloc (data, 20 * sizeof (* data)); // When 'data' is null pointer, it will be same as 'malloc', else it will reallocate more memory (for correct usage).
|
|
// Also, it's best to just use 'calloc', but it complicates some other tasks.
|
|
free (data); // Deallocates memory, we'll talk about "double free" later.
|
|
@
|
|
*/
|
|
|
|
/*
Request 'size' bytes of new memory, with every byte initialized to 0, using function 'calloc'.

size : Amount of bytes to allocate, it's casted to unsigned type, so negative value becomes a huge request.

Returns address of the new memory. If 'calloc' returns null pointer, program is aborted through 'fatal_failure'.
*/

void * allocate (int size) {
	void * memory = calloc ((unsigned long int) size, sizeof (char)); // Elements are single bytes, so 'size' is the byte count.

	fatal_failure (memory == NULL, "standard : allocate : Failed to allocate memory, internal function 'calloc' returned null pointer.");

	return (memory);
}
|
|
|
|
/*
|
|
Now, lets see that code formatting in action, with briefly describing function structure in C programming language. Our function is called "reallocate", its' inputs (arguments)
|
|
are "data" with type 'void *' (pointer to any type of memory address) and "size" with type 'int' (integer), and its' output is also 'void *' (some memory address). All code
|
|
between first '{' and last connected '}' is part of that function. We're using function 'realloc' inside, but we check for error (it returns 'NULL' on error), then we print message
|
|
and abort the program if there was an error, if there wasn't, we return new enlarged chunk of memory, changing the "data" variable.
|
|
*/
|
|
|
|
/*
Change the size of memory at address 'data' to 'size' bytes, using function 'realloc'.

data : Memory to resize, passed to 'realloc' as it is.
size : New size in bytes, it's casted to unsigned type, so negative value becomes a huge request.

Returns address of the resized memory. If 'realloc' returns null pointer, program is aborted through 'fatal_failure'.
*/

void * reallocate (void * data, int size) {
	void * resized = realloc (data, (unsigned long int) size);

	fatal_failure (resized == NULL, "standard : reallocate: Failed to reallocate memory, internal function 'realloc' returned null pointer.");

	return (resized);
}
|
|
|
|
/*
Release memory at address 'data' using function 'free'.

data : Memory to release, null pointer is allowed and nothing is released in that case.

Always returns null pointer, so the caller can write "data = deallocate (data);" and not keep the old address around.
*/

void * deallocate (void * data) {
	free (data); // Function 'free' does nothing when it's given a null pointer, so there's no need to check for it.

	return (NULL);
}
|
|
|
|
/*
|
|
This program is intended to be a book-like guide for this source code, which is also a book. We'll deal with strings a lot, and they're a good example of code formatting which is
|
|
the main topic of chapter zero. In function 'string_length' we have for loop without a body, some people prefer to put '{}' or ';' in same or next line, to express the intention
|
|
that the loop shouldn't have a body (code block {}). I just put ';' on the same line. Also, functions 'string_*' could depend on functions 'string_*_limit', but we won't do that
|
|
now, and since we've already declared them in header file "chapter_0.h" we can define them and call them in whatever order we want. Nice.
|
|
|
|
@C
|
|
// Simple example of how we could make 'string_*' function dependable on 'string_*_limit' function...
|
|
|
|
int string_compare (char * string_0, char * string_1) {
|
|
return (string_compare_limit (string_0, string_1, string_length (string_0)));
|
|
}
|
|
@
|
|
*/
|
|
|
|
int string_length (char * string) {
|
|
int length;
|
|
|
|
fatal_failure (string == NULL, "string_length: String is null pointer.");
|
|
|
|
for (length = 0; string [length] != CHARACTER_NULL; ++length); // Since in C, strings are null terminated, looping until we see null character is strings' length.
|
|
|
|
return (length);
|
|
}
|
|
|
|
/*
|
|
Now, I've implemented "unlimited" versions of string comparison, copying and concatenation different from "limited" versions. They correspond with standard library functions
|
|
'strcmp', 'strcpy', 'strcat', 'strncmp', 'strncpy' and 'strncat' found in header file <string.h>. In "unlimited" versions, I rely on the fact that we want to apply the operation
|
|
on entire strings, that those strings are null terminated and I used that in my advantage. For example, function 'string_compare' could be something like this:
|
|
|
|
@C
|
|
int string_compare (char * string_0, char * string_1) {
|
|
int offset;
|
|
|
|
fatal_failure (string_0 == NULL, "string_compare: Destination string is null pointer.");
|
|
fatal_failure (string_1 == NULL, "string_compare: Source string is null pointer.");
|
|
|
|
for (offset = 0; (string_0 [offset] != CHARACTER_NULL) && (string_1 [offset] != CHARACTER_NULL); ++offset) {
|
|
if (string_0 [offset] != string_1 [offset]) {
|
|
return (FALSE);
|
|
}
|
|
}
|
|
|
|
return (TRUE);
|
|
}
|
|
@
|
|
|
|
And I used this approach below to show that you can solve the problem using different solutions... You'll notice that "limited" versions have variable 'offset' of type integer. We
|
|
use it to iterate the strings, while in "unlimited" versions, we iterate on pointers to those strings, which are pushed to the stack. Both versions work, both versions give the
|
|
same results, you can use any of them.
|
|
*/
|
|
|
|
int string_compare (char * string_0, char * string_1) {
|
|
fatal_failure (string_0 == NULL, "string_compare: Destination string is null pointer."); // This will be seen in next 5 functions too, we don't want NULL here.
|
|
fatal_failure (string_1 == NULL, "string_compare: Source string is null pointer.");
|
|
|
|
for (; (* string_0 != CHARACTER_NULL) && (* string_1 != CHARACTER_NULL); ++string_0, ++string_1) { // We iterate until either string reaches the null character.
|
|
if (* string_0 != * string_1) { // In case that characters at the same offset are different:
|
|
return (FALSE); // > We return FALSE, 0, since strings aren't the same...
|
|
}
|
|
}
|
|
if (* string_0 != * string_1) { // Now, we'll do one last termination check.
|
|
return (FALSE);
|
|
}
|
|
|
|
return (TRUE); // Otherwise, strings are same, we return TRUE, 1.
|
|
}
|
|
|
|
char * string_copy (char * string_0, char * string_1) {
|
|
char * result = string_0; // We need to save pointer to destination string before changing it.
|
|
|
|
fatal_failure (string_0 == NULL, "string_copy: Destination string is null pointer.");
|
|
fatal_failure (string_1 == NULL, "string_copy: Source string is null pointer.");
|
|
|
|
for (; * string_1 != CHARACTER_NULL; ++string_0, ++string_1) { // This time and in next function, we iterate only source string.
|
|
* string_0 = * string_1; // And we assign character at the same offset to destination string (aka copy it).
|
|
}
|
|
|
|
* string_0 = CHARACTER_NULL; // Copying null termination, since the loop stopped on that condition.
|
|
|
|
return (result); // Lastly, we return the destination string, in order to be able to bind functions.
|
|
}
|
|
|
|
char * string_concatenate (char * string_0, char * string_1) {
|
|
char * result = string_0;
|
|
|
|
fatal_failure (string_0 == NULL, "string_concatenate: Destination string is null pointer.");
|
|
fatal_failure (string_1 == NULL, "string_concatenate: Source string is null pointer.");
|
|
|
|
string_0 += string_length (string_0); // We'll first offset destination string to the end of it.
|
|
// Because we want to start copying from the end, aka concatenate it.
|
|
for (; * string_1 != CHARACTER_NULL; ++string_0, ++string_1) { // The rest of the function is same as string_copy, so:
|
|
* string_0 = * string_1; // We could even use it here, but that defies the purpose of learning now.
|
|
}
|
|
|
|
* string_0 = CHARACTER_NULL; // Again, assign null termination.
|
|
|
|
return (result);
|
|
}
|
|
|
|
/*
Reverse the whole null terminated 'string' in place, by calling "limited" version with the length of the string.

string : Null terminated string to reverse.

Returns whatever 'string_reverse_limit' returns.
*/

char * string_reverse (char * string) {
	int length = string_length (string);

	return (string_reverse_limit (string, length));
}
|
|
|
|
/*
|
|
As for "limited" versions of previous 3 functions, they do the same thing, but are capped to some variable 'limit'. These functions have their own use-case, for example, if
|
|
strings aren't null terminated, if you're not sure that they are null terminated, if we're dealing with binary (not textual) data (casted to char *), and many more cases. I won't
|
|
write comments for 'string_copy_limit', 'string_concatenate_limit' and 'string_reverse_limit', try to read them and understand what kind of operation they'll perform with your
|
|
current knowledge of C language.
|
|
*/
|
|
|
|
int string_compare_limit (char * string_0, char * string_1, int limit) {
|
|
int offset;
|
|
|
|
fatal_failure (string_0 == NULL, "string_compare_limit: Destination string is null pointer."); // This is the new trend, check for unimportant things.
|
|
fatal_failure (string_1 == NULL, "string_compare_limit: Source string is null pointer."); // At least this isn't too verbose. I hope...
|
|
|
|
for (offset = 0; offset < limit; ++offset) { // Now, we'll iterate until 'limit' is reached, but it can overrun.
|
|
if (string_0 [offset] != string_1 [offset]) { // All said here applies to next two functions as well...
|
|
return (FALSE); // As soon as 2 characters mismatch, they're not same, we return FALSE.
|
|
}
|
|
}
|
|
|
|
return (TRUE); // Otherwise, we're reached the end, they're same, we return TRUE.
|
|
}
|
|
|
|
/*
Copy exactly 'limit' characters from 'string_1' into 'string_0'.

string_0 : Destination, it needs room for 'limit' characters, program is aborted through 'fatal_failure' if it's null pointer.
string_1 : Source, it needs 'limit' readable characters, program is aborted through 'fatal_failure' if it's null pointer.
limit    : Amount of characters to copy, nothing is copied if it's zero or negative.

Null characters don't stop the copying and no null termination is added after the copied characters. Returns 'string_0'.
*/

char * string_copy_limit (char * string_0, char * string_1, int limit) {
	int offset = 0;

	fatal_failure (string_0 == NULL, "string_copy_limit: Destination string is null pointer.");
	fatal_failure (string_1 == NULL, "string_copy_limit: Source string is null pointer.");

	if ((limit > 0) && (string_1 != NULL)) {
		while (offset < limit) {
			string_0 [offset] = string_1 [offset];

			++offset;
		}
	}

	return (string_0);
}
|
|
|
|
char * string_concatenate_limit (char * string_0, char * string_1, int limit) {
|
|
int offset, length_0, length_1;
|
|
|
|
fatal_failure (string_0 == NULL, "string_concatenate_limit: Destination string is null pointer.");
|
|
fatal_failure (string_1 == NULL, "string_concatenate_limit: Source string is null pointer.");
|
|
|
|
if ((limit <= 0) || (string_1 == NULL)) {
|
|
return (string_0);
|
|
}
|
|
|
|
length_0 = string_length (string_0);
|
|
length_1 = string_length (string_1);
|
|
|
|
for (offset = 0; (offset < length_1) && (offset < limit); ++offset) {
|
|
string_0 [length_0 + offset] = string_1 [offset];
|
|
}
|
|
|
|
return (string_0);
|
|
}
|
|
|
|
/*
Reverse first 'limit' characters of 'string' in place.

string : Memory with at least 'limit' characters, program is aborted through 'fatal_failure' if it's null pointer.
limit  : Amount of characters to reverse, nothing is changed if it's less than 2.

Returns 'string', so calls can be bound together.
*/

char * string_reverse_limit (char * string, int limit) {
	int i;

	fatal_failure (string == NULL, "string_reverse_limit: String is null pointer."); // Message names this function, like in the rest of 'string_*' functions.

	for (i = 0; i < limit / 2; ++i) { // Swap characters from both ends, moving towards the middle, middle character of odd length stays.
		char temporary = string [i];

		string [i]             = string [limit - 1 - i];
		string [limit - 1 - i] = temporary;
	}

	return (string);
}
|
|
|
|
/*
|
|
We'll use this function in many other chapters (in other C source files!), but we'll be careful, since compiler can't catch all the mistakes we make. Lets listen a story!
|
|
|
|
Compiler likes you because you are his master. You tell him what to do (with passing flags as 'argv'), you give him the tool (your text files) and he'll do it. Sometimes he can
|
|
complain, like "you gave me a shovel to chop some firewood" or "you gave me an axe to clean the leaves from the road", and he can't do that task. But if you give him a proper tool
|
|
for proper task, he'll do it and won't complain at all. However, sometimes, you give him an imperfect tool (your C program full of subtle bugs) and he won't notice it. He'll do
|
|
the job (translate bunch of ASCII characters into bunch of bytes), and say he's finished. Then you go out and see the results (run your executable), and it's all mess! You're like
|
|
"What the hell, do as I say!", but he doesn't know what he did wrong... And in fact, it was your fault for giving him a imperfect tool all along. That's why we sometimes need more
|
|
difficult to use servants. When you, as his master, tell him to do some job for you, he'll complain endlessly. You should use all your servants appropriately, each of them for
|
|
different kind of task, and you'll learn to choose them wisely.
|
|
|
|
Now, funny story aside, some languages are considered safe because they won't compile your code if they find any kind of syntax mistake. Ada is very strict programming language,
|
|
we'll talk about it in later chapters, as Ada compilers complain a lot about the source code, but they can't validate incorrect algorithm. Pust is also very strict language, but
|
|
it sucks, and it's unreadable pile of characters. Remember, no matter how strict language you use, it'll never validate correctness of the algorithm, only syntax mistakes and
|
|
few other checks like life-times, bounds, etc. I'll mention this a lot, think twice, write once.
|
|
|
|
Obedient servants:
|
|
$ gcc -Wall
|
|
$ clang -Wall
|
|
|
|
Complicated servants:
|
|
$ gcc -ansi -Werror -Wall -Wextra -Wpedantic
|
|
$ clang -ansi -Werror -Weverything
|
|
|
|
Terrible servants (same as above, but with also using):
|
|
$ splint [custom flags]
|
|
$ valgrind --show-leak-kinds=all --leak-check=full
|
|
*/
|
|
|
|
char * string_realign (char * string, int amount, char character) { // Now, this is our "align string to right" function, lets explain it.
|
|
int offset, length; // We're declaring two local (automatic) variables of type 'int'.
|
|
|
|
length = string_length (string); // We'll use variable 'length' later in the code, so we initialize it to length of the string.
|
|
|
|
for (offset = 0; offset != length; ++offset) { // We're essentially moving the string to the right, iterating through its' length to amount.
|
|
string [amount - offset - 1] = string [length - offset - 1]; // Needless to say, string needs to have enough memory ((pre) allocated) for it to store it.
|
|
}
|
|
|
|
for (offset = 0; offset != amount - length; ++offset) { // Now, we have some "garbage" data left from the actual string, so we iterate through left side and:
|
|
string [offset] = character; // Assign to it argument 'character' that we provided in a function call. We can align with anything.
|
|
}
|
|
|
|
string [amount] = CHARACTER_NULL; // I like to null terminate them explicitly, so I don't have to worry about tiny bugs later.
|
|
|
|
return (string); // Lastly, we return a pointer to our modified string, in order to, again, bind function calls.
|
|
}
|
|
|
|
/*
|
|
Again, please consider these 'terminal_*' functions black magic, as well as 'number_to_string' and 'format_to_string' as they are more complex to cover them at this point, we'll
|
|
talk more about them later... For now, just take a look at how I format the code in them.
|
|
*/
|
|
|
|
/*
Print escape sequences "\033[2J" and "\033[H" to standard output, which on ANSI compatible terminals erase the screen and move the cursor to the top left corner.
*/

void terminal_clear (void) {
	char * sequence = "\033[2J\033[H";

	echo (sequence);
}
|
|
|
|
void terminal_colour (int colour, int effect) {
|
|
char format [8] = "\033[ ;3 m";
|
|
|
|
format [2] = (char) (effect % EFFECT_COUNT) + '0';
|
|
format [5] = (char) (colour % COLOUR_COUNT) + '0';
|
|
|
|
echo (format);
|
|
}
|
|
|
|
/*
Print escape sequence "\033[0m" to standard output, which on ANSI compatible terminals resets colours and effects to their defaults.
*/

void terminal_cancel (void) {
	char * sequence = "\033[0m";

	echo (sequence);
}
|
|
|
|
/*
Print escape sequence that shows or hides the terminal cursor to standard output.

show : When it's not 0, sequence "\033[?25h" (show) is printed, otherwise sequence "\033[?25l" (hide) is printed.
*/

void terminal_show_cursor (int show) {
	echo ((show != 0) ? "\033[?25h" : "\033[?25l");
}
|
|
|
|
char * number_to_string (int number) {
|
|
int i, sign;
|
|
|
|
static char string [32];
|
|
|
|
for (i = 0; i != 32; ++i) {
|
|
string [i] = CHARACTER_NULL;
|
|
}
|
|
|
|
if (number == 0) {
|
|
string [0] = '0';
|
|
string [1] = CHARACTER_NULL;
|
|
return (string);
|
|
}
|
|
|
|
if (number < 0) {
|
|
number *= -1;
|
|
sign = 1;
|
|
} else {
|
|
sign = 0;
|
|
}
|
|
|
|
for (i = (string [0] == '-'); number != 0; ++i) {
|
|
string [i] = (char) (number % 10) + '0';
|
|
number /= 10;
|
|
}
|
|
|
|
if (sign != 0) {
|
|
string [i] = '-';
|
|
++i;
|
|
}
|
|
|
|
string [i] = CHARACTER_NULL;
|
|
|
|
string_reverse (string);
|
|
|
|
return (string);
|
|
}
|
|
|
|
char * format_to_string (int number, int sign, int base, int amount, char character) {
|
|
int i;
|
|
|
|
static char string [32];
|
|
|
|
for (i = 0; i != 32; ++i) {
|
|
string [i] = CHARACTER_NULL;
|
|
}
|
|
|
|
if (number == 0) {
|
|
string [0] = '0';
|
|
string [1] = CHARACTER_NULL;
|
|
|
|
string_realign (string, amount, character);
|
|
|
|
return (string);
|
|
}
|
|
|
|
if (number < 0) {
|
|
number *= -1;
|
|
}
|
|
|
|
for (i = (string [0] == '-'); number != 0; ++i) {
|
|
string [i] = "0123456789ABCDEF" [number % base];
|
|
number /= base;
|
|
}
|
|
|
|
if (sign != 0) {
|
|
string [i] = '-';
|
|
++i;
|
|
}
|
|
|
|
string [i] = CHARACTER_NULL;
|
|
|
|
string_reverse (string);
|
|
|
|
string_realign (string, amount, character);
|
|
|
|
return (string);
|
|
}
|
|
|
|
#endif
|