Minimal programming language prototype, created with the goal of having a very small compiler, so that anyone can write their own compiler for it.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

242 lines
8.4KB

  1. #include <xolatile/xtandard.c>
  2. enum {
  3. core_none, core_word, core_marker, core_string, core_number,
  4. core_symbol, core_type
  5. };
  6. enum {
  7. word_type, word_loop, word_if, word_else, word_case,
  8. word_return, word_import, word_system
  9. };
  10. #define STRING_LIMIT (80)
  11. #define NAME_LIMIT (80)
  12. static int token_count = 0;
  13. static int string_code = 0;
  14. static int number_code = 0;
  15. static int marker_code = 0;
  16. static int type_code = 0;
  17. static int symbol_code = 0;
  18. static int * token_data = null;
  19. static int * token_type = null;
  20. /*static char * * string_name = null;*/
  21. static char * * string_data = null;
  22. static int * string_size = null;
  23. /*static char * * number_name = null;*/
  24. static int * number_data = null;
  25. static char * * marker_name = null;
  26. static char * * type_name = null;
  27. static int * marker_type = null;
  28. static char * symbol_data = null;
  29. /*
  30. static int * function_name = null;
  31. static int * function_type = null;
  32. static int * * function_argument_name = null;
  33. static int * * function_argument_type = null;
  34. */
  35. #include <stdio.h>
  36. static char * word_list [] = {
  37. "type", "loop", "if", "else", "case", "return", "import", "system"
  38. };
  39. static void add_token (int type, int data) {
  40. token_data = reallocate (token_data, (token_count + 1) * (int) sizeof (* token_data));
  41. token_type = reallocate (token_type, (token_count + 1) * (int) sizeof (* token_type));
  42. token_data [token_count] = data;
  43. token_type [token_count] = type;
  44. ++token_count;
  45. }
  46. static void add_string (char * data, int size) {
  47. string_data = reallocate (string_data, (string_code + 1) * (int) sizeof (* string_data));
  48. string_size = reallocate (string_size, (string_code + 1) * (int) sizeof (* string_size));
  49. string_data [string_code] = allocate (STRING_LIMIT * (int) sizeof (* string_data));
  50. string_copy (string_data [string_code], data);
  51. string_size [string_code] = size;
  52. ++string_code;
  53. }
  54. static void add_number (int data) {
  55. number_data = reallocate (number_data, (number_code + 1) * (int) sizeof (* number_data));
  56. number_data [number_code] = data;
  57. ++number_code;
  58. }
  59. static void add_marker (char * name, int type) {
  60. marker_name = reallocate (marker_name, (marker_code + 1) * (int) sizeof (* marker_name));
  61. marker_type = reallocate (marker_type, (marker_code + 1) * (int) sizeof (* marker_type));
  62. marker_name [marker_code] = allocate (NAME_LIMIT * (int) sizeof (* marker_name));
  63. string_copy (marker_name [marker_code], name);
  64. marker_type [marker_code] = type;
  65. ++marker_code;
  66. }
  67. static void add_type (char * name) {
  68. type_name = reallocate (type_name, (type_code + 1) * (int) sizeof (* type_name));
  69. type_name [type_code] = allocate (NAME_LIMIT * (int) sizeof (* type_name));
  70. string_copy (type_name [type_code], name);
  71. ++type_code;
  72. }
  73. static void add_symbol (char data) {
  74. symbol_data = reallocate (symbol_data, (symbol_code + 1) * (int) sizeof (* symbol_data));
  75. symbol_data [symbol_code] = data;
  76. ++symbol_code;
  77. }
  78. static void kill (char * data) {
  79. terminal_colour (colour_red, effect_bold);
  80. echo (data);
  81. terminal_cancel ();
  82. exit (log_failure);
  83. }
  84. int main (void) {
  85. char * buffer = null;
  86. int offset = 0;
  87. int length = 0;
  88. buffer = file_import ("./test.x");
  89. for (offset = 0; buffer [offset] != '\0'; ++offset) {
  90. if ((buffer [offset] == '-') && (buffer [offset + 1] == '-') && (buffer [offset + 2] == '-') && (buffer [offset + 1] != '\0') && (buffer [offset + 2] != '\0')) {
  91. for (; buffer [offset] != '\n'; ++offset);
  92. } else if (buffer [offset] == '"') {
  93. int size = 0;
  94. char data [STRING_LIMIT] = "";
  95. for (++offset; (buffer [offset] != '"') && (buffer [offset] != '\0'); ++offset) {
  96. data [size++] = buffer [offset];
  97. }
  98. data [size] = '\0';
  99. add_token (core_string, string_code);
  100. add_string (data, size);
  101. } else if (character_is_digit (buffer [offset]) == true) {
  102. int data = buffer [offset] - '0';
  103. for (++offset; (character_is_digit (buffer [offset]) == true) && (buffer [offset] != '\0'); ++offset) {
  104. data *= 10;
  105. data += (buffer [offset] - '0');
  106. }
  107. add_token (core_number, number_code);
  108. add_number (data);
  109. --offset;
  110. } else if (character_is_alpha (buffer [offset]) == true) {
  111. int size = 0;
  112. char name [NAME_LIMIT] = "";
  113. for (; ((character_is_alpha (buffer [offset]) == true) ||
  114. (character_is_digit (buffer [offset]) == true) ||
  115. (character_is_underscore (buffer [offset]) == true)) && (buffer [offset] != '\0'); ++offset) {
  116. name [size++] = buffer [offset];
  117. }
  118. name [size] = '\0';
  119. for (length = 0; length < (int) (sizeof (word_list) / sizeof (word_list [0])); ++length) {
  120. if (string_compare_limit (name, word_list [length], string_length (word_list [length]) + 1) == true) {
  121. add_token (core_word, length);
  122. goto here;
  123. }
  124. }
  125. add_token (core_marker, marker_code);
  126. add_marker (name, token_type [token_count - 1]);
  127. if ((token_type [token_count - 2] == core_word) && (token_data [token_count - 2] == word_type)) {
  128. token_type [token_count - 1] = core_type;
  129. add_type (name);
  130. }
  131. for (length = 0; length < type_code; ++length) {
  132. if (string_compare_limit (name, type_name [length], string_length (type_name [length]) + 1) == true) {
  133. token_type [token_count - 1] = core_type;
  134. }
  135. }
  136. here:
  137. --offset;
  138. } else if (character_compare_array (buffer [offset], ",.;:=<>&|!+-*/%()[]") == true) {
  139. add_token (core_symbol, symbol_code);
  140. add_symbol (buffer [offset]);
  141. } else {
  142. if (character_is_blank (buffer [offset]) == false) {
  143. echo ("\033[1;31mCharacter set exception point: Segmentation\033[0m\n");
  144. printf ("%c -- %i\n", buffer [offset], (int) buffer [offset]);
  145. exit (log_failure);
  146. }
  147. }
  148. }
  149. for (length = 0; length < token_count; ++length) {
  150. switch (token_type [length]) {
  151. case core_symbol: printf ("\033[1;34m%c\033[0m ", symbol_data [token_data [length]]); break;
  152. case core_string: printf ("\033[1;31m%s\033[0m ", string_data [token_data [length]]); break;
  153. case core_number: printf ("\033[1;32m%i\033[0m ", number_data [token_data [length]]); break;
  154. case core_type: printf ("\033[1;36m%s\033[0m ", marker_name [token_data [length]]); break;
  155. case core_marker: printf ("\033[1;33m%s\033[0m ", marker_name [token_data [length]]); break;
  156. case core_word: printf ("\033[1;35m%s\033[0m ", word_list [token_data [length]]); break;
  157. default: break;
  158. }
  159. }
  160. printf ("\n");
  161. for (length = 0; length < token_count; ++length) {
  162. if ((token_type [length] == core_word) && (token_data [length] == word_return)) {
  163. ++length;
  164. if ((token_type [length] == core_symbol) && (symbol_data [token_data [length]] == '(')) {
  165. echo ("return (\n");
  166. ++length;
  167. } else if ((token_type [length] == core_symbol) && (symbol_data [token_data [length]] == ';')) {
  168. echo ("; return;\nxor rax, rax\nret\n");
  169. ++length;
  170. } else {
  171. kill ("return ?\n");
  172. }
  173. } else if ((token_type [length] == core_word) && (token_data [length] == word_if)) {
  174. ++length;
  175. if ((token_type [length] == core_symbol) && (symbol_data [token_data [length]] == '(')) {
  176. echo ("if (\n");
  177. ++length;
  178. } else {
  179. kill ("if ?\n");
  180. }
  181. } else if ((token_type [length] == core_word) && (token_data [length] == word_else)) {
  182. ++length;
  183. if ((token_type [length] == core_symbol) && (symbol_data [token_data [length]] == ':')) {
  184. echo ("else :\n");
  185. ++length;
  186. } else if ((token_type [length] == core_word) && (token_data [length] == word_if)) {
  187. echo ("else if\n");
  188. ++length;
  189. } else {
  190. echo ("expression\n");
  191. }
  192. } else if ((token_type [length] == core_word) && (token_data [length] == word_type)) {
  193. ++length;
  194. if (token_type [length] == core_type) {
  195. echo ("type <name> -- ");
  196. echo (marker_name [token_data [length]]);
  197. echo ("\n");
  198. ++length;
  199. } else {
  200. kill ("type ?\n");
  201. }
  202. }
  203. }
  204. for (length = 0; length < string_code; ++length) { string_data [length] = deallocate (string_data [length]); }
  205. for (length = 0; length < marker_code; ++length) { marker_name [length] = deallocate (marker_name [length]); }
  206. for (length = 0; length < type_code; ++length) { type_name [length] = deallocate (type_name [length]); }
  207. string_data = deallocate (string_data);
  208. string_size = deallocate (string_size);
  209. marker_name = deallocate (marker_name);
  210. marker_type = deallocate (marker_type);
  211. type_name = deallocate (type_name);
  212. symbol_data = deallocate (symbol_data);
  213. number_data = deallocate (number_data);
  214. token_data = deallocate (token_data);
  215. token_type = deallocate (token_type);
  216. buffer = deallocate (buffer);
  217. return (log_success);
  218. }