Minimal programming language prototype, created with the goal of having a very small compiler, so that anyone can write their own compiler for it.
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

238 řádky
8.3KB

  1. #include <xolatile/xtandard.c>
  /* Token categories; one of these is stored in token_type for every token. */
  enum {
      core_none,   /* no category */
      core_word,   /* reserved word; token data indexes word_list */
      core_marker, /* identifier; token data indexes marker_name */
      core_string, /* string literal; token data indexes string_data */
      core_number, /* number literal; token data indexes number_data */
      core_symbol, /* punctuation; token data indexes symbol_data */
      core_type    /* identifier used as a type name; token data indexes marker_name */
  };
  /* Reserved words; the order must match the entries of word_list. */
  enum {
      word_type,
      word_loop,
      word_if,
      word_else,
      word_case,
      word_return,
      word_import,
      word_export,
      word_define,
      word_system
  };
  10. #define STRING_LIMIT (80)
  11. #define NAME_LIMIT (80)
  12. static int token_count = 0;
  13. static int string_code = 0;
  14. static int number_code = 0;
  15. static int marker_code = 0;
  16. static int type_code = 0;
  17. static int symbol_code = 0;
  18. static int * token_data = null;
  19. static int * token_type = null;
  20. /*static char * * string_name = null;*/
  21. static char * * string_data = null;
  22. static int * string_size = null;
  23. /*static char * * number_name = null;*/
  24. static int * number_data = null;
  25. static char * * marker_name = null;
  26. static char * * type_name = null;
  27. static int * marker_type = null;
  28. static char * symbol_data = null;
  29. #include <stdio.h>
  /* Spelling of every reserved word, in the same order as the word_* enumerators. */
  static char * word_list [] = {
      "type",
      "loop",
      "if",
      "else",
      "case",
      "return",
      "import",
      "export",
      "define",
      "system"
  };
  34. static void add_token (int type, int data) {
  35. token_data = reallocate (token_data, (token_count + 1) * (int) sizeof (* token_data));
  36. token_type = reallocate (token_type, (token_count + 1) * (int) sizeof (* token_type));
  37. token_data [token_count] = data;
  38. token_type [token_count] = type;
  39. ++token_count;
  40. }
  41. static void add_string (char * data, int size) {
  42. string_data = reallocate (string_data, (string_code + 1) * (int) sizeof (* string_data));
  43. string_size = reallocate (string_size, (string_code + 1) * (int) sizeof (* string_size));
  44. string_data [string_code] = allocate (STRING_LIMIT * (int) sizeof (* string_data));
  45. string_copy (string_data [string_code], data);
  46. string_size [string_code] = size;
  47. ++string_code;
  48. }
  49. static void add_number (int data) {
  50. number_data = reallocate (number_data, (number_code + 1) * (int) sizeof (* number_data));
  51. number_data [number_code] = data;
  52. ++number_code;
  53. }
  54. static void add_marker (char * name, int type) {
  55. marker_name = reallocate (marker_name, (marker_code + 1) * (int) sizeof (* marker_name));
  56. marker_type = reallocate (marker_type, (marker_code + 1) * (int) sizeof (* marker_type));
  57. marker_name [marker_code] = allocate (NAME_LIMIT * (int) sizeof (* marker_name));
  58. string_copy (marker_name [marker_code], name);
  59. marker_type [marker_code] = type;
  60. ++marker_code;
  61. }
  62. static void add_type (char * name) {
  63. type_name = reallocate (type_name, (type_code + 1) * (int) sizeof (* type_name));
  64. type_name [type_code] = allocate (NAME_LIMIT * (int) sizeof (* type_name));
  65. string_copy (type_name [type_code], name);
  66. ++type_code;
  67. }
  68. static void add_symbol (char data) {
  69. symbol_data = reallocate (symbol_data, (symbol_code + 1) * (int) sizeof (* symbol_data));
  70. symbol_data [symbol_code] = data;
  71. ++symbol_code;
  72. }
  73. static void kill (char * data) {
  74. terminal_colour (colour_red, effect_bold);
  75. echo (data);
  76. terminal_cancel ();
  77. exit (log_failure);
  78. }
  79. int main (void) {
  80. char * buffer = null;
  81. int offset = 0;
  82. int length = 0;
  83. buffer = file_import ("./test.x");
  84. for (offset = 0; buffer [offset] != '\0'; ++offset) {
  85. if ((buffer [offset] == '-') && (buffer [offset + 1] == '-') && (buffer [offset + 2] == '-') && (buffer [offset + 1] != '\0') && (buffer [offset + 2] != '\0')) {
  86. for (; buffer [offset] != '\n'; ++offset);
  87. } else if (buffer [offset] == '"') {
  88. int size = 0;
  89. char data [STRING_LIMIT] = "";
  90. for (++offset; (buffer [offset] != '"') && (buffer [offset] != '\0'); ++offset) {
  91. data [size++] = buffer [offset];
  92. }
  93. data [size] = '\0';
  94. add_token (core_string, string_code);
  95. add_string (data, size);
  96. } else if (character_is_digit (buffer [offset]) == true) {
  97. int data = buffer [offset] - '0';
  98. for (++offset; (character_is_digit (buffer [offset]) == true) && (buffer [offset] != '\0'); ++offset) {
  99. data *= 10;
  100. data += (buffer [offset] - '0');
  101. }
  102. add_token (core_number, number_code);
  103. add_number (data);
  104. --offset;
  105. } else if (character_is_alpha (buffer [offset]) == true) {
  106. int size = 0;
  107. char name [NAME_LIMIT] = "";
  108. for (; ((character_is_alpha (buffer [offset]) == true) ||
  109. (character_is_digit (buffer [offset]) == true) ||
  110. (character_is_underscore (buffer [offset]) == true)) && (buffer [offset] != '\0'); ++offset) {
  111. name [size++] = buffer [offset];
  112. }
  113. name [size] = '\0';
  114. for (length = 0; length < 10; ++length) {
  115. if (string_compare_limit (name, word_list [length], string_length (word_list [length]) + 1) == true) {
  116. add_token (core_word, length);
  117. goto here;
  118. }
  119. }
  120. add_token (core_marker, marker_code);
  121. add_marker (name, token_type [token_count - 1]);
  122. if ((token_type [token_count - 2] == core_word) && (token_data [token_count - 2] == word_type)) { // NEW TYPE
  123. token_type [token_count - 1] = core_type;
  124. add_type (name);
  125. }
  126. for (length = 0; length < type_code; ++length) {
  127. if (string_compare_limit (name, type_name [length], string_length (type_name [length]) + 1) == true) { // EXISTING TYPE
  128. token_type [token_count - 1] = core_type;
  129. }
  130. }
  131. here:
  132. --offset;
  133. } else if (character_compare_array (buffer [offset], ",.;:=<>&|!+-*/%()[]") == true) {
  134. add_token (core_symbol, symbol_code);
  135. add_symbol (buffer [offset]);
  136. } else {
  137. if (character_is_blank (buffer [offset]) == false) {
  138. echo ("\033[1;31mCharacter set exception point: Segmentation\033[0m\n");
  139. printf ("%c -- %i\n", buffer [offset], (int) buffer [offset]);
  140. exit (log_failure);
  141. }
  142. }
  143. }
  144. for (length = 0; length < token_count; ++length) {
  145. switch (token_type [length]) {
  146. case core_symbol: printf ("\033[1;34m%c\033[0m ", symbol_data [token_data [length]]); break;
  147. case core_string: printf ("\033[1;31m%s\033[0m ", string_data [token_data [length]]); break;
  148. case core_number: printf ("\033[1;32m%i\033[0m ", number_data [token_data [length]]); break;
  149. case core_type: printf ("\033[1;36m%s\033[0m ", marker_name [token_data [length]]); break;
  150. case core_marker: printf ("\033[1;33m%s\033[0m ", marker_name [token_data [length]]); break;
  151. case core_word: printf ("\033[1;35m%s\033[0m ", word_list [token_data [length]]); break;
  152. default: break;
  153. }
  154. }
  155. printf ("\n");
  156. for (length = 0; length < token_count; ++length) {
  157. if ((token_type [length] == core_word) && (token_data [length] == word_return)) {
  158. ++length;
  159. if ((token_type [length] == core_symbol) && (symbol_data [token_data [length]] == '(')) {
  160. echo ("return (\n");
  161. ++length;
  162. } else if ((token_type [length] == core_symbol) && (symbol_data [token_data [length]] == ';')) {
  163. echo ("return ; -- 48 33 C0 3C -- xor rax rax ret\n");
  164. ++length;
  165. } else {
  166. kill ("return ?\n");
  167. }
  168. } else if ((token_type [length] == core_word) && (token_data [length] == word_if)) {
  169. ++length;
  170. if ((token_type [length] == core_symbol) && (symbol_data [token_data [length]] == '(')) {
  171. echo ("if (\n");
  172. ++length;
  173. } else {
  174. kill ("if ?\n");
  175. }
  176. } else if ((token_type [length] == core_word) && (token_data [length] == word_else)) {
  177. ++length;
  178. if ((token_type [length] == core_symbol) && (symbol_data [token_data [length]] == ':')) {
  179. echo ("else :\n");
  180. ++length;
  181. } else if ((token_type [length] == core_word) && (token_data [length] == word_if)) {
  182. echo ("else if\n");
  183. ++length;
  184. } else {
  185. kill ("else ?\n");
  186. }
  187. } else if ((token_type [length] == core_word) && (token_data [length] == word_type)) {
  188. ++length;
  189. if (token_type [length] == core_type) {
  190. echo ("type <name> -- ");
  191. echo (marker_name [token_data [length]]);
  192. echo ("\n");
  193. ++length;
  194. } else {
  195. kill ("type ?\n");
  196. }
  197. }
  198. }
  199. for (length = 0; length < string_code; ++length) { string_data [length] = deallocate (string_data [length]); }
  200. for (length = 0; length < marker_code; ++length) { marker_name [length] = deallocate (marker_name [length]); }
  201. for (length = 0; length < type_code; ++length) { type_name [length] = deallocate (type_name [length]); }
  202. string_data = deallocate (string_data);
  203. string_size = deallocate (string_size);
  204. marker_name = deallocate (marker_name);
  205. marker_type = deallocate (marker_type);
  206. type_name = deallocate (type_name);
  207. symbol_data = deallocate (symbol_data);
  208. number_data = deallocate (number_data);
  209. token_data = deallocate (token_data);
  210. token_type = deallocate (token_type);
  211. buffer = deallocate (buffer);
  212. return (log_success);
  213. }