Highlight things
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

781 lines
20KB

  1. #ifdef __cplusplus
  2. # pragma GCC diagnostic ignored "-Wc++20-extensions"
  3. #endif
  4. #include "jeger.h"
  5. #include <assert.h>
  6. #include <string.h>
  7. #include <limits.h>
  8. #include <stdlib.h>
  9. #define JEGER_INIT_STATE 2
  10. // ------------------
  11. // ### Char tests ###
  12. // ------------------
  13. static inline
  14. bool mystrchr(const char * const str, const char c){
  15. for (const char * s = str; *s != '\00'; s++) {
  16. if (*s == c) {
  17. return true;
  18. }
  19. }
  20. return false;
  21. }
  22. static inline
  23. bool is_quantifier(const char c) {
  24. return mystrchr("=?+*", c);
  25. }
  26. static inline
  27. bool is_hologram_escape(const char c) {
  28. return mystrchr("<>", c);
  29. }
  30. bool is_magic(const char c) {
  31. return is_quantifier(c)
  32. || mystrchr("\\[].^", c)
  33. ;
  34. }
  35. // -----------------
  36. // ### Char sets ###
  37. // -----------------
  38. #define JEGER_CHAR_SET_at "@"
  39. #define JEGER_CHAR_SET_underscore "_"
  40. #define JEGER_CHAR_SET_lower "abcdefghijklmnopqrstuwxyz"
  41. #define JEGER_CHAR_SET_upper "ABCDEFGHIJKLMNOPQRSTUWXYZ"
  42. #define JEGER_CHAR_SET_digits "0123456789"
  43. #define JEGER_CHAR_SET_octal_digits "01234567"
  44. #define JEGER_CHAR_SET_lower_hex "abcdef"
  45. #define JEGER_CHAR_SET_upper_hex "ABCDEF"
  46. #define JEGER_CHAR_SET_oct_241_to_277 \
  47. "\241\242\243\244\245" \
  48. "\246\247\250\251\252" \
  49. "\253\254\255\256\257" \
  50. "\260\261\262\263\264" \
  51. "\265\266\267\270\271" \
  52. "\272\273\274\275\276" \
  53. "\277"
  54. #define JEGER_CHAR_SET_oct_300_to_337 \
  55. "\300\301\302\303\304" \
  56. "\305\306\307\310\311" \
  57. "\312\313\314\315\316" \
  58. "\317\320\321\322\323" \
  59. "\324\325\326\327\330" \
  60. "\331\332\333\334\335" \
  61. "\336\337"
  62. #define JEGER_CHAR_SET_file_extra "/.-_+,#$%~="
  63. #define JEGER_CHAR_SET_whitespace " \t\v\n"
  64. static const char JEGER_CHAR_very_word_chars[] =
  65. JEGER_CHAR_SET_underscore
  66. JEGER_CHAR_SET_lower
  67. JEGER_CHAR_SET_upper
  68. ;
  69. // ----------------------
  70. // ### Internal Types ###
  71. // ----------------------
  72. typedef struct {
  73. int in;
  74. char input;
  75. int to;
  76. int pattern_width;
  77. int match_width;
  78. } delta_t;
  79. typedef struct {
  80. int in;
  81. int to;
  82. int pattern_width;
  83. int match_width;
  84. } offshoot_t;
  85. enum {
  86. DO_CATCH = 0x00000001 << 0,
  87. IS_NEGATIVE = 0x00000001 << 1,
  88. IS_AT_THE_BEGINNING = 0x00000001 << 2,
  89. FORCE_START_OF_STRING = 0x00000001 << 3,
  90. INCREMENT_STATE = 0x00000001 << 4,
  91. };
  92. typedef struct {
  93. int flags;
  94. int state;
  95. int width;
  96. char * whitelist;
  97. char * blacklist;
  98. } compiler_state;
  99. // ----------------------------------
  100. // ### Regex creation/destruction ###
  101. // ----------------------------------
  102. static const int HALT_AND_CATCH_FIRE = INT_MIN;
  103. #define ASSERT_HALT(a) ((a == HALT_AND_CATCH_FIRE) ? HALT_AND_CATCH_FIRE : (cs->state + a))
  104. static
  105. void HOOK_ALL(const int from,
  106. const char * const str,
  107. const int to,
  108. const compiler_state * const cs,
  109. regex_t * regex) {
  110. for (const char * s = str; *s != '\0'; s++) {
  111. delta_t * delta = (delta_t *)malloc(sizeof(delta_t));
  112. *delta = (delta_t){
  113. .in = cs->state + from,
  114. .input = *s,
  115. .to = ASSERT_HALT(to),
  116. .pattern_width = cs->width,
  117. .match_width = 1,
  118. };
  119. vector_push(&regex->delta_table,
  120. &delta);
  121. }
  122. }
  123. static
  124. void ABSOLUTE_OFFSHOOT(const int from,
  125. const int to,
  126. const int width,
  127. const int match_width,
  128. regex_t * regex) {
  129. offshoot_t * offshoot = (offshoot_t *)malloc(sizeof(offshoot_t));
  130. *offshoot = (offshoot_t){
  131. .in = from,
  132. .to = to,
  133. .pattern_width = width,
  134. .match_width = match_width,
  135. };
  136. vector_push(&regex->catch_table,
  137. &offshoot);
  138. }
  139. static
  140. void OFFSHOOT(const int from,
  141. const int to,
  142. const int width,
  143. const int match_width,
  144. const compiler_state * cs,
  145. regex_t * regex) {
  146. ABSOLUTE_OFFSHOOT(cs->state + from, ASSERT_HALT(to), width, match_width, regex);
  147. }
  148. static
  149. int escape_1_to_1(const char c,
  150. const compiler_state * const cs) {
  151. char * target_list = (cs->flags & IS_NEGATIVE) ? cs->blacklist : cs->whitelist;
  152. switch (c) {
  153. case 't': {
  154. strcat(target_list, "\t");
  155. } return 1;
  156. case 'n': {
  157. strcat(target_list, "\n");
  158. } return 1;
  159. case 'r': {
  160. strcat(target_list, "\r");
  161. } return 1;
  162. case 'b': {
  163. strcat(target_list, "\b");
  164. } return 1;
  165. case '[': {
  166. strcat(target_list, "[");
  167. } return 1;
  168. case ']': {
  169. strcat(target_list, "]");
  170. } return 1;
  171. case '.': {
  172. strcat(target_list, ".");
  173. } return 1;
  174. case '^': {
  175. strcat(target_list, "^");
  176. } return 1;
  177. case '=': {
  178. strcat(target_list, "=");
  179. } return 1;
  180. case '?': {
  181. strcat(target_list, "?");
  182. } return 1;
  183. case '+': {
  184. strcat(target_list, "+");
  185. } return 1;
  186. case '*': {
  187. strcat(target_list, "*");
  188. } return 1;
  189. case '\\': {
  190. strcat(target_list, "\\");
  191. } return 1;
  192. }
  193. return 0;
  194. }
  195. static
  196. int escape_1_to_N(const char c,
  197. const compiler_state * const cs) {
  198. char * target_list = (cs->flags & IS_NEGATIVE) ? cs->blacklist : cs->whitelist;
  199. switch(c) {
  200. case 'i': {
  201. const char identifier_chars[] = JEGER_CHAR_SET_at
  202. JEGER_CHAR_SET_underscore
  203. JEGER_CHAR_SET_digits
  204. JEGER_CHAR_SET_oct_300_to_337
  205. ;
  206. strcpy(target_list, identifier_chars);
  207. return sizeof(identifier_chars)-1;
  208. };
  209. case 'I': {
  210. const char identifier_chars[] = JEGER_CHAR_SET_at
  211. JEGER_CHAR_SET_underscore
  212. JEGER_CHAR_SET_oct_300_to_337
  213. ;
  214. strcpy(target_list, identifier_chars);
  215. return sizeof(identifier_chars)-1;
  216. };
  217. case 'k': {
  218. const char keyword_chars[] = JEGER_CHAR_SET_at
  219. JEGER_CHAR_SET_underscore
  220. JEGER_CHAR_SET_digits
  221. JEGER_CHAR_SET_oct_300_to_337
  222. ;
  223. strcpy(target_list, keyword_chars);
  224. return sizeof(keyword_chars)-1;
  225. };
  226. case 'K': {
  227. const char keyword_chars[] = JEGER_CHAR_SET_at
  228. JEGER_CHAR_SET_underscore
  229. JEGER_CHAR_SET_oct_300_to_337
  230. ;
  231. strcpy(target_list, keyword_chars);
  232. return sizeof(keyword_chars)-1;
  233. };
  234. case 'f': {
  235. const char filename_chars[] = JEGER_CHAR_SET_at
  236. JEGER_CHAR_SET_digits
  237. JEGER_CHAR_SET_file_extra
  238. ;
  239. strcpy(target_list, filename_chars);
  240. return sizeof(filename_chars)-1;
  241. };
  242. case 'F': {
  243. const char filename_chars[] = JEGER_CHAR_SET_at
  244. JEGER_CHAR_SET_file_extra
  245. ;
  246. strcpy(target_list, filename_chars);
  247. return sizeof(filename_chars)-1;
  248. };
  249. case 'p': {
  250. const char printable_chars[] = JEGER_CHAR_SET_at
  251. JEGER_CHAR_SET_oct_241_to_277
  252. JEGER_CHAR_SET_oct_300_to_337
  253. ;
  254. strcpy(target_list, printable_chars);
  255. return sizeof(printable_chars)-1;
  256. };
  257. case 'P': {
  258. const char printable_chars[] = JEGER_CHAR_SET_at
  259. JEGER_CHAR_SET_oct_241_to_277
  260. JEGER_CHAR_SET_oct_300_to_337
  261. ;
  262. strcpy(target_list, printable_chars);
  263. return sizeof(printable_chars)-1;
  264. };
  265. case 's': {
  266. const char whitespace_chars[] = JEGER_CHAR_SET_whitespace;
  267. strcpy(target_list, whitespace_chars);
  268. return sizeof(whitespace_chars)-1;
  269. };
  270. case 'd': {
  271. const char digit_chars[] = JEGER_CHAR_SET_digits;
  272. strcpy(target_list, digit_chars);
  273. return sizeof(digit_chars)-1;
  274. };
  275. case 'x': {
  276. const char hex_chars[] = JEGER_CHAR_SET_digits
  277. JEGER_CHAR_SET_lower_hex
  278. JEGER_CHAR_SET_upper_hex
  279. ;
  280. strcpy(target_list, hex_chars);
  281. return sizeof(hex_chars)-1;
  282. };
  283. case 'o': {
  284. const char oct_chars[] = JEGER_CHAR_SET_octal_digits;
  285. strcpy(target_list, oct_chars);
  286. return sizeof(oct_chars)-1;
  287. };
  288. case 'w': {
  289. const char word_chars[] = JEGER_CHAR_SET_underscore
  290. JEGER_CHAR_SET_digits
  291. JEGER_CHAR_SET_lower
  292. JEGER_CHAR_SET_upper
  293. ;
  294. strcpy(target_list, word_chars);
  295. return sizeof(word_chars)-1;
  296. };
  297. case 'h': {
  298. // #global JEGER_CHAR_very_word_chars
  299. strcpy(target_list, JEGER_CHAR_very_word_chars);
  300. return sizeof(JEGER_CHAR_very_word_chars)-1;
  301. };
  302. case 'a': {
  303. const char alpha_chars[] = JEGER_CHAR_SET_lower
  304. JEGER_CHAR_SET_upper
  305. ;
  306. strcpy(target_list, alpha_chars);
  307. return sizeof(alpha_chars)-1;
  308. };
  309. case 'l': {
  310. const char lower_alpha_chars[] = JEGER_CHAR_SET_lower;
  311. strcpy(target_list, lower_alpha_chars);
  312. return sizeof(lower_alpha_chars)-1;
  313. };
  314. case 'u': {
  315. const char upper_alpha_chars[] = JEGER_CHAR_SET_upper;
  316. strcpy(target_list, upper_alpha_chars);
  317. return sizeof(upper_alpha_chars)-1;
  318. };
  319. }
  320. return 0;
  321. }
  322. static inline
  323. int escape_to_negative(const char c,
  324. compiler_state * const cs) {
  325. switch (c) {
  326. case 'D': {
  327. const char digit_chars[] = JEGER_CHAR_SET_digits;
  328. strcpy(cs->blacklist, digit_chars);
  329. cs->flags |= IS_NEGATIVE;
  330. return sizeof(digit_chars)-1;
  331. };
  332. case 'X': {
  333. const char hex_chars[] = JEGER_CHAR_SET_digits
  334. JEGER_CHAR_SET_lower_hex
  335. JEGER_CHAR_SET_upper_hex
  336. ;
  337. strcpy(cs->blacklist, hex_chars);
  338. cs->flags |= IS_NEGATIVE;
  339. return sizeof(hex_chars)-1;
  340. };
  341. case 'O': {
  342. const char oct_chars[] = JEGER_CHAR_SET_octal_digits;
  343. strcpy(cs->blacklist, oct_chars);
  344. cs->flags |= IS_NEGATIVE;
  345. return sizeof(oct_chars)-1;
  346. };
  347. case 'W': {
  348. const char word_chars[] = JEGER_CHAR_SET_underscore
  349. JEGER_CHAR_SET_digits
  350. JEGER_CHAR_SET_lower
  351. JEGER_CHAR_SET_upper
  352. ;
  353. strcpy(cs->blacklist, word_chars);
  354. cs->flags |= IS_NEGATIVE;
  355. return sizeof(word_chars)-1;
  356. };
  357. case 'L': {
  358. const char lower_alpha_chars[] = JEGER_CHAR_SET_lower;
  359. strcpy(cs->blacklist, lower_alpha_chars);
  360. cs->flags |= IS_NEGATIVE;
  361. return sizeof(lower_alpha_chars)-1;
  362. };
  363. case 'U': {
  364. const char upper_alpha_chars[] = JEGER_CHAR_SET_upper;
  365. strcpy(cs->blacklist, upper_alpha_chars);
  366. cs->flags |= IS_NEGATIVE;
  367. return sizeof(upper_alpha_chars)-1;
  368. };
  369. }
  370. return 0;
  371. }
  372. static inline
  373. int compile_dot(compiler_state * const cs) {
  374. cs->flags |= DO_CATCH;
  375. return true;
  376. }
  377. static inline
  378. int compile_escape(const char c,
  379. compiler_state * const cs) {
  380. return escape_1_to_1(c, cs)
  381. || escape_1_to_N(c, cs)
  382. || escape_to_negative(c, cs)
  383. ;
  384. }
  385. static
  386. int compile_range(const char * const range,
  387. compiler_state * const cs) {
  388. assert((range[0] == '[') && "Not a range.");
  389. const char * s;
  390. if (range[1] == '^') {
  391. cs->flags |= IS_NEGATIVE;
  392. s = range + 2;
  393. } else {
  394. s = range + 1;
  395. }
  396. char * target_list = (cs->flags & IS_NEGATIVE) ? cs->blacklist : cs->whitelist;
  397. for (; *s != ']'; s++) {
  398. assert((*s != '\0') && "Unclosed range.");
  399. char c = *s;
  400. if (c == '\\') {
  401. s += 1;
  402. assert(compile_escape(*s, cs) && "Unknown escape.");
  403. } else if (*(s+1) == '-') {
  404. char end = *(s+2);
  405. assert((c < end) && "Endless range.");
  406. for (char cc = c; cc < end+1; cc++) {
  407. strncat(target_list, &cc, 1);
  408. strncat(target_list, "\0", 1);
  409. }
  410. s += 2;
  411. } else {
  412. strncat(target_list, &c, 1);
  413. }
  414. }
  415. return ((s - range) + 1);
  416. }
  417. static
  418. void filter_blacklist(const char * whitelist,
  419. const char * blacklist,
  420. char * filtered) {
  421. for (; *blacklist != '\0'; blacklist++) {
  422. for (; *whitelist != '\0'; whitelist++) {
  423. if (*blacklist == *whitelist) {
  424. goto long_continue;
  425. }
  426. }
  427. strncat(filtered, blacklist, 1);
  428. long_continue:
  429. ;
  430. }
  431. }
  432. regex_t * regex_compile(const char * const pattern) {
  433. regex_t * regex = (regex_t *)malloc(sizeof(regex_t));
  434. regex->str = strdup(pattern);
  435. vector_init(&regex->delta_table, sizeof(delta_t*), 0UL);
  436. vector_init(&regex->catch_table, sizeof(offshoot_t*), 0UL);
  437. char whitelist[64];
  438. char blacklist[64];
  439. compiler_state cs = {
  440. .flags = IS_AT_THE_BEGINNING,
  441. .state = JEGER_INIT_STATE,
  442. .whitelist = whitelist,
  443. .blacklist = blacklist,
  444. };
  445. for (const char * s = pattern; *s != '\00';) {
  446. assert(!is_quantifier(*s) && "Pattern starts with quantifier.");
  447. // Reset the compiler
  448. whitelist[0] = '\0';
  449. blacklist[0] = '\0';
  450. cs.flags &= (IS_AT_THE_BEGINNING | FORCE_START_OF_STRING);
  451. cs.width = 1;
  452. // Translate char
  453. switch (*s) {
  454. case '^': {
  455. ;
  456. } break;
  457. case '.': {
  458. compile_dot(&cs);
  459. s += 1;
  460. } break;
  461. case '\\': {
  462. s += 1;
  463. if (compile_escape(*s, &cs)) {
  464. s += 1;
  465. } else if (is_hologram_escape(*s)) {
  466. ;
  467. } else {
  468. assert("Unknown escape.");
  469. }
  470. } break;
  471. case '[': {
  472. s += compile_range(s, &cs);
  473. } break;
  474. default: { // Literal
  475. whitelist[0] = *s;
  476. whitelist[1] = '\0';
  477. s += 1;
  478. } break;
  479. }
  480. // Compile char
  481. switch (*s) {
  482. // holograms
  483. case '^': {
  484. whitelist[0] = '\n';
  485. whitelist[1] = '\0';
  486. HOOK_ALL(0, whitelist, 0, &cs, regex);
  487. if (cs.flags & IS_AT_THE_BEGINNING) {
  488. cs.flags |= FORCE_START_OF_STRING;
  489. } else {
  490. cs.flags |= INCREMENT_STATE;
  491. }
  492. s += 1;
  493. } break;
  494. case '<': {
  495. cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
  496. if (cs.flags & IS_AT_THE_BEGINNING) {
  497. ABSOLUTE_OFFSHOOT(0, JEGER_INIT_STATE+1, 0, 0, regex);
  498. }
  499. strcat(blacklist, JEGER_CHAR_very_word_chars);
  500. OFFSHOOT(0, 0, 1, 0, &cs, regex);
  501. s += 1;
  502. } break;
  503. case '>': {
  504. cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
  505. strcat(blacklist, JEGER_CHAR_very_word_chars);
  506. OFFSHOOT(0, 1, 0, 0, &cs, regex);
  507. s += 1;
  508. } break;
  509. // quantifiers
  510. case '=':
  511. case '?': {
  512. HOOK_ALL(0, whitelist, +1, &cs, regex);
  513. if ((cs.flags & DO_CATCH)
  514. || (cs.flags & IS_NEGATIVE)) {
  515. OFFSHOOT(0, +1, 1, 1, &cs, regex);
  516. }
  517. s += 1;
  518. } break;
  519. case '*': {
  520. HOOK_ALL(0, whitelist, 0, &cs, regex);
  521. if ((cs.flags & DO_CATCH)
  522. || (cs.flags & IS_NEGATIVE)) {
  523. OFFSHOOT(0, 0, 1, 1, &cs, regex);
  524. }
  525. s += 1;
  526. } break;
  527. case '+': {
  528. cs.flags |= INCREMENT_STATE;
  529. HOOK_ALL(0, whitelist, +1, &cs, regex);
  530. if ((cs.flags & DO_CATCH)
  531. || (cs.flags & IS_NEGATIVE)) {
  532. OFFSHOOT(0, +1, 1, 1, &cs, regex);
  533. }
  534. HOOK_ALL(+1, whitelist, +1, &cs, regex);
  535. if ((cs.flags & DO_CATCH)
  536. || (cs.flags & IS_NEGATIVE)) {
  537. OFFSHOOT(+1, +1, 1, 1, &cs, regex);
  538. }
  539. s += 1;
  540. } break;
  541. default: { // Literal
  542. cs.flags |= INCREMENT_STATE;
  543. HOOK_ALL(0, whitelist, +1, &cs, regex);
  544. if ((cs.flags & DO_CATCH)
  545. || (cs.flags & IS_NEGATIVE)) {
  546. OFFSHOOT(0, +1, 1, 1, &cs, regex);
  547. }
  548. } break;
  549. }
  550. // Compile blacklist
  551. if (*blacklist) {
  552. char filtered_blacklist[64];
  553. filtered_blacklist[0] = '\0';
  554. filter_blacklist(whitelist, blacklist, filtered_blacklist);
  555. HOOK_ALL(0, filtered_blacklist, HALT_AND_CATCH_FIRE, &cs, regex);
  556. }
  557. if (cs.flags & INCREMENT_STATE) {
  558. ++cs.state;
  559. }
  560. cs.flags &= (~IS_AT_THE_BEGINNING);
  561. }
  562. // Init state hookups
  563. ABSOLUTE_OFFSHOOT(0, JEGER_INIT_STATE, 0, 0, regex);
  564. if (cs.flags & FORCE_START_OF_STRING) {
  565. ABSOLUTE_OFFSHOOT(1, HALT_AND_CATCH_FIRE, 0, 0, regex);
  566. } else {
  567. ABSOLUTE_OFFSHOOT(1, JEGER_INIT_STATE, 0, 0, regex);
  568. }
  569. regex->accepting_state = cs.state;
  570. return regex;
  571. }
  572. int regex_free(regex_t * const regex) {
  573. free(regex->str);
  574. vector_free(&regex->delta_table);
  575. vector_free(&regex->catch_table);
  576. free(regex);
  577. return 0;
  578. }
  579. // -----------------
  580. // ### Searching ###
  581. // -----------------
  582. static
  583. const offshoot_t * catch_table_lookup(const regex_t * const regex,
  584. const int * const state) {
  585. for (size_t i = 0; i < regex->catch_table.element_count; i++){
  586. const offshoot_t * const offshoot = *(offshoot_t**)vector_get(&regex->catch_table, i);
  587. if (offshoot->in == *state) {
  588. return offshoot;
  589. }
  590. }
  591. return NULL;
  592. }
  593. static
  594. bool regex_assert(const regex_t * const regex,
  595. const char * const string,
  596. int state,
  597. match_t * const match) {
  598. if (state == HALT_AND_CATCH_FIRE) {
  599. return false;
  600. }
  601. bool last_stand = false;
  602. bool was_found;
  603. const char * s = string;
  604. LOOP: {
  605. was_found = false;
  606. if (*s == '\0') {
  607. last_stand = true;
  608. goto PERFORM_CATCH_LOOKUP;
  609. }
  610. // Jump search for the correct state
  611. const int jump = 10;
  612. size_t i = jump;
  613. while (i < regex->delta_table.element_count) {
  614. const delta_t * const delta = *(delta_t**)vector_get(&regex->delta_table, i);
  615. if (delta->in >= state) {
  616. break;
  617. }
  618. i += jump;
  619. }
  620. i -= jump;
  621. // Linear search finish up
  622. for (; i < regex->delta_table.element_count; i++) {
  623. const delta_t * const delta = *(delta_t**)vector_get(&regex->delta_table, i);
  624. if (delta->in > state) {
  625. break;
  626. }
  627. if ((delta->in == state)
  628. && (delta->input == *s)) {
  629. was_found = true;
  630. const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
  631. if(r){
  632. if (match->position == -1) {
  633. match->position = (s - string);
  634. }
  635. match->width += delta->match_width;
  636. return r;
  637. }
  638. }
  639. }
  640. }
  641. PERFORM_CATCH_LOOKUP: {
  642. if (!was_found) {
  643. const offshoot_t * const my_catch = catch_table_lookup(regex, &state);
  644. if (my_catch && (!my_catch->pattern_width || !last_stand)) {
  645. state = my_catch->to;
  646. s += my_catch->pattern_width;
  647. match->width += my_catch->match_width;
  648. goto LOOP;
  649. }
  650. }
  651. }
  652. return (state == regex->accepting_state);
  653. }
  654. match_t * regex_match(const regex_t * const regex,
  655. const char * const string,
  656. const bool is_start_of_string) {
  657. vector_t matches;
  658. vector_init(&matches, sizeof(match_t), 0);
  659. match_t * match = (match_t *)malloc(sizeof(match_t));
  660. /* Non-existent regex does not match anything.
  661. * Not to be confused with an empty regex.
  662. */
  663. if (regex == NULL) {
  664. goto FINISH;
  665. }
  666. // Find all matches
  667. {
  668. const char * s = string;
  669. do {
  670. int initial_state;
  671. initial_state = (int)(!(is_start_of_string && (s == string)));
  672. *match = (match_t){
  673. .position = -1,
  674. .width = 0,
  675. };
  676. if (regex_assert(regex, s, initial_state, match)) {
  677. match->position = (s - string);
  678. vector_push(&matches, match);
  679. s += ((match->width > 0) ? match->width : 1);
  680. match = (match_t *)malloc(sizeof(match_t));
  681. } else {
  682. ++s;
  683. }
  684. } while (*s != '\0');
  685. }
  686. FINISH:
  687. // Insert sentinel
  688. *match = (match_t){
  689. .position = -1,
  690. .width = -1,
  691. };
  692. vector_push(&matches, match);
  693. // Hide internal vector usage
  694. const size_t data_size = matches.element_size * matches.element_count;
  695. match_t * r = (match_t *)malloc(data_size);
  696. memcpy(r, matches.data, data_size);
  697. vector_free(&matches);
  698. return r;
  699. }
  700. bool regex_search(const regex_t * const regex,
  701. const char * const string) {
  702. match_t * m = regex_match(regex, string, true);
  703. const bool r = (m->position != -1);
  704. free(m);
  705. return r;
  706. }