libhl/source/regex.c
2023-08-21 16:13:50 +02:00

202 lines
5.0 KiB
C

#include "regex.h"
/* Global flag, initialised to true. Nothing in the visible part of this file
 * reads it.
 * NOTE(review): presumably toggles case-sensitive matching -- confirm against
 * the users of regex.h. */
bool is_case_on = true;
/* Tells whether the character following *s is something other than the
 * string terminator. The caller must guarantee that *s itself is not the
 * terminator, otherwise s[1] lies outside the string. */
static bool is_next_valid(const char * const s) {
    return s[1] != '\0';
}
/* Returns true when `character` lies in the inclusive range [start, end].
 *
 * An empty range (start > end) contains nothing, so the result is false.
 * Both bounds are part of the range: char_in_range('0', '9', '9') is true,
 * and a single-character range such as ('a', 'a') matches exactly 'a'. */
static bool char_in_range(const char start,
                          const char end,
                          const char character) {
    return (start <= character) && (character <= end);
}
/* Returns true when `character` delimits a word: it is a 7-bit ASCII
 * character that is neither alphanumeric nor an underscore.
 *
 * Anything outside 7-bit ASCII is never treated as a separator. The ASCII
 * test is written out by hand because isascii() is a POSIX extension
 * (marked obsolescent) rather than ISO C, and is not declared when
 * compiling in strict standard mode. */
static bool is_word_separator(const char character) {
    /* Going through unsigned char makes bytes with the high bit set compare
     * above 0x7F regardless of whether plain char is signed. */
    const unsigned char byte = (unsigned char) character;

    if (byte > 0x7F) {
        return false;
    }
    if (character == '_') {
        return false;
    }
    return !isalnum(byte);
}
/* Evaluates a single-letter character class escape against one character.
 *
 * `magic_char` is the letter that followed the backslash in the pattern and
 * `to_enchant` is the candidate character. A lower-case letter selects a
 * class and the matching upper-case letter selects its complement. Range
 * membership is delegated to char_in_range(). Unknown letters match nothing.
 *
 * Intended classes:
 *   \s  space or tab        \S  anything else
 *   \d  [0-9]               \D  [^0-9]
 *   \x  [0-9A-Fa-f]         \X  [^0-9A-Fa-f]
 *   \o  [0-7]               \O  [^0-7]
 *   \w  [0-9A-Za-z_]        \W  [^0-9A-Za-z_]
 *   \h  [A-Za-z_]           \H  [^A-Za-z_]
 *   \a  [A-Za-z]            \A  [^A-Za-z]
 *   \l  [a-z]               \L  [^a-z]
 *   \u  [A-Z]               \U  [^A-Z]
 *
 * Not implemented (listed in the original notes, no case exists for them):
 *   \i \I  identifier character   \k \K  keyword character
 *   \f \F  file name character    \p \P  printable character
 */
static bool magic(const char magic_char, const char to_enchant) {
    bool complement = false;  /* set by the upper-case spelling of a class */
    bool member = false;      /* membership in the lower-case (positive) class */

    switch (magic_char) {
    case 'S':
        complement = true;
        /* fallthrough */
    case 's':
        member = (to_enchant == ' ') || (to_enchant == '\t');
        break;

    case 'D':
        complement = true;
        /* fallthrough */
    case 'd':
        member = char_in_range('0', '9', to_enchant);
        break;

    case 'X':
        complement = true;
        /* fallthrough */
    case 'x':
        member = char_in_range('0', '9', to_enchant)
              || char_in_range('A', 'F', to_enchant)
              || char_in_range('a', 'f', to_enchant);
        break;

    case 'O':
        complement = true;
        /* fallthrough */
    case 'o':
        member = char_in_range('0', '7', to_enchant);
        break;

    case 'W':
        complement = true;
        /* fallthrough */
    case 'w':
        member = char_in_range('0', '9', to_enchant)
              || char_in_range('A', 'Z', to_enchant)
              || char_in_range('a', 'z', to_enchant)
              || (to_enchant == '_');
        break;

    case 'H':
        complement = true;
        /* fallthrough */
    case 'h':
        member = char_in_range('A', 'Z', to_enchant)
              || char_in_range('a', 'z', to_enchant)
              || (to_enchant == '_');
        break;

    case 'A':
        complement = true;
        /* fallthrough */
    case 'a':
        member = char_in_range('A', 'Z', to_enchant)
              || char_in_range('a', 'z', to_enchant);
        break;

    case 'L':
        complement = true;
        /* fallthrough */
    case 'l':
        member = char_in_range('a', 'z', to_enchant);
        break;

    case 'U':
        complement = true;
        /* fallthrough */
    case 'u':
        member = char_in_range('A', 'Z', to_enchant);
        break;

    default:
        /* Not a known class letter. */
        return false;
    }

    return complement ? !member : member;
}
/* Maps the letter of a control-character escape (\t, \r, \e, \b) to the
 * character it denotes. Returns '\0' for every other letter. */
static char control_escape(const char letter) {
    switch (letter) {
    case 't': return '\t';
    case 'r': return '\r';
    case 'e': return '\033';
    case 'b': return '\010';
    default:  return '\0';
    }
}

/* Matches `pattern` against the beginning of `string`.
 *
 * The pattern is walked left to right; every element must match at the
 * current position of the string. Supported elements:
 *   - literal characters,
 *   - \t \r \e \b   tab, carriage return, escape, backspace,
 *   - \\            a literal backslash,
 *   - \< and \>     word boundaries (see the notes in the body),
 *   - the single-letter character classes understood by magic().
 *
 * Returns the number of characters of `string` consumed once the pattern is
 * exhausted, or false (0) when the match fails, including when the string
 * ends before the pattern does. An empty pattern therefore also yields 0.
 *
 * NOTE(review): `string_start`, `match_offset` and `string_offset` are not
 * declared anywhere in the visible part of this file; they are presumably
 * provided through regex.h -- confirm, the references are kept unchanged.
 */
int regex_match(const char * const pattern,
                const char * const string) {
    const char * pattern_pointer = pattern;
    const char * string_pointer = string;

    for (;;) {
        // Pattern exhausted: everything matched.
        if (!(*pattern_pointer)) {
            break;
        }
        // String exhausted while pattern elements remain.
        if (!(*string_pointer)) {
            return false;
        }

        // Escape sequence
        if (*pattern_pointer == '\\') {
            if (!is_next_valid(pattern_pointer)) {
                return false;
            }
            const char escaped = *(pattern_pointer + 1);

            // Control characters. The comparison is made against the
            // character that is actually consumed, and the loop restarts
            // right away so that none of the checks below run on the
            // already-advanced pointers.
            const char control = control_escape(escaped);
            if (control != '\0') {
                if (*string_pointer != control) {
                    return false;
                }
                pattern_pointer += 2;
                string_pointer += 1;
                continue;
            }

            // Escaped backslash
            if (escaped == '\\' && *string_pointer == '\\') {
                pattern_pointer += 2;
                string_pointer += 1;
                continue;
            }

            // Start of word: the current character is a separator or we are
            // at `string_start`. The parentheses keep the position test tied
            // to the \< escape; without them any escape at `string_start`
            // would be accepted here.
            // NOTE(review): one character is consumed even when the
            // `string_start` alternative is the one that matched -- confirm
            // that this is intended.
            if (escaped == '<'
            &&  (is_word_separator(*string_pointer)
              || string_pointer == string_start)) {
                pattern_pointer += 2;
                string_pointer += 1;
                match_offset = string_pointer - (string_start + string_offset);
                continue;
            }

            // End of word: decided by the character after the current one.
            // Reading string_pointer[1] is safe because *string_pointer is
            // known not to be the terminator at this point.
            if (escaped == '>') {
                if (is_word_separator(*(string_pointer + 1))) {
                    pattern_pointer += 2;
                    continue;
                }
                if (*(string_pointer + 1) == '\0') {
                    break;
                }
            }

            // Character classes
            if (magic(escaped, *string_pointer)) {
                pattern_pointer += 2;
                string_pointer += 1;
                continue;
            }

            return false;
        }

        // Literal
        if (*pattern_pointer != *string_pointer) {
            return false;
        }
        ++pattern_pointer;
        ++string_pointer;
    }

    return (int) (string_pointer - string);
}