libhl/source/regex.c

212 lines
5.3 KiB
C

#include "regex.h"
// Global flag with external linkage, initialised to true. Nothing in the
// visible part of this file reads it.
// NOTE(review): presumably meant to toggle case-sensitive matching — confirm
// against regex.h and the callers before relying on it.
bool is_case_on = true;
/**
 * Report whether the character following `s[0]` is something other than the
 * string terminator.
 *
 * The caller must guarantee that `s[0]` itself is not the terminator,
 * otherwise `s[1]` lies outside the string.
 */
static bool is_next_valid(const char * const s) {
    return s[1] != '\0';
}
/**
 * Test whether `character` lies in the inclusive range [start, end].
 *
 * Both bounds are part of the range, so char_in_range('0', '9', '9') is true
 * and a single-character range such as ('x', 'x') matches 'x'.
 *
 * @param start      lowest character of the range
 * @param end        highest character of the range (inclusive)
 * @param character  character to test
 * @return true when start <= character <= end; always false when start > end
 */
static bool char_in_range(const char start,
                          const char end,
                          const char character) {
    // A plain comparison replaces the former counting loop, which stopped
    // one short of `end` and therefore never matched the upper bound.
    return (start <= character) && (character <= end);
}
/**
 * Decide whether `character` separates words.
 *
 * A separator is any 7-bit ASCII character that is neither alphanumeric nor
 * an underscore. Characters outside the 7-bit range are never separators.
 * The string terminator '\0' counts as a separator.
 *
 * @param character  character to classify
 * @return true when `character` is a word separator
 */
static bool is_word_separator(const char character) {
    // Work on the unsigned value: <ctype.h> functions require an argument
    // representable as unsigned char, and plain char may be signed.
    const unsigned char byte = (unsigned char)character;

    // Portable replacement for the non-standard isascii().
    if (byte > 0x7F) {
        return false;
    }
    if (character == '_') {
        return false;
    }
    return !isalnum(byte);
}
/**
 * Evaluate a backslash character class against a single character.
 *
 * `magic_char` is the letter that followed the backslash in the pattern and
 * `to_enchant` is the character being tested. A lowercase letter selects a
 * class and the matching uppercase letter selects its complement. Range
 * membership is decided entirely by char_in_range() with the bounds shown.
 *
 *   s / S   space or tab                      / anything else
 *   d / D   '0'..'9'                          / complement
 *   x / X   '0'..'9', 'A'..'F', 'a'..'f'      / complement
 *   o / O   '0'..'7'                          / complement
 *   w / W   '0'..'9', 'A'..'Z', 'a'..'z', '_' / complement
 *   h / H   'A'..'Z', 'a'..'z', '_'           / complement
 *   a / A   'A'..'Z', 'a'..'z'                / complement
 *   l / L   'a'..'z'                          / complement
 *   u / U   'A'..'Z'                          / complement
 *
 * Not handled (these return false like any unknown letter):
 *   \i \I  identifier character   ('isident' option)
 *   \k \K  keyword character      ('iskeyword' option)
 *   \f \F  file name character    ('isfname' option)
 *   \p \P  printable character    ('isprint' option)
 *
 * @return true when `to_enchant` belongs to the selected class, false when it
 *         does not or when `magic_char` is not a known class letter
 */
static bool magic(const char magic_char, const char to_enchant) {
    bool in_class = false;
    bool negate = false;

    switch (magic_char) {
        case 'S':
            negate = true;
            /* fallthrough */
        case 's':
            in_class = (to_enchant == ' ') || (to_enchant == '\t');
            break;

        case 'D':
            negate = true;
            /* fallthrough */
        case 'd':
            in_class = char_in_range('0', '9', to_enchant);
            break;

        case 'X':
            negate = true;
            /* fallthrough */
        case 'x':
            in_class = char_in_range('0', '9', to_enchant)
                    || char_in_range('A', 'F', to_enchant)
                    || char_in_range('a', 'f', to_enchant);
            break;

        case 'O':
            negate = true;
            /* fallthrough */
        case 'o':
            in_class = char_in_range('0', '7', to_enchant);
            break;

        case 'W':
            negate = true;
            /* fallthrough */
        case 'w':
            in_class = char_in_range('0', '9', to_enchant)
                    || char_in_range('A', 'Z', to_enchant)
                    || char_in_range('a', 'z', to_enchant)
                    || (to_enchant == '_');
            break;

        case 'H':
            negate = true;
            /* fallthrough */
        case 'h':
            in_class = char_in_range('A', 'Z', to_enchant)
                    || char_in_range('a', 'z', to_enchant)
                    || (to_enchant == '_');
            break;

        case 'A':
            negate = true;
            /* fallthrough */
        case 'a':
            in_class = char_in_range('A', 'Z', to_enchant)
                    || char_in_range('a', 'z', to_enchant);
            break;

        case 'L':
            negate = true;
            /* fallthrough */
        case 'l':
            in_class = char_in_range('a', 'z', to_enchant);
            break;

        case 'U':
            negate = true;
            /* fallthrough */
        case 'u':
            in_class = char_in_range('A', 'Z', to_enchant);
            break;

        default:
            // Unknown class letter: never matches.
            return false;
    }

    // Uppercase letters select the complement of the lowercase class.
    return in_class != negate;
}
/**
 * Try to match `pattern` against the text that begins at
 * `string_start + string_offset`. The match is anchored at that position;
 * no searching is performed.
 *
 * Supported pattern syntax:
 *   - any character other than a backslash matches itself;
 *   - \t \r \e \b match tab, carriage return, escape (033) and
 *     backspace (010) respectively;
 *   - \\ matches a literal backslash;
 *   - \< start of word: when the current character is a word separator it
 *     is consumed and excluded from the reported match (it is counted in
 *     the match offset); otherwise the assertion succeeds without
 *     consuming anything only at the very first character of
 *     `string_start`;
 *   - \> end of word: succeeds without consuming anything when the current
 *     character is a word separator or the end of the string;
 *   - any class letter accepted by magic() matches one character of that
 *     class.
 * Every other escape, and a lone trailing backslash, makes the match fail.
 *
 * @param pattern        NUL-terminated pattern
 * @param string_start   NUL-terminated text; also the reference point for
 *                       the "very first character" rule of \<
 * @param string_offset  index into `string_start` where matching begins
 * @param match_offset_  optional (may be NULL); on success receives the
 *                       number of characters consumed by \< that precede
 *                       the reported match. Left untouched on failure.
 * @return length of the matched text, not counting the characters reported
 *         through `match_offset_`; 0 when the pattern does not match (a
 *         successful zero-length match is indistinguishable from failure)
 */
int regex_match(const char * const pattern,
                const char * const string_start,
                const int string_offset,
                int * match_offset_) {
    const char * pattern_pointer = pattern;
    const char * string_pointer = string_start + string_offset;
    const char * const match_base = string_pointer;
    int match_offset = 0;

    for (;;) {
        // Whole pattern consumed: success.
        if (!(*pattern_pointer)) {
            break;
        }

        // Literal character. A pattern character can never equal the string
        // terminator here, so running out of text fails on this comparison.
        if (*pattern_pointer != '\\') {
            if (*pattern_pointer != *string_pointer) {
                return 0;
            }
            ++pattern_pointer;
            ++string_pointer;
            continue;
        }

        // Escape sequence: a backslash must be followed by something.
        if (!is_next_valid(pattern_pointer)) {
            return 0;
        }
        const char escape = *(pattern_pointer + 1);

        // \> is the only escape allowed to succeed at the end of the text,
        // so it is handled before the end-of-text check below.
        if (escape == '>') {
            if ((*string_pointer == '\0') || is_word_separator(*string_pointer)) {
                pattern_pointer += 2;
                continue;
            }
            return 0;
        }

        // Every remaining escape needs a character to look at.
        if (!(*string_pointer)) {
            return 0;
        }

        // Escapes that stand for one specific control character.
        char control_char = '\0';
        switch (escape) {
            case 't': control_char = '\t';   break;
            case 'r': control_char = '\r';   break;
            case 'e': control_char = '\033'; break;
            case 'b': control_char = '\010'; break;
            default:                         break;
        }
        if (control_char != '\0') {
            if (*string_pointer != control_char) {
                return 0;
            }
            pattern_pointer += 2;
            string_pointer += 1;
            continue;
        }

        // Escaped backslash.
        if (escape == '\\') {
            if (*string_pointer != '\\') {
                return 0;
            }
            pattern_pointer += 2;
            string_pointer += 1;
            continue;
        }

        // Start of word.
        if (escape == '<') {
            if (is_word_separator(*string_pointer)) {
                // The separator is consumed but kept out of the match.
                pattern_pointer += 2;
                string_pointer += 1;
                match_offset += 1;
                continue;
            }
            if (string_pointer == string_start) {
                pattern_pointer += 2;
                continue;
            }
            return 0;
        }

        // Character classes.
        if (magic(escape, *string_pointer)) {
            pattern_pointer += 2;
            string_pointer += 1;
            continue;
        }

        // Unknown escape, or class mismatch.
        return 0;
    }

    if (match_offset_) {
        *match_offset_ = match_offset;
    }

    return (int)((string_pointer - match_base) - match_offset);
}