2023-08-19 18:49:10 -04:00
|
|
|
#include "regex.h"
|
|
|
|
|
|
|
|
// Global flag, initialised to true. None of the code visible in this part of
// the file reads or writes it.
// NOTE(review): presumably selects case-sensitive matching -- confirm against
// regex.h and the callers.
bool is_case_on = true;
|
|
|
|
|
|
|
|
// Tells whether the character right after *s is something other than the
// NUL terminator.
//
// s[1] is read unconditionally, so s must point at a character that is not
// itself the terminator of its string.
static bool is_next_valid(const char * const s) {
    return s[1] != '\0';
}
|
|
|
|
|
|
|
|
// Tests whether `character` lies in the inclusive range [start, end].
//
// start     - first character of the range
// end       - last character of the range (included)
// character - the character being classified
//
// Returns true when start <= character <= end. An inverted range
// (start > end) is empty and always yields false; a range with
// start == end matches exactly that one character.
static bool char_in_range(const char start,
                          const char end,
                          const char character) {
    // A plain comparison includes both end points and needs no iteration.
    return (start <= character) && (character <= end);
}
|
|
|
|
|
|
|
|
// Tells whether `character` separates words: it must be a 7-bit ASCII
// character that is neither alphanumeric (per isalnum) nor an underscore.
//
// Characters outside the ASCII range (negative or > 0x7F) are never treated
// as separators. Note that the NUL character counts as a separator.
static bool is_word_separator(const char character) {
    // <ctype.h> classifiers require a value representable as unsigned char;
    // the explicit range test replaces the non-ISO isascii().
    const unsigned char byte = (unsigned char)character;

    if (byte > 0x7F) {
        return false;
    }

    return !isalnum(byte) && (byte != '_');
}
|
|
|
|
|
|
|
|
// Evaluates a backslash character class against a single character.
//
// magic_char - the class letter that followed the backslash in the pattern
// to_enchant - the character from the subject string to classify
//
// Each lowercase letter names a class and the matching uppercase letter names
// its complement. Range membership is delegated to char_in_range(), so the
// exact boundaries of every range are whatever that helper implements.
//
//   \s  space or tab            \S  anything else
//   \d  0-9                     \D  complement
//   \x  0-9 A-F a-f             \X  complement
//   \o  0-7                     \O  complement
//   \w  0-9 A-Z a-z _           \W  complement
//   \h  A-Z a-z _               \H  complement
//   \a  A-Z a-z                 \A  complement
//   \l  a-z                     \L  complement
//   \u  A-Z                     \U  complement
//
// Not handled here (they yield false like any unknown letter):
//   \i identifier character (see 'isident' option)
//   \I like "\i", but excluding digits
//   \k keyword character (see 'iskeyword' option)
//   \K like "\k", but excluding digits
//   \f file name character (see 'isfname' option)
//   \F like "\f", but excluding digits
//   \p printable character (see 'isprint' option)
//   \P like "\p", but excluding digits
//
// Returns true when to_enchant belongs to the requested class, false
// otherwise or when magic_char is not a recognised class letter.
static bool magic(const char magic_char, const char to_enchant) {
    bool member;

    // Work out membership of the positive (lowercase) class first.
    switch (magic_char) {
        case 's':
        case 'S':
            member = (to_enchant == ' ') || (to_enchant == '\t');
            break;
        case 'd':
        case 'D':
            member = char_in_range('0', '9', to_enchant);
            break;
        case 'x':
        case 'X':
            member = char_in_range('0', '9', to_enchant)
                  || char_in_range('A', 'F', to_enchant)
                  || char_in_range('a', 'f', to_enchant);
            break;
        case 'o':
        case 'O':
            member = char_in_range('0', '7', to_enchant);
            break;
        case 'w':
        case 'W':
            member = char_in_range('0', '9', to_enchant)
                  || char_in_range('A', 'Z', to_enchant)
                  || char_in_range('a', 'z', to_enchant)
                  || (to_enchant == '_');
            break;
        case 'h':
        case 'H':
            member = char_in_range('A', 'Z', to_enchant)
                  || char_in_range('a', 'z', to_enchant)
                  || (to_enchant == '_');
            break;
        case 'a':
        case 'A':
            member = char_in_range('A', 'Z', to_enchant)
                  || char_in_range('a', 'z', to_enchant);
            break;
        case 'l':
        case 'L':
            member = char_in_range('a', 'z', to_enchant);
            break;
        case 'u':
        case 'U':
            member = char_in_range('A', 'Z', to_enchant);
            break;
        default:
            // Unknown class letter: never matches.
            return false;
    }

    // Only recognised letters reach this point; the uppercase spelling of a
    // class asks for its complement.
    const bool complemented = (magic_char >= 'A') && (magic_char <= 'Z');

    return complemented ? !member : member;
}
|
|
|
|
|
2023-08-21 14:07:39 -04:00
|
|
|
// Matches `pattern` against the text that begins at
// `string_start + string_offset`. The match is anchored at that position;
// no searching is performed.
//
// Pattern elements handled:
//   \t \r \e \b   tab, carriage return, escape (\033), backspace (\010)
//   \\            a literal backslash
//   \<            start of word: either consumes one word-separator
//                 character, or matches without consuming anything when the
//                 current position is the very start of `string_start`
//   \>            end of word: matches without consuming when the current
//                 character is a word separator
//   \s \d \w ...  character classes, evaluated by magic()
//   anything else a literal character, compared exactly
//
// pattern       - NUL-terminated pattern
// string_start  - start of the NUL-terminated subject string
// string_offset - index into string_start where matching begins
// match_offset_ - optional (may be NULL); on success receives the number of
//                 separator characters that \< consumed. It is left untouched
//                 when the match fails.
//
// Returns the number of subject characters consumed minus the value stored
// through match_offset_ on success, and 0 on failure. A successful match of
// length zero (for example an empty pattern) also returns 0.
int regex_match(const char * const pattern,
                const char * const string_start,
                const int string_offset,
                int * match_offset_) {
    const char * pattern_pointer = pattern;
    const char * string_pointer = string_start + string_offset;
    const char * const match_base = string_pointer;
    int match_offset = 0;

    for (;;) {
        // Pattern exhausted: everything matched.
        if (!(*pattern_pointer)) {
            break;
        }
        // Subject exhausted while pattern remains: no match.
        if (!(*string_pointer)) {
            return 0;
        }

        // Escape character
        if (*pattern_pointer == '\\') {
            // A trailing lone backslash is not a valid pattern.
            if (!is_next_valid(pattern_pointer)) {
                return 0;
            }

            const char escaped = *(pattern_pointer + 1);

            // Escapes that stand for exactly one control character.
            char control = '\0';
            switch (escaped) {
                case 't': control = '\t';   break;
                case 'r': control = '\r';   break;
                case 'e': control = '\033'; break;
                case 'b': control = '\010'; break;
                default:                    break;
            }
            if (control != '\0') {
                // Compare against the CURRENT subject character and move on
                // to the next pattern element right away.
                if (*string_pointer != control) {
                    return 0;
                }
                pattern_pointer += 2;
                string_pointer += 1;
                continue;
            }

            // Escaped backslash matches a literal backslash.
            if ((escaped == '\\') && (*string_pointer == '\\')) {
                pattern_pointer += 2;
                string_pointer += 1;
                continue;
            }

            // Start of word.
            if (escaped == '<') {
                if (is_word_separator(*string_pointer)) {
                    // The separator is consumed but excluded from the
                    // reported match length via match_offset.
                    pattern_pointer += 2;
                    string_pointer += 1;
                    match_offset += 1;
                    continue;
                } else if (string_pointer == string_start) {
                    // Beginning of the subject counts as a word start.
                    pattern_pointer += 2;
                    continue;
                }
            }

            // End of word.
            if (escaped == '>') {
                if (is_word_separator(*string_pointer)) {
                    pattern_pointer += 2;
                    continue;
                }
                // Current character is the last one of the subject: stop
                // here and report success without consuming it.
                // NOTE(review): the rest of the pattern is not checked on
                // this path -- confirm this is intended.
                if (*(string_pointer + 1) == '\0') {
                    break;
                }
            }

            // Character classes (\s, \d, \w, ...).
            if (magic(escaped, *string_pointer)) {
                pattern_pointer += 2;
                string_pointer += 1;
                continue;
            }

            // Unknown escape, or a known one that did not match.
            return 0;
        }

        // Literal
        if (*pattern_pointer != *string_pointer) {
            return 0;
        }
        ++pattern_pointer;
        ++string_pointer;
    }

    if (match_offset_) {
        *match_offset_ = match_offset;
    }

    return (int)(string_pointer - match_base) - match_offset;
}
|