anon před 8 měsíci
rodič
revize
5f2e4a612f
9 změnil soubory, kde provedl 409 přidání a 310 odebrání
  1. +3
    -4
      Makefile
  2. +16
    -248
      include/hl.h
  3. +1
    -1
      include/regex.h
  4. +45
    -0
      include/terminal.h
  5. +1
    -1
      include/vector.h
  6. +247
    -0
      source/hl.c
  7. +94
    -16
      source/main.c
  8. +1
    -0
      source/regex.c
  9. +1
    -40
      source/terminal.c

+ 3
- 4
Makefile Zobrazit soubor

@@ -1,6 +1,6 @@
TARGET:=hl

CFLAGS:=-std=c99
CFLAGS:=-std=c99 -Wall -Wextra -Wpedantic -Wshadow -Wundef
CPPFLAGS:=-Iinclude -D_GNU_SOURCE -D_FORTIFY_SOURCE=2

DEBUG=1
@@ -29,8 +29,8 @@ VPATH=${SRC.dir} ${OBJ.dir}
${OBJ.dir}/%.o: ${SRC.dir}/%.c
${COMPILE.c} $< -o $@

${TARGET}: ${HDR} | ${OBJ}
${LINK.c} $| -o $@
${TARGET}: ${OBJ} | ${HDR}
${LINK.c} $+ -o $@

${SRC} ${HDR}:

@@ -42,7 +42,6 @@ uninstall:

clean:
-rm ${OBJ} ${TARGET}
-rm *.out

test: chad_test



+ 16
- 248
include/hl.h Zobrazit soubor

@@ -1,6 +1,4 @@
/* hl.h
* Copyright 2023 Anon Anonson, Ognjen 'xolatile' Milan Robovic, Emil Williams
* SPDX Identifier: GPL-3.0-only / NO WARRANTY / NO GUARANTEE */
#ifndef HL_H_

#include <stdio.h>
#include <uthash.h>
@@ -50,6 +48,15 @@ extern hl_group_t * keyword_hl;
extern hl_group_t * preprocessor_hl;
extern hl_group_t * symbol_hl;

extern hl_group_t * special_hl;
extern hl_group_t * control_hl;
extern hl_group_t * keyword_hl;
extern hl_group_t * block_hl;
extern hl_group_t * separator_hl;
extern hl_group_t * operator_hl;
extern hl_group_t * comment_hl;
extern hl_group_t * string_literal_hl;

extern void new_display_mode(display_t * mode);
extern int free_token(token_t * token);
extern int append_token(token_t * token);
@@ -75,6 +82,10 @@ extern token_t * new_token(const char * const word,
const token_type_t t,
hl_group_t * const g);

extern token_t * new_region_token(const char * start,
const char * end,
hl_group_t * g);

// TODO: ALIGN PROPERLY...

extern int token_fits(const token_t * const token,
@@ -89,248 +100,5 @@ extern void render_string(const char * const string,
extern int hl_init(void);
extern int hl_deinit(void);

// GLOBALS

// Global list of every registered token. Each element is a token_t pointer
// (element_size is sizeof(token_t *)); the vector starts out empty.
vector_t token_table = {
.data = NULL,
.element_size = sizeof(token_t *),
.element_count = 0UL
};

// Head of the uthash table of display modes; NULL means no mode is registered.
display_t * display_table = NULL;

// --------------------------------
// ### Constructors/Destructors ###
// --------------------------------

// Register a display mode in the global display_table hash.
// The entry is keyed by the string stored in the mode's `key` field.
void new_display_mode(display_t * mode) {
HASH_ADD_STR(display_table,
key,
mode);
}

// Release what a token refers to: its highlight group (free) and its
// compiled regex (regex_free). The token_t object itself is not freed.
// Always returns 0.
// NOTE(review): new_symbol_tokens / new_keyword_tokens store the same
// hl_group_t pointer in many tokens, so freeing token->hl once per token
// (as hl_deinit does) looks like a double free -- confirm who owns the group.
int free_token(token_t * token) {
free(token->hl);
regex_free(token->syntax);

return 0;
}

// Append the token pointer to the global token_table. Always returns 0.
int append_token(token_t * token) {
// The vector stores token_t pointers, so the address of the pointer is pushed.
vector_push(&token_table, &token);

return 0;
}

// Create a KEYSYMBOL token whose pattern is `c`, attach highlight group `g`
// and register it in the token table.
// Returns the new token, or NULL when the allocation fails (nothing is
// registered in that case).
token_t * new_symbol_token(const char * const c,
                           hl_group_t * const g) {
    token_t * mt = malloc(sizeof *mt);
    if (!mt) {
        return NULL;
    }

    mt->hl     = g;
    mt->t      = KEYSYMBOL;
    mt->syntax = regex_compile(c);

    append_token(mt);

    return mt;
}

// Create one symbol token per entry of the NULL-terminated array `symbols`,
// all sharing highlight group `g`.
// Returns the number of tokens that were created.
int new_symbol_tokens(const char * const * symbols,
                      hl_group_t * const g) {
    int created = 0;

    for (const char * const * sym = symbols; *sym; ++sym) {
        if (!new_symbol_token(*sym, g)) {
            assert(!(bool)"Kinda failed to new symbol token thing.");
            continue;
        }
        ++created;
    }

    return created;
}

// Create one symbol token for every character of `str`, all sharing
// highlight group `g`. Characters reported by is_magic() are prefixed with a
// backslash before being compiled.
// Returns the number of tokens that were created.
int new_char_tokens(const char * str,
                    hl_group_t * const g) {
    // escaped[0] holds the backslash, escaped[1] the current character.
    char escaped[3] = { '\\', '\0', '\0' };
    int created = 0;

    for (const char * s = str; *s != '\0'; s++) {
        escaped[1] = *s;
        const char * const pattern = is_magic(*s) ? escaped : &escaped[1];

        if (new_symbol_token(pattern, g)) {
            ++created;
        } else {
            assert(!(bool)"Kinda failed to new char token thing.");
        }
    }

    return created;
}

// Create a KEYWORD token for `word`, wrapped in the word-boundary markers
// "\<" and "\>", attach highlight group `g` and register it in the token table.
// Returns the new token, or NULL when an allocation fails.
token_t * new_keyword_token(const char * const word,
                            hl_group_t * const g) {
    const size_t word_length = strlen(word);

    // 2 bytes for "\<", 2 bytes for "\>", 1 for the terminator.
    char * pattern = malloc(word_length + 4 + 1);
    if (!pattern) {
        return NULL;
    }
    memcpy(pattern, "\\<", 2);
    memcpy(pattern + 2, word, word_length);
    strcpy(pattern + 2 + word_length, "\\>");

    token_t * mt = malloc(sizeof *mt);
    if (!mt) {
        free(pattern);
        return NULL;
    }

    mt->hl     = g;
    mt->t      = KEYWORD;
    mt->syntax = regex_compile(pattern);

    // The pattern is only needed during compilation: other constructors in
    // this file pass stack buffers to regex_compile, so it cannot keep the
    // pointer. Freeing it here fixes the leak of the temporary buffer.
    free(pattern);

    append_token(mt);

    return mt;
}

// Create one keyword token per entry of the NULL-terminated array `words`,
// all sharing highlight group `g`.
// Returns the number of tokens that were created.
int new_keyword_tokens(const char * const * words,
                       hl_group_t * const g) {
    int created = 0;

    for (const char * const * w = words; *w; ++w) {
        if (new_keyword_token(*w, g)) {
            ++created;
        }
    }

    return created;
}

// Create a token matching everything from `start` to `end` (the pattern is
// start + "[\d\D]*" + end), attach highlight group `g` and register it in
// the token table.
// Returns the new token, or NULL when the combined pattern does not fit the
// internal buffer or an allocation fails.
// NOTE(review): the token type is KEYSYMBOL although a REGION type exists --
// kept as in the original; confirm whether that is intended.
token_t * new_region_token(const char * start,
                           const char * end,
                           hl_group_t * g) {
    char buffer[100];

    // Bounded formatting replaces the unchecked strcat chain, which could
    // write past the end of the buffer for long start/end patterns.
    const int written = snprintf(buffer, sizeof buffer,
                                 "%s[\\d\\D]*%s", start, end);
    if (written < 0 || (size_t)written >= sizeof buffer) {
        return NULL;
    }

    token_t * mt = malloc(sizeof *mt);
    if (!mt) {
        return NULL;
    }

    mt->hl     = g;
    mt->t      = KEYSYMBOL;
    mt->syntax = regex_compile(buffer);

    append_token(mt);

    return mt;
}

// Create and register a token of type `t` for the pattern `word`, using
// highlight group `g`.
// Returns the new token, or NULL when the type is not handled here or an
// allocation fails.
token_t * new_token(const char * const word,
                    const token_type_t t,
                    hl_group_t * const g) {
    switch (t) {
        case KEYSYMBOL: {
            return new_symbol_token(word, g);
        }
        case KEYWORD: {
            return new_keyword_token(word, g);
        }
        case MATCH: {
            token_t * mt = malloc(sizeof *mt);
            if (!mt) {
                return NULL;
            }
            mt->hl     = g;
            mt->t      = MATCH;
            mt->syntax = regex_compile(word);
            append_token(mt);
            // Hand the token back: previously this case fell through to the
            // final `return NULL`, reporting failure for a created token.
            return mt;
        }
        case REGION:
            // Nothing is created for REGION here.
            // NOTE(review): presumably callers use new_region_token instead.
        default:
            break;
    }

    return NULL;
}

// --------------------
// ### Highlighting ###
// --------------------

// XXX: meditate on this shit
// Try to match `token` against `to` at `string_offset`.
// Returns the width of the match, or 0 when the token does not match.
// `match_offset` is accepted for interface compatibility but is not used.
int token_fits(const token_t * const token,
               const char * const to,
               const int string_offset,
               const bool is_start_of_line,
               int * match_offset) {
    UNUSED(match_offset);

    match_t * m = regex_match(token->syntax, to, is_start_of_line, string_offset);
    if (!m) {
        return 0;
    }

    // NOTE(review): m is not released here; the original author flagged
    // this as a leak.
    return m->width;
}

void render_string(const char * const string,
const char * const mode) {
for (const char * s = string; *s != '\00';) {
int f = 0;
size_t token_index = 0;
int offset = 0;

for (; token_index < token_table.element_count; token_index++) {
token_t * t = *(token_t**)vector_get(&token_table,
token_index);
const bool is_start_of_line = (s == string) || (*s == '\n');
f = token_fits(t, string, (int)(s - string), is_start_of_line, &offset);
if (f) {
break;
}
}
//
display_t * display;
HASH_FIND_STR(display_table,
mode,
display);
//
if (f) {
for (int i = 0; i < offset; i++) {
token_t * t = *(token_t**)vector_get(&token_table,
token_index);
display->callback(s + i,
0,
t->hl->attributes);
}
token_t * t = *(token_t**)vector_get(&token_table,
token_index);
display->callback(s + offset,
f,
t->hl->attributes);
s += f + offset;
} else {
display->callback(s,
0,
NULL);
++s;
}
}
}

// -------------------------
// ### Library Management ###
// -------------------------
// Globally shared highlight groups, one per syntax category. All start as NULL.
// NOTE(review): presumably assigned by the display backend or the syntax
// setup before any token uses them -- not visible here.
hl_group_t * special_hl = NULL;
hl_group_t * control_hl = NULL;
hl_group_t * keyword_hl = NULL;
hl_group_t * block_hl = NULL;
hl_group_t * separator_hl = NULL;
hl_group_t * operator_hl = NULL;
hl_group_t * comment_hl = NULL;
hl_group_t * string_literal_hl = NULL;

// Library initialisation hook. Currently does nothing and returns 0.
int hl_init(void) {
return 0;
}

int hl_deinit(void) {
for (size_t i = 0; i < token_table.element_count; i++) {
free_token(*(token_t**)vector_get(&token_table, i));
}

return 0;
}
#define HL_H_
#endif

+ 1
- 1
include/regex.h Zobrazit soubor

@@ -1,5 +1,4 @@
#ifndef REGEX_H
#define REGEX_H

#include <stdbool.h>

@@ -21,4 +20,5 @@ extern int regex_match(regex_t * regex, const char * const string, const b

extern bool is_magic(const char c);

#define REGEX_H
#endif

+ 45
- 0
include/terminal.h Zobrazit soubor

@@ -0,0 +1,45 @@
#ifndef TERMINAL_H_

#include "hl.h"

// Terminal manipulation
#define TERMINAL_RESET "\033[0m"

#define TERMINAL_COLOR_FG_BLACK "\033[30m"
#define TERMINAL_COLOR_FG_RED "\033[31m"
#define TERMINAL_COLOR_FG_GREEN "\033[32m"
#define TERMINAL_COLOR_FG_YELLOW "\033[33m"
#define TERMINAL_COLOR_FG_BLUE "\033[34m"
#define TERMINAL_COLOR_FG_MAGENTA "\033[35m"
#define TERMINAL_COLOR_FG_CYAN "\033[36m"
#define TERMINAL_COLOR_FG_WHITE "\033[37m"

#define TERMINAL_COLOR_BG_BLACK "\033[40m"
#define TERMINAL_COLOR_BG_RED "\033[41m"
#define TERMINAL_COLOR_BG_GREEN "\033[42m"
#define TERMINAL_COLOR_BG_YELLOW "\033[43m"
#define TERMINAL_COLOR_BG_BLUE "\033[44m"
#define TERMINAL_COLOR_BG_MAGENTA "\033[45m"
#define TERMINAL_COLOR_BG_CYAN "\033[46m"
#define TERMINAL_COLOR_BG_WHITE "\033[47m"

#define TERMINAL_STYLE_BOLD "\033[1m"
#define TERMINAL_STYLE_ITALICS "\033[3m"
#define TERMINAL_STYLE_REVERSE "\033[7m"

typedef struct {
const char * attribute;
const char * foreground_color;
const char * background_color;
} terminal_hl_t;

extern display_t * cterm;

extern void cterm_render_callback(const char * const string,
const int length,
void * const attributes);

extern int terminal_hl_init(void);

#define TERMINAL_H_
#endif

+ 1
- 1
include/vector.h Zobrazit soubor

@@ -1,5 +1,4 @@
#ifndef VECTOR_H
#define VECTOR_H

#include <stddef.h>

@@ -30,4 +29,5 @@ extern void vector_set(vector_t * vector,

extern void vector_free(vector_t * vector);

#define VECTOR_H
#endif

+ 247
- 0
source/hl.c Zobrazit soubor

@@ -0,0 +1,247 @@
#include "hl.h"

#include <assert.h>

// Global list of every registered token. Each element is a token_t pointer
// (element_size is sizeof(token_t *)); the vector starts out empty.
vector_t token_table = {
.data = NULL,
.element_size = sizeof(token_t *),
.element_count = 0UL
};

// Head of the uthash table of display modes; NULL means no mode is registered.
display_t * display_table = NULL;

// -------------------------
// ### Library Management ###
// -------------------------
// Globally shared highlight groups, one per syntax category. All start as NULL.
// NOTE(review): presumably assigned by the display backend or the syntax
// setup before any token uses them -- not visible here.
hl_group_t * special_hl = NULL;
hl_group_t * control_hl = NULL;
hl_group_t * keyword_hl = NULL;
hl_group_t * block_hl = NULL;
hl_group_t * separator_hl = NULL;
hl_group_t * operator_hl = NULL;
hl_group_t * comment_hl = NULL;
hl_group_t * string_literal_hl = NULL;

// --------------------------------
// ### Constructors/Destructors ###
// --------------------------------

// Register a display mode in the global display_table hash.
// The entry is keyed by the string stored in the mode's `key` field.
void new_display_mode(display_t * mode) {
HASH_ADD_STR(display_table,
key,
mode);
}

// Release what a token refers to: its highlight group (free) and its
// compiled regex (regex_free). The token_t object itself is not freed.
// Always returns 0.
// NOTE(review): new_symbol_tokens / new_keyword_tokens store the same
// hl_group_t pointer in many tokens, so freeing token->hl once per token
// (as hl_deinit does) looks like a double free -- confirm who owns the group.
int free_token(token_t * token) {
free(token->hl);
regex_free(token->syntax);

return 0;
}

// Append the token pointer to the global token_table. Always returns 0.
int append_token(token_t * token) {
// The vector stores token_t pointers, so the address of the pointer is pushed.
vector_push(&token_table, &token);

return 0;
}

// Create a KEYSYMBOL token whose pattern is `c`, attach highlight group `g`
// and register it in the token table.
// Returns the new token, or NULL when the allocation fails (nothing is
// registered in that case).
token_t * new_symbol_token(const char * const c,
                           hl_group_t * const g) {
    token_t * mt = malloc(sizeof *mt);
    if (!mt) {
        return NULL;
    }

    mt->hl     = g;
    mt->t      = KEYSYMBOL;
    mt->syntax = regex_compile(c);

    append_token(mt);

    return mt;
}

// Create one symbol token per entry of the NULL-terminated array `symbols`,
// all sharing highlight group `g`.
// Returns the number of tokens that were created.
int new_symbol_tokens(const char * const * symbols,
                      hl_group_t * const g) {
    int created = 0;

    for (const char * const * sym = symbols; *sym; ++sym) {
        if (!new_symbol_token(*sym, g)) {
            assert(!(bool)"Kinda failed to new symbol token thing.");
            continue;
        }
        ++created;
    }

    return created;
}

// Create one symbol token for every character of `str`, all sharing
// highlight group `g`. Characters reported by is_magic() are prefixed with a
// backslash before being compiled.
// Returns the number of tokens that were created.
int new_char_tokens(const char * str,
                    hl_group_t * const g) {
    // escaped[0] holds the backslash, escaped[1] the current character.
    char escaped[3] = { '\\', '\0', '\0' };
    int created = 0;

    for (const char * s = str; *s != '\0'; s++) {
        escaped[1] = *s;
        const char * const pattern = is_magic(*s) ? escaped : &escaped[1];

        if (new_symbol_token(pattern, g)) {
            ++created;
        } else {
            assert(!(bool)"Kinda failed to new char token thing.");
        }
    }

    return created;
}

// Create a KEYWORD token for `word`, attach highlight group `g` and register
// it in the token table.
// The word is compiled as-is: wrapping it in the word-boundary markers
// "\<" ... "\>" is currently disabled.
// Returns the new token, or NULL when the allocation fails.
token_t * new_keyword_token(const char * const word,
                            hl_group_t * const g) {
    token_t * mt = malloc(sizeof *mt);
    if (!mt) {
        return NULL;
    }

    mt->hl     = g;
    mt->t      = KEYWORD;
    mt->syntax = regex_compile(word);

    append_token(mt);

    return mt;
}

// Create one keyword token per entry of the NULL-terminated array `words`,
// all sharing highlight group `g`.
// Returns the number of tokens that were created.
int new_keyword_tokens(const char * const * words,
                       hl_group_t * const g) {
    int created = 0;

    for (const char * const * w = words; *w; ++w) {
        if (new_keyword_token(*w, g)) {
            ++created;
        }
    }

    return created;
}

// Create a token matching everything from `start` to `end` (the pattern is
// start + "[\d\D]*" + end), attach highlight group `g` and register it in
// the token table.
// Returns the new token, or NULL when the combined pattern does not fit the
// internal buffer or an allocation fails.
// NOTE(review): the token type is KEYSYMBOL although a REGION type exists --
// kept as in the original; confirm whether that is intended.
token_t * new_region_token(const char * start,
                           const char * end,
                           hl_group_t * g) {
    char buffer[100];

    // Bounded formatting replaces the unchecked strcat chain, which could
    // write past the end of the buffer for long start/end patterns.
    const int written = snprintf(buffer, sizeof buffer,
                                 "%s[\\d\\D]*%s", start, end);
    if (written < 0 || (size_t)written >= sizeof buffer) {
        return NULL;
    }

    token_t * mt = malloc(sizeof *mt);
    if (!mt) {
        return NULL;
    }

    mt->hl     = g;
    mt->t      = KEYSYMBOL;
    mt->syntax = regex_compile(buffer);

    append_token(mt);

    return mt;
}

// Create and register a token of type `t` for the pattern `word`, using
// highlight group `g`.
// Returns the new token, or NULL when the type is not handled here or an
// allocation fails.
token_t * new_token(const char * const word,
                    const token_type_t t,
                    hl_group_t * const g) {
    switch (t) {
        case KEYSYMBOL: {
            return new_symbol_token(word, g);
        }
        case KEYWORD: {
            return new_keyword_token(word, g);
        }
        case MATCH: {
            token_t * mt = malloc(sizeof *mt);
            if (!mt) {
                return NULL;
            }
            mt->hl     = g;
            mt->t      = MATCH;
            mt->syntax = regex_compile(word);
            append_token(mt);
            // Hand the token back: previously this case fell through to the
            // final `return NULL`, reporting failure for a created token.
            return mt;
        }
        case REGION:
            // Nothing is created for REGION here.
            // NOTE(review): presumably callers use new_region_token instead.
        default:
            break;
    }

    return NULL;
}

// --------------------
// ### Highlighting ###
// --------------------

// Try to match `token` against `to` at `string_offset`.
// Returns whatever regex_match reports for the token's compiled pattern
// (callers treat 0 as "no match").
// `match_offset` is accepted for interface compatibility but is not used.
int token_fits(const token_t * const token,
               const char * const to,
               const int string_offset,
               const bool is_start_of_line,
               int * match_offset) {
    UNUSED(match_offset);

    const int result = regex_match(token->syntax, to, is_start_of_line, string_offset);

    return result;
}

void render_string(const char * const string,
const char * const mode) {
for (const char * s = string; *s != '\00';) {
int f = 0;
size_t token_index = 0;
int offset = 0;

for (; token_index < token_table.element_count; token_index++) {
token_t * t = *(token_t**)vector_get(&token_table,
token_index);
const bool is_start_of_line = (s == string) || (*s == '\n');
f = token_fits(t, string, (int)(s - string), is_start_of_line, &offset);
if (f) {
break;
}
}
//
display_t * display;
HASH_FIND_STR(display_table,
mode,
display);
//
if (f) {
for (int i = 0; i < offset; i++) {
token_t * t = *(token_t**)vector_get(&token_table,
token_index);
display->callback(s + i,
0,
t->hl->attributes);
}
token_t * t = *(token_t**)vector_get(&token_table,
token_index);
display->callback(s + offset,
f,
t->hl->attributes);
s += f + offset;
} else {
display->callback(s,
0,
NULL);
++s;
}
}
}

// Library initialisation hook. Currently does nothing and returns 0.
int hl_init(void) {
return 0;
}

int hl_deinit(void) {
for (size_t i = 0; i < token_table.element_count; i++) {
free_token(*(token_t**)vector_get(&token_table, i));
}

return 0;
}

+ 94
- 16
source/main.c Zobrazit soubor

@@ -8,38 +8,116 @@
#include <unistd.h>
#include <fcntl.h>

#include "terminal_hl.h"
#include "terminal.h"

#define ALLOCATION_CHUNK (10UL)

static char * buffer = NULL;
static size_t buffer_size = 0;
static const char * argv0;

int main(void) {
// Buffer init
buffer = realloc(buffer, ALLOCATION_CHUNK);
/* Read the whole file `fn` into a freshly allocated, NUL-terminated buffer.
 * Returns NULL when the file cannot be opened, its size cannot be
 * determined, the allocation fails or reading fails.
 * The caller owns the returned buffer and must free() it. */
static char *
slurp(const char * fn)
{
  FILE * fp = fopen(fn, "r");
  if (!fp)
  { return NULL; }

  char * b = NULL;
  long size = -1;

  /* Measure the file; a stream that cannot seek cannot be sized this way. */
  if (fseek(fp, 0, SEEK_END) == 0)
  { size = ftell(fp); }

  if (size >= 0)
  {
    rewind(fp);
    b = malloc((size_t) size + 1);
    if (b)
    {
      size_t got = fread(b, 1, (size_t) size, fp);
      if (ferror(fp))
      {
        free(b);
        b = NULL;
      }
      else
      {
        /* Terminate after the bytes actually read, so an empty file or a
         * short read never leaves the buffer unterminated. */
        b[got] = '\0';
      }
    }
  }

  fclose(fp);
  return b;
}

/* Read standard input one byte at a time into a growing buffer, stopping at
 * end of input or at the first NUL byte.
 * Returns a NUL-terminated buffer owned by the caller (release with free()),
 * or NULL after printing a message when reading or allocating fails. */
static char *
get_stdin(void)
{
  size_t capacity = ALLOCATION_CHUNK;
  size_t length = 0;
  char * text = malloc(capacity);
  if (!text)
  {
    fprintf(stderr, "%s: Out of memory\n", argv0);
    return NULL;
  }

  for (;;)
  {
    /* Keep room for the byte about to be read; the terminator later
     * reuses the slot of the byte that ended the loop. */
    if (length == capacity)
    {
      char * grown = realloc(text, capacity + ALLOCATION_CHUNK);
      if (!grown)
      {
        free(text);
        fprintf(stderr, "%s: Out of memory\n", argv0);
        return NULL;
      }
      text = grown;
      capacity += ALLOCATION_CHUNK;
    }

    ssize_t got = read(STDIN_FILENO, &text[length], sizeof (*text));
    if (got == -1)
    {
      free(text);
      fprintf(stderr, "%s: Failed to read from STDIN\n", argv0);
      return NULL;
    }
    /* End of input, or a NUL byte in the input, ends the text. */
    if (got == 0 || text[length] == '\0')
    { break; }
    ++length;
  }

  text[length] = '\0';
  return text;
}

/* TODO: clean up how syntax/c.h is pulled in: replace the #include with a
 * function, and ideally make the syntax hot-swappable. */
int
main(int argc,
char ** argv) {
int arg = 0;
int syn = 0;
char * buffer = NULL;

argv0 = argv[0];

// Highlight init
terminal_hl_init();
//
#include "syntax/c.h"
//

render_string(buffer, "cterm");
putchar('\n');
while (++argv,
--argc)
{
if (**argv == '-')
{
syn = 1;
/* fprintf(stderr, "handle '%s'\n", *argv+1); */
/* lazy as hell, TODO use uthash */
if (strcmp(*argv+1, "c") == 0)
{
#include "syntax/c.h"
}
else
{
fprintf(stderr, "%s: Unimplemented syntax '%s'\n", argv0, *argv+1);
return 1;
}
}
else
{
if (!syn)
{
#include "syntax/c.h"
}
free(buffer);
arg = 1;
buffer = slurp(*argv);
render_string(buffer, "cterm");
if (!buffer)
{
perror(argv0);
return 1;
}
}
}
if (!arg)
{
if (!syn)
{
#include "syntax/c.h"
}
buffer = get_stdin();
render_string(buffer, "cterm");
}

fflush(stdout);
//hl_deinit();
free(buffer);


+ 1
- 0
source/regex.c Zobrazit soubor

@@ -564,6 +564,7 @@ regex_t * regex_compile(const char * const pattern) {

long_continue:
cs.is_at_the_beginning = false;
long_continue:;
}

regex->accepting_state = state;


include/terminal_hl.h → source/terminal.c Zobrazit soubor

@@ -1,43 +1,4 @@
#include "hl.h"

// Terminal manipulation
#define TERMINAL_RESET "\033[0m"

#define TERMINAL_COLOR_FG_BLACK "\033[30m"
#define TERMINAL_COLOR_FG_RED "\033[31m"
#define TERMINAL_COLOR_FG_GREEN "\033[32m"
#define TERMINAL_COLOR_FG_YELLOW "\033[33m"
#define TERMINAL_COLOR_FG_BLUE "\033[34m"
#define TERMINAL_COLOR_FG_MAGENTA "\033[35m"
#define TERMINAL_COLOR_FG_CYAN "\033[36m"
#define TERMINAL_COLOR_FG_WHITE "\033[37m"

#define TERMINAL_COLOR_BG_BLACK "\033[40m"
#define TERMINAL_COLOR_BG_RED "\033[41m"
#define TERMINAL_COLOR_BG_GREEN "\033[42m"
#define TERMINAL_COLOR_BG_YELLOW "\033[43m"
#define TERMINAL_COLOR_BG_BLUE "\033[44m"
#define TERMINAL_COLOR_BG_MAGENTA "\033[45m"
#define TERMINAL_COLOR_BG_CYAN "\033[46m"
#define TERMINAL_COLOR_BG_WHITE "\033[47m"

#define TERMINAL_STYLE_BOLD "\033[1m"
#define TERMINAL_STYLE_ITALICS "\033[3m"
#define TERMINAL_STYLE_REVERSE "\033[7m"

typedef struct {
const char * attribute;
const char * foreground_color;
const char * background_color;
} terminal_hl_t;

extern display_t * cterm;

extern void cterm_render_callback(const char * const string,
const int length,
void * const attributes);

extern int terminal_hl_init(void);
#include "terminal.h"

display_t * cterm = &(display_t) {
.key = "cterm",

Načítá se…
Zrušit
Uložit