From 94b151462a4ae16826ae2a157efbd287885dc563 Mon Sep 17 00:00:00 2001 From: anon Date: Thu, 24 Aug 2023 04:11:46 +0200 Subject: [PATCH] auto esc magic char tokens ---
Review notes (commentary only; the diff itself is unchanged).

What this patch does:
  * token_t.syntax changes from `char *` to `regex_t *`, and free_token()
    releases it with regex_free() instead of free().
  * new_symbol_token(): when is_magic(*c) is true, the pattern is built as
    a backslash followed by that single character; otherwise the symbol
    is strdup()'d as-is. The result is handed to regex_compile().
  * new_keyword_token() now stores regex_compile(new_word).
  * token_fits() now calls regex_search(token->syntax, to + string_offset);
    the previous early return for a NULL pattern is removed.
  * regex.c: is_quantifier(), escape_1_to_1(), escape_1_to_N() and
    compile_range() become static. The new is_magic() returns true for
    the characters `+ * ? \ [ ]` and is declared extern in regex.h.

Points to confirm before merging:
  * In the is_magic branch only c[0] is copied, so any characters after
    the first are dropped from the pattern. Presumably callers pass
    one-character symbols in that case; verify against the call sites.
  * The malloc() result in new_symbol_token() is written to without a
    NULL check.
  * new_word is not freed in the visible hunks after regex_compile().
    Whether regex_compile() keeps or copies the string is not shown
    here; verify to rule out a leak.
  * token_fits() never writes *match_offset in the lines shown; its only
    use is in the commented-out regex_match() call.
  * token_fits() no longer guards against a NULL token->syntax, and what
    regex_compile() returns on failure is not shown here; verify.
  * is_magic() covers only the six characters listed above. Confirm that
    this is the full set the regex engine treats specially.

 source/hl.h | 31 +++++++++++++++++-------------- source/regex.c | 20 ++++++++++++++++---- source/regex.h | 2 ++ 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/source/hl.h b/source/hl.h index 22ae2a7..7528277 100644 --- a/source/hl.h +++ b/source/hl.h @@ -34,7 +34,7 @@ typedef struct { typedef struct { hl_group_t * hl; token_type_t t; - char * syntax; + regex_t * syntax; } token_t; // GLOBALS @@ -56,7 +56,7 @@ void new_display_mode(display_t * mode) { int free_token(token_t * token) { free(token->hl); - free(token->syntax); + regex_free(token->syntax); return 0; } @@ -67,15 +67,23 @@ int append_token(token_t * token) { return 0; } -token_t * new_symbol_token(const char * const word, - hl_group_t * const g) { - char * new_word = strdup(word); +token_t * new_symbol_token(const char * const c, + hl_group_t * const g) { + char * new_word; + if (is_magic(*c)) { + new_word = (char *)malloc(sizeof(char)*3); + new_word[0] = '\\'; + new_word[1] = *c; + new_word[2] = '\00'; + } else { + new_word = strdup(c); + } token_t * mt = (token_t*)malloc(sizeof(token_t)); mt->hl = g; mt->t = KEYSYMBOL; - mt->syntax = new_word; + mt->syntax = regex_compile(new_word); append_token(mt); @@ -126,7 +134,7 @@ token_t * new_keyword_token(const char * const word, mt->hl = g; mt->t = KEYWORD; - mt->syntax = new_word; + mt->syntax = regex_compile(new_word); append_token(mt); @@ -173,15 +181,10 @@ token_t * new_token(const char * const word, int token_fits(const token_t * const token, const char * const to, const int string_offset, - int * match_offset) { - const char * const pattern = token->syntax; - - if (! 
pattern) { - return true; - } + int * match_offset) { //return regex_match(pattern, to, string_offset, match_offset); - return regex_search(NULL, pattern + string_offset); + return regex_search(token->syntax, to + string_offset); } void render_string(const char * const string, diff --git a/source/regex.c b/source/regex.c index cef41ad..a662b8f 100644 --- a/source/regex.c +++ b/source/regex.c @@ -33,7 +33,7 @@ typedef struct { s += n; \ } while (0) -bool is_quantifier(const char c){ +static bool is_quantifier(const char c) { for (const char * s = "+*?"; *s != '\00'; s++) { if (*s == c) { return true; @@ -42,8 +42,20 @@ bool is_quantifier(const char c){ return false; } +bool is_magic(const char c) { + if (is_quantifier(c)) { + return true; + } + for (const char * s = "\\[]"; *s != '\00'; s++) { + if (*s == c) { + return true; + } + } + return false; +} -int escape_1_to_1(const char c, char * whitelist) { + +static int escape_1_to_1(const char c, char * whitelist) { switch(c) { case 't': { strcat(whitelist, "\t"); @@ -83,7 +95,7 @@ int escape_1_to_1(const char c, char * whitelist) { return 0; } -int escape_1_to_N(const char c, char * whitelist) { +static int escape_1_to_N(const char c, char * whitelist) { switch(c) { case 'i': { const char identifier_chars[] = "@0123456789_\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337"; @@ -175,7 +187,7 @@ int escape_1_to_N(const char c, char * whitelist) { return 0; } -int compile_range(const char * const range, +static int compile_range(const char * const range, char * whitelist) { assert(range[0] == '[' && "Not a range."); diff --git a/source/regex.h b/source/regex.h index c6c714f..7b1ef10 100644 --- a/source/regex.h +++ b/source/regex.h @@ -17,4 +17,6 @@ extern regex_t * regex_compile(const char * const pattern); extern bool regex_search(regex_t * regex, const char * const string); extern int regex_free(regex_t * const regex); +extern bool 
is_magic(const char c); + #endif