git.xolatile.top Git - public-libhl.git/commitdiff
'=' support; ignore start end atoms for now
author anon <anon@anon.anon>
Thu, 24 Aug 2023 17:17:14 +0000 (19:17 +0200)
committer anon <anon@anon.anon>
Thu, 24 Aug 2023 17:18:36 +0000 (19:18 +0200)
source/hl.h
source/regex.c

index 8cae9f6c678e876ee4e2a815480445ec7deafa06..89c8fc435b86c00e2027e23b552e5b18d64e5dd3 100644 (file)
@@ -130,12 +130,13 @@ int new_char_tokens(const char       *       characters,
 
 token_t * new_keyword_token(const char         * const word,
                                   hl_group_t   * const    g) {
-       size_t   word_length = strlen(word);
-       char   * new_word    = (char*)malloc(word_length + 4 + 1);
+       char   * new_word = strdup(word);
+       //size_t   word_length = strlen(word);
+       //char   * new_word    = (char*)malloc(word_length + 4 + 1);
 
-       memcpy(new_word, "\\<", 2);
-       memcpy(new_word + 2, word, word_length);
-       strcpy(new_word + 2 + word_length, "\\>");
+       //memcpy(new_word, "\\<", 2);
+       //memcpy(new_word + 2, word, word_length);
+       //strcpy(new_word + 2 + word_length, "\\>");
 
        token_t * mt = (token_t*)malloc(sizeof(token_t));
 
index 2adcfa1707b602a22b5a0adb30497cfa7734ac8c..1c1f7eaa1a3d588a3c26f4b239f99e7da2cef0bc 100644 (file)
@@ -11,7 +11,7 @@
 // ### Char tests ###
 // ------------------
 static bool is_quantifier(const char c) {
-       for (const char * s = "+*?"; *s != '\00'; s++) {
+       for (const char * s = "+*?="; *s != '\00'; s++) {
                if (*s == c) {
                        return true;
                }
@@ -40,6 +40,7 @@ typedef struct {
        int in;
        char input;
        int to;
+       int width;
 } delta_t;
 
 typedef struct {
@@ -75,6 +76,9 @@ static int escape_1_to_1(const char c, char * whitelist) {
                case '.': {
                        strcat(whitelist, ".");
                } return 1;
+               case '=': {
+                       strcat(whitelist, "=");
+               } return 1;
                case '?': {
                        strcat(whitelist, "?");
                } return 1;
@@ -248,12 +252,26 @@ static int escape_1_to_N(const char c, char * whitelist) {
        return 0;
 }
 
+//static int compile_hologram(char * hologram, char * whitelist) {
+//     if (hologram[0] == '\\') {
+//             switch (hologram[1]) {
+//                     case '<': {
+//                             const char very_word_chars[] = "abcdefghijklmnopqrstuwxyz"
+//                                                                                        "ABCDEFGHIJKLMNOPQRSTUWXYZ"
+//                                                                                        "_";
+//                             strcat(whitelist, very_word_chars);
+//                             is_negative = true;
+//                             HOOK_ALL(0, whitelist, 0)
+//                     } break;
+//             }
+//     }
+//}
+
 static int compile_range(const char * const     range,
                                char *       whitelist,
                                                           bool *     is_negative) {
-       assert(range[0] == '[' && "Not a range.");
+       assert((range[0] == '[') && "Not a range.");
 
-       int r = 0;
        const char * s;
        if (range[1] == '^') {
                *is_negative = true;
@@ -262,21 +280,20 @@ static int compile_range(const char * const     range,
                s = range + 1;
        }
        for (; *s != ']'; s++) {
-               assert(*s != '\00' && "Unclosed range.");
+               assert((*s != '\0') && "Unclosed range.");
                char c = *s;
                if (escape_1_to_1(c, whitelist)
                ||  escape_1_to_N(c, whitelist)) {
                        ;
                } else if (*(s+1) == '-') {
                        char end = *(s+2);
-                       assert(c < end && "Endless range.");
+                       assert((c < end) && "Endless range.");
                        for (char cc = c; cc < end+1; cc++) {
                                strncat(whitelist,   &cc, 1);
-                               strncat(whitelist, "\00", 1);
+                               strncat(whitelist, "\0", 1);
                        }
                        s += 2;
                } else {
-                       ++r;
                        strncat(whitelist,    &c, 1);
                        strncat(whitelist, "\00", 1);
                }
@@ -288,7 +305,7 @@ static int compile_range(const char * const     range,
 static bool catch_(const regex_t * const regex,
                          int     * const state) {
 
-       for (int i = 0; i < regex->catch_table.element_size; i++){
+       for (size_t i = 0; i < regex->catch_table.element_size; i++){
                const offshoot_t * const offshoot = (vector_get(&regex->catch_table, i));
                if (offshoot->in == *state) {
                        *state = offshoot->to;
@@ -300,18 +317,18 @@ static bool catch_(const regex_t * const regex,
 
 #define HALT_AND_CATCH_FIRE -1
 
-#define HOOK_ALL(from, str, to) do {                   \
-       int hook_to = (is_negative) ? -1 : state + to;     \
-       for (char * s = str; *s != '\00'; s++) {           \
-               vector_push(&regex->delta_table,               \
-                       &(delta_t){state + from, *s, hook_to}      \
-               );                                             \
-       }                                                  \
-       if (do_catch) {                                    \
-               vector_push(&regex->catch_table,               \
-                       &(offshoot_t){state + from, hook_to}       \
-               );                                             \
-       }                                                  \
+#define HOOK_ALL(from, str, to) do {                      \
+       int hook_to = (is_negative) ? -1 : state + to;        \
+       for (char * s = str; *s != '\0'; s++) {               \
+               vector_push(&regex->delta_table,                  \
+                       &(delta_t){state + from, *s, hook_to, width}  \
+               );                                                \
+       }                                                     \
+       if (do_catch || is_negative) {                        \
+               vector_push(&regex->catch_table,                  \
+                       &(offshoot_t){state + from, hook_to}          \
+               );                                                \
+       }                                                     \
 } while (0)
 
 #define EAT(n) do { \
@@ -321,25 +338,30 @@ static bool catch_(const regex_t * const regex,
 regex_t * regex_compile(const char * const pattern) {
        regex_t * regex = (regex_t *)malloc(sizeof(regex_t));
        regex->str = strdup(pattern);
-       vector_init(&regex->delta_table, sizeof(delta_t), 32);
-       vector_init(&regex->catch_table, sizeof(offshoot_t), 16);
+       vector_init(&regex->delta_table, sizeof(delta_t), 0);
+       vector_init(&regex->catch_table, sizeof(offshoot_t), 0);
 
        int state = 0;
 
        char whitelist[64];
        bool do_catch;
        bool is_negative;
+       int width;
        for (const char * s = pattern; *s != '\00';) {
                // Get token
                assert(!is_quantifier(*pattern) && "Pattern starts with quantifier.");
                whitelist[0] = '\00';
                do_catch     = false;
+               width        = 1;
 
                switch (*s) {
                        case '.': {
                                do_catch = true;
                        } break;
                        case '\\': {
+                               //if (compile_hologram(*s, whitelist)) {
+                               //      break;
+                               //}
                                EAT(1);
                                if(escape_1_to_1(*s, whitelist)
                                || escape_1_to_N(*s, whitelist)){
@@ -361,6 +383,7 @@ regex_t * regex_compile(const char * const pattern) {
 
                // Get quantifier
                switch (*s) {
+                       case '=':
                        case '?': {
                                HOOK_ALL(0, whitelist, +1);
                                EAT(1);
@@ -406,11 +429,11 @@ static bool regex_assert(const regex_t * const  regex,
 
        for (const char * s = string; *s != '\00'; s++) {
                // delta
-               for (int i = 0; i < regex->delta_table.element_count; i++) {
+               for (size_t i = 0; i < regex->delta_table.element_count; i++) {
                        const delta_t * const delta = (delta_t *)(vector_get(&regex->delta_table, i));
                        if ((delta->in == state) 
                        &&  (delta->input == *s)) {
-                               if(regex_assert(regex, s+1, delta->to)){
+                               if(regex_assert(regex, s + delta->width, delta->to)){
                                        return true;
                                }
                        }