git.xolatile.top Git - public-libhl.git/commitdiff
'^' works
author anon <anon@anon.anon>
Tue, 29 Aug 2023 14:23:10 +0000 (16:23 +0200)
committer anon <anon@anon.anon>
Tue, 29 Aug 2023 14:23:10 +0000 (16:23 +0200)
source/hl.h
source/regex.c
source/regex.h
tests/carrot.input [new file with mode: 0644]

index 873517791bc14697caf535748a9bfd87ce05af9d..e88f265ae1c05945e9a8cc397665f866304832b6 100644 (file)
@@ -71,10 +71,11 @@ extern token_t * new_token(const char         * const word,
 
 // TODO: ALIGN PROPERLY...
 
-extern int token_fits(const token_t * const  token,
-                      const char    * const  to,
+extern int token_fits(const token_t * const          token,
+                      const char    * const             to,
                       const int              string_offset,
-                            int     *        match_offset);
+                      const bool            is_start_of_line,
+                            int     *         match_offset);
 
 extern void render_string(const char * const string,
                           const char * const mode);
@@ -232,6 +233,11 @@ token_t * new_token(const char         * const word,
                        return new_keyword_token(word, g);
                }
                case MATCH: {
+                       token_t * mt = (token_t*)malloc(sizeof(token_t));
+                               mt->hl     = g;
+                               mt->t      = MATCH;
+                               mt->syntax = regex_compile(word);
+                       append_token(mt);
                } break;
                case REGION: {
                } break;
@@ -244,14 +250,15 @@ token_t * new_token(const char         * const word,
 // ### Highlighting ###
 // --------------------
 
-int token_fits(const token_t * const token,
-               const char    * const to,
-               const int             string_offset,
+int token_fits(const token_t * const            token,
+               const char    * const               to,
+               const int                string_offset,
+               const bool            is_start_of_line,
                      int     *       match_offset) {
        UNUSED(match_offset);
 
        //return regex_match(pattern, to, string_offset, match_offset);
-       return regex_match(token->syntax, to + string_offset);
+       return regex_match(token->syntax, to, is_start_of_line, string_offset);
 }
 
 void render_string(const char * const string,
@@ -264,7 +271,8 @@ void render_string(const char * const string,
                for (; token_index < token_table.element_count; token_index++) {
                        token_t * t = *(token_t**)vector_get(&token_table,
                                                             token_index);
-                       f = token_fits(t, string, (int) (s - string), &offset);
+                       const bool is_start_of_line = (s == string) || (*s == '\n');
+                       f = token_fits(t, string, (int)(s - string), is_start_of_line, &offset);
                        if (f) {
                                break;
                        }
index 9d01efa04dd067fcb3fd88476aa6f0ac60f108da..378431147c81fe3e34a8c153f317ef0c033fbbfa 100644 (file)
@@ -24,7 +24,7 @@ bool is_magic(const char c) {
        if (is_quantifier(c)) {
                return true;
        }
-       for (const char * s = "\\[]."; *s != '\00'; s++) {
+       for (const char * s = "\\[].^"; *s != '\00'; s++) {
                if (*s == c) {
                        return true;
                }
@@ -47,11 +47,18 @@ typedef struct {
 typedef struct {
        int in;
        int to;
+       int width;
 } offshoot_t;
 
 typedef struct {
        bool    * do_catch;
        bool    * is_negative;
+// these might be obsolete but I'm leaving them for now
+       bool    * do_loop_hook;
+       bool    * do_follow_hook;
+       bool    * do_loop_shoot;
+       bool    * do_follow_shoot;
+// ---
        int     * state;
        int     * width;
        char    * whitelist;
@@ -88,6 +95,9 @@ static int escape_1_to_1(const char c, compiler_state * cs) {
                case '.': {
                        strcat(target_list, ".");
                } return 1;
+               case '^': {
+                       strcat(target_list, "^");
+               } return 1;
                case '=': {
                        strcat(target_list, "=");
                } return 1;
@@ -365,7 +375,7 @@ void HOOK_ALL(      int              from,
                     int                to,
                     compiler_state *   cs) {
 
-       int hook_to = (to == HALT_AND_CATCH_FIRE) ? -1 : ((*cs->state) + to);
+       int hook_to = (to == HALT_AND_CATCH_FIRE) ? HALT_AND_CATCH_FIRE : ((*cs->state) + to);
 
 
        for (const char * s = str; *s != '\0'; s++) {
@@ -379,26 +389,39 @@ void HOOK_ALL(      int              from,
        }
 }
 
-void OFFSHOOT(int             from,
-              int               to,
-              compiler_state *  cs) {
+void ABSOLUTE_OFFSHOOT(int             from,
+                       int               to,
+                       int            width,
+                       compiler_state *  cs) {
        offshoot_t * offshoot = malloc(sizeof(offshoot_t));
-       offshoot->in = *cs->state + from; 
-       offshoot->to = *cs->state + to;
+       offshoot->in    = from; 
+       offshoot->to    = to;
+       offshoot->width = width;
        vector_push(&cs->regex->catch_table,
                    &offshoot);
 }
 
+void OFFSHOOT(int             from,
+              int               to,
+              int            width,
+              compiler_state *  cs) {
+       ABSOLUTE_OFFSHOOT(*cs->state + from, *cs->state + to, width, cs);
+}
+
 regex_t * regex_compile(const char * const pattern) {
        regex_t * regex = (regex_t *)malloc(sizeof(regex_t));
        regex->str = strdup(pattern);
        vector_init(&regex->delta_table, sizeof(delta_t*), 0UL);
        vector_init(&regex->catch_table, sizeof(offshoot_t*), 0UL);
 
-       int state = 0;
+       int state = 2;
 
        bool do_catch;
        bool is_negative;
+       bool do_loop_hook;
+       bool do_follow_hook;
+       bool do_loop_shoot;
+       bool do_follow_shoot;
        int width;
        char whitelist[64];
        char blacklist[64];
@@ -416,14 +439,32 @@ regex_t * regex_compile(const char * const pattern) {
        for (const char * s = pattern; *s != '\00';) {
                // Reset the compiler
                assert(!is_quantifier(*pattern) && "Pattern starts with quantifier.");
-               whitelist[0] = '\00';
-               blacklist[0] = '\00';
-               do_catch     = false;
-               is_negative  = false;
+               whitelist[0]    =  '\0';
+               blacklist[0]    =  '\0';
+               do_catch        = false;
+               is_negative     = false;
+               do_loop_hook    = false;
+               do_follow_hook  = false;
+               do_loop_shoot   = false;
+               do_follow_shoot = false;
                width        = 1;
 
                // Translate char
                switch (*s) {
+                       case '^': {
+                               if (s == pattern) {
+                                       ABSOLUTE_OFFSHOOT(0,                   2, 0, &cs);
+                                       ABSOLUTE_OFFSHOOT(1, HALT_AND_CATCH_FIRE, 0, &cs);
+                               }
+                               whitelist[0] = '\n';
+                               whitelist[1] = '\0';
+                               HOOK_ALL(0, whitelist, 0, &cs);
+                               if (s != pattern) {
+                                       state += 1;
+                               }
+                               s += 1;
+                               goto long_continue;
+                       } break;
                        case '.': {
                                compile_dot(&cs);
                        } break;
@@ -435,8 +476,8 @@ regex_t * regex_compile(const char * const pattern) {
                                s += compile_range(s, &cs) - 1;
                        } break;
                        default: {
-                               whitelist[0] = *s;
-                               whitelist[1] = '\00';
+                               whitelist[0] =   *s;
+                               whitelist[1] = '\0';
                        } break;
                }
 
@@ -446,37 +487,38 @@ regex_t * regex_compile(const char * const pattern) {
                switch (*s) {
                        case '=':
                        case '?': {
+                               do_loop_hook = true;
                                HOOK_ALL(0, whitelist, +1, &cs);
                                if (do_catch || is_negative) {
-                                       OFFSHOOT(0, +1, &cs);
+                                       OFFSHOOT(0, +1, 1, &cs);
                                }
                                s += 1;
                        } break;
                        case '*': {
                                HOOK_ALL(0, whitelist,  0, &cs);
                                if (do_catch) {
-                                       OFFSHOOT(0, +1, &cs);
+                                       OFFSHOOT(0, +1, 1, &cs);
                                } else if (is_negative) {
-                                       OFFSHOOT(0,  0, &cs);
+                                       OFFSHOOT(0,  0, 1, &cs);
                                }
                                s += 1;
                        } break;
                        case '+': {
                                HOOK_ALL(0, whitelist, +1, &cs);
                                if (do_catch || is_negative) {
-                                       OFFSHOOT(0, +1, &cs);
+                                       OFFSHOOT(0, +1, 1, &cs);
                                }
                                state += 1;
                                HOOK_ALL(0, whitelist,  0, &cs);
                                if (do_catch || is_negative) {
-                                       OFFSHOOT(0, 0, &cs);
+                                       OFFSHOOT(0, 0, 1, &cs);
                                }
                                s += 1;
                        } break;
                        default: { // Literal
                                HOOK_ALL(0, whitelist, +1, &cs);
                                if (do_catch || is_negative) {
-                                       OFFSHOOT(0, +1, &cs);
+                                       OFFSHOOT(0, +1, 1, &cs);
                                }
                                state += 1;
                        } break;
@@ -489,6 +531,7 @@ regex_t * regex_compile(const char * const pattern) {
                        filter_blacklist(whitelist, blacklist, filtered_blacklist);
                        HOOK_ALL(0, filtered_blacklist, HALT_AND_CATCH_FIRE, &cs);
                }
+               long_continue:
        }
 
        regex->accepting_state = state;
@@ -509,37 +552,40 @@ int regex_free(regex_t * const regex) {
 // -----------------
 // ### Searching ###
 // -----------------
-static bool catch_(const regex_t * const regex,
+static int catch_(const regex_t * const regex,
                          int     * const state) {
        for (size_t i = 0; i < regex->catch_table.element_count; i++){
                const offshoot_t * const offshoot = *(offshoot_t**)vector_get(&regex->catch_table, i);
                if (offshoot->in == *state) {
                        *state = offshoot->to;
-                       return true;
+                       return offshoot->width;
                }
        }
-       return false;
+       return HALT_AND_CATCH_FIRE;
 }
 
-static int regex_assert(const regex_t * const  regex,
-                         const char    * const string,
-                               int              state,
-                               int              width) {
-       for (const char * s = string; *s != '\00'; s++) {
+static int regex_assert(const regex_t * const         regex,
+                        const char    * const        string,
+                        const int             string_offset,
+                              int                     state,
+                              int                     width) { // XXX: im pretty sure this is actually redundant and the width should be calculated from string - s
+       for (const char * s = (string + string_offset); *s != '\00';) {
                // delta
                for (size_t i = 0; i < regex->delta_table.element_count; i++) {
                        const delta_t * const delta = *(delta_t**)vector_get(&regex->delta_table, i);
                        if ((delta->in == state) 
                        &&  (delta->input == *s)) {
-                               int r = regex_assert(regex, s + delta->width, delta->to, width + 1);
+                               int r = regex_assert(regex, string, (s - string) + delta->width, delta->to, width + 1);
                                if(r){
                                        return r;
                                }
                        }
                }
 
-               if (catch_(regex, &state)) {
-                       width += 1;
+               const int catch_width = catch_(regex, &state);
+               if ((catch_width != HALT_AND_CATCH_FIRE)
+               &&  (state != HALT_AND_CATCH_FIRE)) {
+                       s += catch_width;
                        continue;
                }
 
@@ -549,8 +595,10 @@ static int regex_assert(const regex_t * const  regex,
        return false;
 }
 
-int regex_match(      regex_t *        regex,
-                const char    * const string) {
+int regex_match(      regex_t *                    regex,
+                const char    * const             string,
+                const bool            is_start_of_string,
+                const int                  string_offset) {    // XXX: remove this useless parameter
        if (regex == NULL) {
                return false;
        }
@@ -558,11 +606,13 @@ int regex_match(      regex_t *        regex,
                return true;
        }
 
-       return regex_assert(regex, string, 0, 0);
+       const int initial_state = (int)(!is_start_of_string);
+
+       return regex_assert(regex, string, string_offset, initial_state, 0);
 }
 
 bool regex_search(      regex_t *        regex,
                   const char    * const string) {
 
-       return (bool)regex_match(regex, string);
+       return (bool)regex_match(regex, string, true, 0);
 }
index 0049fcc08cd0e74b32d70db3dffa4fa7601003c4..f35670d1e1ae97f13edf6a79bd3de1552b509f18 100644 (file)
@@ -16,7 +16,7 @@ typedef struct {
 extern regex_t * regex_compile(const char * const pattern);
 extern int       regex_free(regex_t * const regex);
 extern bool      regex_search(regex_t * regex, const char * const string);
-extern int       regex_match(regex_t * regex, const char * const string);
+extern int       regex_match(regex_t * regex, const char * const string, const bool start_of_string, const int string_offset);
 
 extern bool is_magic(const char c);
 
diff --git a/tests/carrot.input b/tests/carrot.input
new file mode 100644 (file)
index 0000000..f9dcfc9
--- /dev/null
@@ -0,0 +1,8 @@
+^
+^ ^
+       ^ ^ ^^
+ ^ ^^ ^3^ ^
+^
+^
+ ^
+^