]> git.xolatile.top Git - public-libhl.git/commitdiff
good enough master
authoranon <anon@anon.anon>
Tue, 26 Sep 2023 14:42:25 +0000 (16:42 +0200)
committeranon <anon@anon.anon>
Tue, 26 Sep 2023 14:42:25 +0000 (16:42 +0200)
Makefile
source/jeger.c

index 60d800045228bf65e6852d92d2a1231a12b87a13..2641e1ce1ff2d42bb45ab790c3df86afe7125d75 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -45,6 +45,9 @@ clean:
 
 test: chad_test
 
 
 test: chad_test
 
-.PHONY: test clean install
+run:
+       hl < source/main.c
+
+.PHONY: test clean install run
 
 .DEFAULT_GOAL:=${TARGET}
 
 .DEFAULT_GOAL:=${TARGET}
index 343f6390ec7fe36ebe389e92b079ba06a8355be9..2a844afdcddd5b12430d9a1ea4afa0c0637706c8 100644 (file)
@@ -121,7 +121,7 @@ typedef struct {
        int       flags;
        int       state;
        int       width;
        int       flags;
        int       state;
        int       width;
-       int       width2;
+       int       match_width;
        char    * whitelist;
        char    * blacklist;
 } compiler_state;
        char    * whitelist;
        char    * blacklist;
 } compiler_state;
@@ -131,7 +131,11 @@ typedef struct {
 // ----------------------------------
 // ### Regex creation/destruction ###
 // ----------------------------------
 // ----------------------------------
 // ### Regex creation/destruction ###
 // ----------------------------------
-static const int HALT_AND_CATCH_FIRE = INT_MIN;
+enum {
+       ASSERTION_FAILURE   =       0,
+       ASSERTION_SUCCESS   =       1,
+       HALT_AND_CATCH_FIRE = INT_MIN,
+};
 
 #define ASSERT_HALT(a) ((a == HALT_AND_CATCH_FIRE) ? HALT_AND_CATCH_FIRE : (cs->state + a))
 
 
 #define ASSERT_HALT(a) ((a == HALT_AND_CATCH_FIRE) ? HALT_AND_CATCH_FIRE : (cs->state + a))
 
@@ -148,7 +152,7 @@ void HOOK_ALL(const int                         from,
                        .input         = *s,
                        .to            = ASSERT_HALT(to),
                        .pattern_width = cs->width,
                        .input         = *s,
                        .to            = ASSERT_HALT(to),
                        .pattern_width = cs->width,
-                       .match_width   = cs->width2,
+                       .match_width   = cs->match_width,
                };
                vector_push(&regex->delta_table,
                            &delta);
                };
                vector_push(&regex->delta_table,
                            &delta);
@@ -490,6 +494,11 @@ regex_t * regex_compile(const char * const pattern) {
        char whitelist[64];
        char blacklist[64];
 
        char whitelist[64];
        char blacklist[64];
 
+       static const int REGEX_PREVERSABLE_FLAGS = IS_AT_THE_BEGINNING
+                                                | FORCE_START_OF_STRING
+                                                | DO_FORBID_START_OF_STRING
+                                                ;
+
        compiler_state cs = {
                .flags     = IS_AT_THE_BEGINNING,
                .state     = JEGER_INIT_STATE,
        compiler_state cs = {
                .flags     = IS_AT_THE_BEGINNING,
                .state     = JEGER_INIT_STATE,
@@ -500,11 +509,11 @@ regex_t * regex_compile(const char * const pattern) {
        for (const char * s = pattern; *s != '\00';) {
                assert(!is_quantifier(*s) && "Pattern starts with quantifier.");
                // Reset the compiler
        for (const char * s = pattern; *s != '\00';) {
                assert(!is_quantifier(*s) && "Pattern starts with quantifier.");
                // Reset the compiler
-               whitelist[0] = '\0';
-               blacklist[0] = '\0';
-               cs.flags    &= (IS_AT_THE_BEGINNING | FORCE_START_OF_STRING);
-               cs.width     = 1;
-               cs.width2    = 1;
+               whitelist[0]   = '\0';
+               blacklist[0]   = '\0';
+               cs.flags      &= REGEX_PREVERSABLE_FLAGS;
+               cs.width       = 1;
+               cs.match_width = 1;
 
                // Translate char
                switch (*s) {
 
                // Translate char
                switch (*s) {
@@ -535,12 +544,6 @@ regex_t * regex_compile(const char * const pattern) {
                        } break;
                }
 
                        } break;
                }
 
-               /* Ew */
-               if (*s == '\\'
-               &&  is_hologram_escape(*(s+1))) {
-                       ++s;
-               }
-
                // Compile char
                switch (*s) {
                        // holograms
                // Compile char
                switch (*s) {
                        // holograms
@@ -555,49 +558,58 @@ regex_t * regex_compile(const char * const pattern) {
                                }
                                s += 1;
                        } break;
                                }
                                s += 1;
                        } break;
-                       case '<': {
-                               // XXX: make this legible
-                               if (cs.flags & IS_AT_THE_BEGINNING
-                               && !(cs.flags & DO_CATCH)
-                               && !(cs.flags & IS_NEGATIVE)
-                               && whitelist[0] == '\0') {
-                                       // ---
-                                       cs.flags |= INCREMENT_STATE;
-                                       cs.flags |= DO_FORBID_START_OF_STRING;
-                                       strcat(whitelist, JEGER_CHAR_symbol_chars);
-                                       // ---
-                                       ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
-                                       ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
-                                       HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
-                                       // ---
-                                       ++cs.state;
-                                       cs.width = 0;
-                                       cs.width2 = 0;
-                                       HOOK_ALL(0, whitelist, +1, &cs, regex);
-                                       cs.width = 1;
-                                       OFFSHOOT(0, +1, 1, 0, &cs, regex);
-                                       // ---
+                       case '\\': {
+                               if(is_hologram_escape(*(s+1))) {
+                                       ++s;
                                } else {
                                } else {
-                                       HOOK_ALL(0, whitelist, +1, &cs, regex);
-                                       if ((cs.flags & DO_CATCH)
-                                       ||  (cs.flags & IS_NEGATIVE)) {
-                                               OFFSHOOT(+1, +2, 1, 1, &cs, regex);
-                                       } else {
-                                               cs.flags |= INCREMENT_STATE;
-                                       }
-                                       OFFSHOOT(0, +1, 1, 0, &cs, regex);
+                                       goto DEFAULT;
+                               }
+                               switch(*s){
+                                       case '<': {
+                                               // XXX: make this legible
+                                               if (cs.flags & IS_AT_THE_BEGINNING
+                                               && !(cs.flags & DO_CATCH)
+                                               && !(cs.flags & IS_NEGATIVE)
+                                               && whitelist[0] == '\0') {
+                                                       // ---
+                                                       cs.flags |= INCREMENT_STATE;
+                                                       cs.flags |= DO_FORBID_START_OF_STRING;
+                                                       strcat(whitelist, JEGER_CHAR_symbol_chars);
+                                                       // ---
+                                                       ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
+                                                       ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
+                                                       HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
+                                                       // ---
+                                                       ++cs.state;
+                                                       cs.width = 0;
+                                                       cs.match_width = 0;
+                                                       HOOK_ALL(0, whitelist, +1, &cs, regex);
+                                                       cs.width = 1;
+                                                       OFFSHOOT(0, +1, 1, 0, &cs, regex);
+                                                       // ---
+                                               } else {
+                                                       HOOK_ALL(0, whitelist, +1, &cs, regex);
+                                                       if ((cs.flags & DO_CATCH)
+                                                       ||  (cs.flags & IS_NEGATIVE)) {
+                                                               OFFSHOOT(+1, +2, 1, 1, &cs, regex);
+                                                       } else {
+                                                               cs.flags |= INCREMENT_STATE;
+                                                       }
+                                                       OFFSHOOT(0, +1, 1, 0, &cs, regex);
+                                               }
+                                               cs.flags |= IS_NEGATIVE;
+                                               strcat(blacklist, JEGER_CHAR_symbol_chars);
+                                               s += 1;
+                                       } break;
+                                       case '>': {
+                                               HOOK_ALL(0, whitelist, +1, &cs, regex);
+                                               cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
+                                               strcat(blacklist, JEGER_CHAR_symbol_chars);
+                                               OFFSHOOT(+1, +2, 0, 0, &cs, regex); 
+                                               ++cs.state;
+                                               s += 1;
+                                       } break;
                                }
                                }
-                               cs.flags |= IS_NEGATIVE;
-                               strcat(blacklist, JEGER_CHAR_symbol_chars);
-                               s += 1;
-                       } break;
-                       case '>': {
-                               HOOK_ALL(0, whitelist, +1, &cs, regex);
-                               cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
-                               strcat(blacklist, JEGER_CHAR_symbol_chars);
-                               OFFSHOOT(+1, +2, 0, 0, &cs, regex); 
-                               ++cs.state;
-                               s += 1;
                        } break;
                        // quantifiers
                        case '=':
                        } break;
                        // quantifiers
                        case '=':
@@ -631,6 +643,7 @@ regex_t * regex_compile(const char * const pattern) {
                                }
                                s += 1;
                        } break;
                                }
                                s += 1;
                        } break;
+                       DEFAULT:
                        default: { // Literal
                                cs.flags |= INCREMENT_STATE;
                                HOOK_ALL(0, whitelist, +1, &cs, regex);
                        default: { // Literal
                                cs.flags |= INCREMENT_STATE;
                                HOOK_ALL(0, whitelist, +1, &cs, regex);
@@ -653,6 +666,7 @@ regex_t * regex_compile(const char * const pattern) {
                        ++cs.state;
                }
 
                        ++cs.state;
                }
 
+               // Purge SOS flag
                cs.flags &= (~IS_AT_THE_BEGINNING);
        }
 
                cs.flags &= (~IS_AT_THE_BEGINNING);
        }
 
@@ -697,12 +711,12 @@ const offshoot_t * catch_table_lookup(const regex_t * const regex,
 }
 
 static
 }
 
 static
-bool regex_assert(const regex_t * const         regex,
+int regex_assert(const regex_t * const         regex,
                   const char    * const        string,
                         int                     state,
                         match_t * const         match) {
        if (state == HALT_AND_CATCH_FIRE) {
                   const char    * const        string,
                         int                     state,
                         match_t * const         match) {
        if (state == HALT_AND_CATCH_FIRE) {
-               return false;
+               return HALT_AND_CATCH_FIRE;
        }
 
        bool last_stand = false;
        }
 
        bool last_stand = false;
@@ -743,11 +757,16 @@ bool regex_assert(const regex_t * const         regex,
                                        do_reset = true;
                                }
                                const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
                                        do_reset = true;
                                }
                                const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
-                               if(r){
+                               if(r == ASSERTION_SUCCESS){
                                        match->width += delta->match_width;
                                        return r;
                                        match->width += delta->match_width;
                                        return r;
-                               } else if (do_reset) {
-                                       match->_pos_ptr = NULL;
+                               } else {
+                                       if (r == ASSERTION_FAILURE) {
+                                               was_found = false;
+                                       }
+                                       if (do_reset) {
+                                               match->_pos_ptr = NULL;
+                                       }
                                }
                        }
                }
                                }
                        }
                }
@@ -765,7 +784,7 @@ bool regex_assert(const regex_t * const         regex,
                }
        }
 
                }
        }
 
-       return (state == regex->accepting_state);
+       return ((state == regex->accepting_state) ? ASSERTION_SUCCESS : ASSERTION_FAILURE);
 }
 
 match_t * regex_match(const regex_t * const              regex,
 }
 
 match_t * regex_match(const regex_t * const              regex,
@@ -796,7 +815,8 @@ match_t * regex_match(const regex_t * const              regex,
                                .width    =    0,
                        };
 
                                .width    =    0,
                        };
 
-                       if (regex_assert(regex, s, initial_state, match)) {
+                       if (regex_assert(regex, s, initial_state, match) == 1) {
+                               //printf("true:  %s\n", s);
                                if (match->_pos_ptr) {
                                        match->position = (match->_pos_ptr - string);
                                } else {
                                if (match->_pos_ptr) {
                                        match->position = (match->_pos_ptr - string);
                                } else {
@@ -808,6 +828,7 @@ match_t * regex_match(const regex_t * const              regex,
                                s += ((match->width > 0) ? match->width : 1);
                                match = (match_t *)malloc(sizeof(match_t));
                        } else {
                                s += ((match->width > 0) ? match->width : 1);
                                match = (match_t *)malloc(sizeof(match_t));
                        } else {
+                               //printf("false: %s\n", s);
                                ++s;
                        }
                } while (*s != '\0');
                                ++s;
                        }
                } while (*s != '\0');