int flags;
int state;
int width;
- int width2;
+ int match_width;
char * whitelist;
char * blacklist;
} compiler_state;
// ----------------------------------
// ### Regex creation/destruction ###
// ----------------------------------
-static const int HALT_AND_CATCH_FIRE = INT_MIN;
+enum { // result codes returned by regex_assert()
+ ASSERTION_FAILURE = 0, // regex_assert() ended in a state other than regex->accepting_state
+ ASSERTION_SUCCESS = 1, // regex_assert() ended in regex->accepting_state
+ HALT_AND_CATCH_FIRE = INT_MIN, // sentinel state: regex_assert() returns it as-is, ASSERT_HALT() never offsets it
+};
/* Turn the relative transition target `a` into an absolute state by adding
 * it to `cs->state`. HALT_AND_CATCH_FIRE is passed through untouched so the
 * sentinel is never offset.
 *
 * Requires a pointer named `cs` with a `state` member to be in scope at the
 * expansion site. `a` is expanded twice, so pass side-effect-free
 * expressions only. Both uses of `a` are parenthesised so that arguments
 * containing low-precedence operators (ternary, `&`, `|`, ...) keep their
 * meaning. */
#define ASSERT_HALT(a) (((a) == HALT_AND_CATCH_FIRE) ? HALT_AND_CATCH_FIRE : (cs->state + (a)))
.input = *s,
.to = ASSERT_HALT(to),
.pattern_width = cs->width,
- .match_width = cs->width2,
+ .match_width = cs->match_width,
};
vector_push(&regex->delta_table,
&delta);
char whitelist[64];
char blacklist[64];
+ static const int REGEX_PREVERSABLE_FLAGS = IS_AT_THE_BEGINNING // mask of the flags that survive the per-iteration `cs.flags &= REGEX_PREVERSABLE_FLAGS` reset
+ | FORCE_START_OF_STRING
+ | DO_FORBID_START_OF_STRING
+ ; // NOTE(review): "PREVERSABLE" looks like a typo for "PRESERVABLE" -- confirm before renaming
+
compiler_state cs = {
.flags = IS_AT_THE_BEGINNING,
.state = JEGER_INIT_STATE,
for (const char * s = pattern; *s != '\00';) {
assert(!is_quantifier(*s) && "Pattern starts with quantifier.");
// Reset the compiler
- whitelist[0] = '\0';
- blacklist[0] = '\0';
- cs.flags &= (IS_AT_THE_BEGINNING | FORCE_START_OF_STRING);
- cs.width = 1;
- cs.width2 = 1;
+ whitelist[0] = '\0';
+ blacklist[0] = '\0';
+ cs.flags &= REGEX_PREVERSABLE_FLAGS;
+ cs.width = 1;
+ cs.match_width = 1;
// Translate char
switch (*s) {
} break;
}
- /* Ew */
- if (*s == '\\'
- && is_hologram_escape(*(s+1))) {
- ++s;
- }
-
// Compile char
switch (*s) {
// holograms
}
s += 1;
} break;
- case '<': {
- // XXX: make this legible
- if (cs.flags & IS_AT_THE_BEGINNING
- && !(cs.flags & DO_CATCH)
- && !(cs.flags & IS_NEGATIVE)
- && whitelist[0] == '\0') {
- // ---
- cs.flags |= INCREMENT_STATE;
- cs.flags |= DO_FORBID_START_OF_STRING;
- strcat(whitelist, JEGER_CHAR_symbol_chars);
- // ---
- ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
- ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
- HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
- // ---
- ++cs.state;
- cs.width = 0;
- cs.width2 = 0;
- HOOK_ALL(0, whitelist, +1, &cs, regex);
- cs.width = 1;
- OFFSHOOT(0, +1, 1, 0, &cs, regex);
- // ---
+ case '\\': {
+ if(is_hologram_escape(*(s+1))) {
+ ++s;
} else {
- HOOK_ALL(0, whitelist, +1, &cs, regex);
- if ((cs.flags & DO_CATCH)
- || (cs.flags & IS_NEGATIVE)) {
- OFFSHOOT(+1, +2, 1, 1, &cs, regex);
- } else {
- cs.flags |= INCREMENT_STATE;
- }
- OFFSHOOT(0, +1, 1, 0, &cs, regex);
+ goto DEFAULT;
+ }
+ switch(*s){
+ case '<': {
+ // XXX: make this legible
+ if (cs.flags & IS_AT_THE_BEGINNING
+ && !(cs.flags & DO_CATCH)
+ && !(cs.flags & IS_NEGATIVE)
+ && whitelist[0] == '\0') {
+ // ---
+ cs.flags |= INCREMENT_STATE;
+ cs.flags |= DO_FORBID_START_OF_STRING;
+ strcat(whitelist, JEGER_CHAR_symbol_chars);
+ // ---
+ ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
+ ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
+ HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
+ // ---
+ ++cs.state;
+ cs.width = 0;
+ cs.match_width = 0;
+ HOOK_ALL(0, whitelist, +1, &cs, regex);
+ cs.width = 1;
+ OFFSHOOT(0, +1, 1, 0, &cs, regex);
+ // ---
+ } else {
+ HOOK_ALL(0, whitelist, +1, &cs, regex);
+ if ((cs.flags & DO_CATCH)
+ || (cs.flags & IS_NEGATIVE)) {
+ OFFSHOOT(+1, +2, 1, 1, &cs, regex);
+ } else {
+ cs.flags |= INCREMENT_STATE;
+ }
+ OFFSHOOT(0, +1, 1, 0, &cs, regex);
+ }
+ cs.flags |= IS_NEGATIVE;
+ strcat(blacklist, JEGER_CHAR_symbol_chars);
+ s += 1;
+ } break;
+ case '>': {
+ HOOK_ALL(0, whitelist, +1, &cs, regex);
+ cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
+ strcat(blacklist, JEGER_CHAR_symbol_chars);
+ OFFSHOOT(+1, +2, 0, 0, &cs, regex);
+ ++cs.state;
+ s += 1;
+ } break;
}
- cs.flags |= IS_NEGATIVE;
- strcat(blacklist, JEGER_CHAR_symbol_chars);
- s += 1;
- } break;
- case '>': {
- HOOK_ALL(0, whitelist, +1, &cs, regex);
- cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
- strcat(blacklist, JEGER_CHAR_symbol_chars);
- OFFSHOOT(+1, +2, 0, 0, &cs, regex);
- ++cs.state;
- s += 1;
} break;
// quantifiers
case '=':
}
s += 1;
} break;
+ DEFAULT:
default: { // Literal
cs.flags |= INCREMENT_STATE;
HOOK_ALL(0, whitelist, +1, &cs, regex);
++cs.state;
}
+ // Purge SOS flag
cs.flags &= (~IS_AT_THE_BEGINNING);
}
}
/* Recursively test whether `string` is accepted starting from `state`.
 *
 * Returns one of three codes:
 *   HALT_AND_CATCH_FIRE - `state` itself is the HALT_AND_CATCH_FIRE sentinel;
 *   ASSERTION_SUCCESS   - a followed transition succeeded, or `state` equals
 *                         regex->accepting_state once the transitions are exhausted;
 *   ASSERTION_FAILURE   - otherwise.
 *
 * On a successful transition the delta's match_width is added to match->width.
 */
static
-bool regex_assert(const regex_t * const regex,
+int regex_assert(const regex_t * const regex,
const char * const string,
int state,
match_t * const match) {
if (state == HALT_AND_CATCH_FIRE) {
- return false;
+ return HALT_AND_CATCH_FIRE; // hand the sentinel back so the caller can tell it apart from a plain failure
}
bool last_stand = false;
do_reset = true;
}
// Follow the transition: skip the consumed pattern_width and continue from delta->to.
const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
- if(r){
+ if(r == ASSERTION_SUCCESS){ // explicit compare: the non-zero HALT_AND_CATCH_FIRE must not count as success
match->width += delta->match_width;
return r;
- } else if (do_reset) {
- match->_pos_ptr = NULL;
+ } else {
+ if (r == ASSERTION_FAILURE) { // only a plain failure clears was_found; HALT_AND_CATCH_FIRE leaves it alone
+ was_found = false;
+ }
+ if (do_reset) {
+ match->_pos_ptr = NULL;
+ }
}
}
}
}
}
- return (state == regex->accepting_state);
+ return ((state == regex->accepting_state) ? ASSERTION_SUCCESS : ASSERTION_FAILURE); // no transition succeeded: accept only if already in the accepting state
}
match_t * regex_match(const regex_t * const regex,
.width = 0,
};
- if (regex_assert(regex, s, initial_state, match)) {
+ if (regex_assert(regex, s, initial_state, match) == 1) {
+ //printf("true: %s\n", s);
if (match->_pos_ptr) {
match->position = (match->_pos_ptr - string);
} else {
s += ((match->width > 0) ? match->width : 1);
match = (match_t *)malloc(sizeof(match_t));
} else {
+ //printf("false: %s\n", s);
++s;
}
} while (*s != '\0');