NULL
};
-new_char_tokens("+-&|.()[]{}", operator_hl);
-new_keyword_tokens(c_keywords, control_hl);
-new_keyword_tokens(preprocessor_keywords, special_hl);
-new_region_token("/\\*", "\\*/", comment_hl);
-new_region_token("//", "\\n", comment_hl);
-new_region_token("\"", "\"", string_literal_hl);
-new_region_token("<", ">", string_literal_hl);
+//new_char_tokens("+-&|.()[]{}", operator_hl);
+//new_keyword_tokens(c_keywords, control_hl);
+//new_keyword_tokens(preprocessor_keywords, special_hl);
+//new_region_token("/\\*", "\\*/", comment_hl);
+//new_region_token("//", "\\n", comment_hl);
+//new_region_token("\"", "\"", string_literal_hl);
+//new_region_token("<", ">", string_literal_hl);
+//new_keyword_token("keyword", special_hl);
+new_keyword_token("while", operator_hl);
} offshoot_t;
typedef struct {
+ // XXX:
+ // These should share a mask
+ // Not even sure why they are pointers to begin with
bool * do_catch;
bool * is_negative;
+ bool is_at_the_beginning;
+ bool do_skip;
// these might be obsolite but im leaving them for now
bool * do_loop_hook;
bool * do_follow_hook;
// ----------------------------------
// ### Regex creation/destruction ###
// ----------------------------------
+#define HALT_AND_CATCH_FIRE INT_MIN
+
+// Adds one transition to the regex's delta table for every character of
+// `str`.
+//
+//   from - offset added to the current compiler state to get the source state
+//   str  - NUL-terminated list of input characters; one delta per character
+//   to   - offset added to the current compiler state to get the target
+//          state, or HALT_AND_CATCH_FIRE, which is stored verbatim
+//   cs   - compiler state; supplies *state, *width and the regex being built
+//
+// The table receives pointers to heap-allocated delta_t objects.
+// NOTE(review): the malloc() result is dereferenced without a NULL check.
+static void HOOK_ALL(      int              from,
+                     const char * const     str,
+                           int              to,
+                           compiler_state * cs) {
+
+    // The sentinel must not be shifted by the current state.
+    int target_state;
+    if (to == HALT_AND_CATCH_FIRE) {
+        target_state = HALT_AND_CATCH_FIRE;
+    } else {
+        target_state = (*cs->state) + to;
+    }
+
+    const char * cursor = str;
+    while (*cursor != '\0') {
+        delta_t * transition = malloc(sizeof(delta_t));
+        transition->in    = *cs->state + from;
+        transition->input = *cursor;
+        transition->to    = target_state;
+        transition->width = *cs->width;
+        // The vector stores the pointer itself, hence the address-of.
+        vector_push(&cs->regex->delta_table,
+                    &transition);
+        cursor++;
+    }
+}
+
+// Appends an offshoot to the regex's catch table using absolute state
+// numbers: `from` and `to` are stored exactly as given, without being
+// offset by the current compiler state.
+//
+//   from  - source state of the offshoot
+//   to    - target state of the offshoot
+//   width - width recorded for the offshoot
+//   cs    - compiler state; supplies the regex being built
+//
+// The table receives a pointer to a heap-allocated offshoot_t.
+// NOTE(review): the malloc() result is dereferenced without a NULL check.
+static void ABSOLUTE_OFFSHOOT(int              from,
+                              int              to,
+                              int              width,
+                              compiler_state * cs) {
+    offshoot_t * entry = malloc(sizeof(offshoot_t));
+    entry->width = width;
+    entry->to    = to;
+    entry->in    = from;
+    // The vector stores the pointer itself, hence the address-of.
+    vector_push(&cs->regex->catch_table,
+                &entry);
+}
+
+// Relative variant of ABSOLUTE_OFFSHOOT: both `from` and `to` are offsets
+// from the current compiler state (*cs->state). `width` is passed through
+// unchanged.
+static void OFFSHOOT(int              from,
+                     int              to,
+                     int              width,
+                     compiler_state * cs) {
+    const int base_state = *cs->state;
+    ABSOLUTE_OFFSHOOT(base_state + from, base_state + to, width, cs);
+}
+
static int escape_1_to_1(const char c, compiler_state * cs) {
char * target_list = (*cs->is_negative) ? cs->blacklist : cs->whitelist;
switch (c) {
return 0;
}
-//static int compile_hologram(char * hologram, char * whitelist) {
-// if (hologram[0] == '\\') {
-// switch (hologram[1]) {
-// case '<': {
-// const char very_word_chars[] = "abcdefghijklmnopqrstuwxyz"
-// "ABCDEFGHIJKLMNOPQRSTUWXYZ"
-// "_";
-// strcat(whitelist, very_word_chars);
-// is_negative = true;
-// HOOK_ALL(0, whitelist, 0)
-// } break;
-// }
-// }
-//}
+// Handles the escapes '<' and '>' (presumably the character following a
+// backslash; compile_escape passes it in as `c`).
+//
+// For '<':
+//   - at the very beginning of the pattern, adds an absolute offshoot 0 -> 2
+//     of width 0 and asks the main loop to skip quantifier compilation;
+//   - marks the token negative, appends the word characters to the
+//     blacklist, hooks every blacklisted character to HALT_AND_CATCH_FIRE
+//     and adds a width-1 offshoot from the current state to itself.
+// For '>':
+//   - marks the token negative and appends the word characters to the
+//     blacklist; nothing else is emitted here.
+//
+// Returns non-zero when `c` was one of the handled escapes, 0 otherwise.
+//
+// NOTE(review): strcat() assumes cs->blacklist has room for the whole
+// alphabet (53 characters plus the terminator); regex_compile provides a
+// 64-byte buffer that is emptied for each token - confirm nothing else
+// appends before this point.
+static int escape_hologram(const char c, compiler_state * cs) {
+    // Characters considered part of a word. The previous tables were
+    // missing 'v' and 'V'.
+    static const char very_word_chars[] = "abcdefghijklmnopqrstuvwxyz"
+                                          "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                                          "_";
+    switch (c) {
+        case '<': {
+            if (cs->is_at_the_beginning) {
+                ABSOLUTE_OFFSHOOT(0, 2, 0, cs);
+                cs->do_skip = true;
+            }
+            *cs->is_negative = true; // effectless currently; should be used to trigger the following lines in the main compile loop
+            strcat(cs->blacklist, very_word_chars);
+            HOOK_ALL(0, cs->blacklist, HALT_AND_CATCH_FIRE, cs);
+            OFFSHOOT(0, 0, 1, cs);
+
+            return sizeof(very_word_chars)-1;
+        }
+        case '>': {
+            *cs->is_negative = true;
+            strcat(cs->blacklist, very_word_chars);
+
+            return 1;
+        }
+        default:
+            break;
+    }
+    return 0;
+}
static int compile_dot(compiler_state * cs) {
*cs->do_catch = true;
static int compile_escape(const char c,
compiler_state * cs) {
- return escape_1_to_1(c, cs)
- || escape_1_to_N(c, cs)
+ return escape_1_to_1(c, cs)
+ || escape_1_to_N(c, cs)
|| escape_to_negative(c, cs)
- //|| compile_hologram(*s, whitelist)
+ || escape_hologram(c, cs)
;
}
compiler_state * cs) {
assert((range[0] == '[') && "Not a range.");
- char * target_list = (*cs->is_negative) ? cs->blacklist : cs->whitelist;
-
const char * s;
if (range[1] == '^') {
*cs->is_negative = true;
} else {
s = range + 1;
}
+
+ char * target_list = (*cs->is_negative) ? cs->blacklist : cs->whitelist;
+
for (; *s != ']'; s++) {
assert((*s != '\0') && "Unclosed range.");
char c = *s;
}
}
strncat(filtered, blacklist, 1);
- long_continue:;
- }
-}
-
-#define HALT_AND_CATCH_FIRE INT_MIN
-
-void HOOK_ALL( int from,
- const char * const str,
- int to,
- compiler_state * cs) {
-
- int hook_to = (to == HALT_AND_CATCH_FIRE) ? HALT_AND_CATCH_FIRE : ((*cs->state) + to);
-
-
- for (const char * s = str; *s != '\0'; s++) {
- delta_t * delta = malloc(sizeof(delta_t));
- delta->in = *cs->state + from;
- delta->input = *s;
- delta->to = hook_to;
- delta->width = *cs->width;
- vector_push(&cs->regex->delta_table,
- &delta);
+ long_continue:
+ ;
}
}
-void ABSOLUTE_OFFSHOOT(int from,
- int to,
- int width,
- compiler_state * cs) {
- offshoot_t * offshoot = malloc(sizeof(offshoot_t));
- offshoot->in = from;
- offshoot->to = to;
- offshoot->width = width;
- vector_push(&cs->regex->catch_table,
- &offshoot);
-}
-
-void OFFSHOOT(int from,
- int to,
- int width,
- compiler_state * cs) {
- ABSOLUTE_OFFSHOOT(*cs->state + from, *cs->state + to, width, cs);
-}
-
regex_t * regex_compile(const char * const pattern) {
regex_t * regex = (regex_t *)malloc(sizeof(regex_t));
regex->str = strdup(pattern);
int state = 2;
+ // XXX: awkward - compiler_state holds pointers to (some of) these locals instead of owning the flags itself
bool do_catch;
bool is_negative;
bool do_loop_hook;
char blacklist[64];
compiler_state cs = {
- .do_catch = &do_catch,
- .is_negative = &is_negative,
- .state = &state,
- .width = &width,
- .whitelist = whitelist,
- .blacklist = blacklist,
- .regex = regex,
+ .do_catch = &do_catch,
+ .is_negative = &is_negative,
+ .is_at_the_beginning = true,
+ .do_skip = false,
+ .state = &state,
+ .width = &width,
+ .whitelist = whitelist,
+ .blacklist = blacklist,
+ .regex = regex,
};
for (const char * s = pattern; *s != '\00';) {
blacklist[0] = '\0';
do_catch = false;
is_negative = false;
+ cs.do_skip = false;
+ /**/
do_loop_hook = false;
do_follow_hook = false;
do_loop_shoot = false;
do_follow_shoot = false;
+ /**/
width = 1;
// Translate char
switch (*s) {
case '^': {
- if (s == pattern) {
+ if (cs.is_at_the_beginning) {
ABSOLUTE_OFFSHOOT(0, 2, 0, &cs);
ABSOLUTE_OFFSHOOT(1, HALT_AND_CATCH_FIRE, 0, &cs);
}
if (s != pattern) {
state += 1;
}
- s += 1;
- goto long_continue;
+ cs.do_skip = true;
} break;
case '.': {
compile_dot(&cs);
whitelist[1] = '\0';
} break;
}
-
+
s += 1;
+ if (cs.do_skip) {
+ goto long_continue;
+ }
+
// Compile with quantifier
switch (*s) {
case '=':
filter_blacklist(whitelist, blacklist, filtered_blacklist);
HOOK_ALL(0, filtered_blacklist, HALT_AND_CATCH_FIRE, &cs);
}
+
long_continue:
+ cs.is_at_the_beginning = false;
}
regex->accepting_state = state;
int state,
int width) { // XXX: im pretty sure this is actually redundant and the width should be calculated from string - s
for (const char * s = (string + string_offset); *s != '\00';) {
+ // XXX: this should be a jump search for the instate and then a linear
// delta
+ //int left = 0;
+ //int right = regex->delta_table.element_count - 1;
+ //int i;
+ //while(left <= right) }
for (size_t i = 0; i < regex->delta_table.element_count; i++) {
+ //i = (left + right) / 2;
const delta_t * const delta = *(delta_t**)vector_get(®ex->delta_table, i);
if ((delta->in == state)
&& (delta->input == *s)) {
continue;
}
+ // XXX: the extra catch might not be necessary if we were to compile to a simpler form
+ catch_(regex, &state);
return (state == regex->accepting_state) ? width : false;
}