mirror of
https://git.lain.church/emil/libhl.git
synced 2025-06-07 03:06:43 +00:00
i deserve a blowjob
This commit is contained in:
parent
085af2baad
commit
9b54a3f3e8
@ -15,7 +15,10 @@ typedef struct {
|
|||||||
} regex_t;
|
} regex_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int position;
|
union {
|
||||||
|
int position;
|
||||||
|
const char * _pos_ptr;
|
||||||
|
};
|
||||||
int width;
|
int width;
|
||||||
} match_t;
|
} match_t;
|
||||||
|
|
||||||
@ -25,5 +28,6 @@ extern bool regex_search(const regex_t * const regex, const char * const st
|
|||||||
extern match_t * regex_match(const regex_t * const regex, const char * const string, const bool start_of_string);
|
extern match_t * regex_match(const regex_t * const regex, const char * const string, const bool start_of_string);
|
||||||
|
|
||||||
extern bool is_magic(const char c);
|
extern bool is_magic(const char c);
|
||||||
|
extern bool is_sentinel(const match_t * const match);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -195,7 +195,7 @@ void render_string(const char * const string,
|
|||||||
token_t * t = *(token_t**)vector_get(&token_table,
|
token_t * t = *(token_t**)vector_get(&token_table,
|
||||||
i);
|
i);
|
||||||
match_t * match = regex_match(t->syntax, string, true);
|
match_t * match = regex_match(t->syntax, string, true);
|
||||||
if (match->position == -1) {
|
if (is_sentinel(match)) {
|
||||||
free(match);
|
free(match);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -212,7 +212,7 @@ void render_string(const char * const string,
|
|||||||
max = &sentinel;
|
max = &sentinel;
|
||||||
for (int h = 0; h < rrs; h++) {
|
for (int h = 0; h < rrs; h++) {
|
||||||
result_t * const current_result = r + h;
|
result_t * const current_result = r + h;
|
||||||
for (int j = 0; current_result->m[j].position != -1; j++) {
|
for (int j = 0; !is_sentinel(&(current_result->m[j])); j++) {
|
||||||
if (current_result->m[j].position == (s - string)) {
|
if (current_result->m[j].position == (s - string)) {
|
||||||
if (current_result->m[j].width > max->m->width) {
|
if (current_result->m[j].width > max->m->width) {
|
||||||
current_result->i = j;
|
current_result->i = j;
|
||||||
|
148
source/jeger.c
148
source/jeger.c
@ -8,8 +8,13 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#if DEBUG
|
||||||
|
# include <stdio.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#define JEGER_INIT_STATE 2
|
#define JEGER_SOS_STATE 0
|
||||||
|
#define JEGER_NSOS_STATE 1
|
||||||
|
#define JEGER_INIT_STATE 2
|
||||||
|
|
||||||
// ------------------
|
// ------------------
|
||||||
// ### Char tests ###
|
// ### Char tests ###
|
||||||
@ -40,6 +45,15 @@ bool is_magic(const char c) {
|
|||||||
;
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -------------------
|
||||||
|
// ### Match tests ###
|
||||||
|
// -------------------
|
||||||
|
bool is_sentinel(const match_t * const match) {
|
||||||
|
return (match->position == -1)
|
||||||
|
&& (match->width == -1)
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
// -----------------
|
// -----------------
|
||||||
// ### Char sets ###
|
// ### Char sets ###
|
||||||
// -----------------
|
// -----------------
|
||||||
@ -52,13 +66,13 @@ bool is_magic(const char c) {
|
|||||||
#define JEGER_CHAR_SET_lower_hex "abcdef"
|
#define JEGER_CHAR_SET_lower_hex "abcdef"
|
||||||
#define JEGER_CHAR_SET_upper_hex "ABCDEF"
|
#define JEGER_CHAR_SET_upper_hex "ABCDEF"
|
||||||
#define JEGER_CHAR_SET_oct_241_to_277 \
|
#define JEGER_CHAR_SET_oct_241_to_277 \
|
||||||
"\241\242\243\244\245" \
|
"\241\242\243\244\245" \
|
||||||
"\246\247\250\251\252" \
|
"\246\247\250\251\252" \
|
||||||
"\253\254\255\256\257" \
|
"\253\254\255\256\257" \
|
||||||
"\260\261\262\263\264" \
|
"\260\261\262\263\264" \
|
||||||
"\265\266\267\270\271" \
|
"\265\266\267\270\271" \
|
||||||
"\272\273\274\275\276" \
|
"\272\273\274\275\276" \
|
||||||
"\277"
|
"\277"
|
||||||
#define JEGER_CHAR_SET_oct_300_to_337 \
|
#define JEGER_CHAR_SET_oct_300_to_337 \
|
||||||
"\300\301\302\303\304" \
|
"\300\301\302\303\304" \
|
||||||
"\305\306\307\310\311" \
|
"\305\306\307\310\311" \
|
||||||
@ -68,13 +82,13 @@ bool is_magic(const char c) {
|
|||||||
"\331\332\333\334\335" \
|
"\331\332\333\334\335" \
|
||||||
"\336\337"
|
"\336\337"
|
||||||
#define JEGER_CHAR_SET_file_extra "/.-_+,#$%~="
|
#define JEGER_CHAR_SET_file_extra "/.-_+,#$%~="
|
||||||
#define JEGER_CHAR_SET_whitespace " \t\v\n"
|
#define JEGER_CHAR_SET_whitespace " " "\t\v\n"
|
||||||
|
|
||||||
static const char JEGER_CHAR_very_word_chars[] =
|
static const char JEGER_CHAR_symbol_chars[] =
|
||||||
JEGER_CHAR_SET_underscore
|
JEGER_CHAR_SET_underscore
|
||||||
JEGER_CHAR_SET_lower
|
JEGER_CHAR_SET_lower
|
||||||
JEGER_CHAR_SET_upper
|
JEGER_CHAR_SET_upper
|
||||||
;
|
;
|
||||||
|
|
||||||
// ----------------------
|
// ----------------------
|
||||||
// ### Internal Types ###
|
// ### Internal Types ###
|
||||||
@ -95,17 +109,19 @@ typedef struct {
|
|||||||
} offshoot_t;
|
} offshoot_t;
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
DO_CATCH = 0x00000001 << 0,
|
DO_CATCH = 0x00000001 << 0,
|
||||||
IS_NEGATIVE = 0x00000001 << 1,
|
IS_NEGATIVE = 0x00000001 << 1,
|
||||||
IS_AT_THE_BEGINNING = 0x00000001 << 2,
|
IS_AT_THE_BEGINNING = 0x00000001 << 2,
|
||||||
FORCE_START_OF_STRING = 0x00000001 << 3,
|
FORCE_START_OF_STRING = 0x00000001 << 3,
|
||||||
INCREMENT_STATE = 0x00000001 << 4,
|
DO_FORBID_START_OF_STRING = 0x00000001 << 4,
|
||||||
|
INCREMENT_STATE = 0x00000001 << 5,
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int flags;
|
int flags;
|
||||||
int state;
|
int state;
|
||||||
int width;
|
int width;
|
||||||
|
int width2;
|
||||||
char * whitelist;
|
char * whitelist;
|
||||||
char * blacklist;
|
char * blacklist;
|
||||||
} compiler_state;
|
} compiler_state;
|
||||||
@ -132,7 +148,7 @@ void HOOK_ALL(const int from,
|
|||||||
.input = *s,
|
.input = *s,
|
||||||
.to = ASSERT_HALT(to),
|
.to = ASSERT_HALT(to),
|
||||||
.pattern_width = cs->width,
|
.pattern_width = cs->width,
|
||||||
.match_width = 1,
|
.match_width = cs->width2,
|
||||||
};
|
};
|
||||||
vector_push(&regex->delta_table,
|
vector_push(&regex->delta_table,
|
||||||
&delta);
|
&delta);
|
||||||
@ -318,9 +334,9 @@ int escape_1_to_N(const char c,
|
|||||||
return sizeof(word_chars)-1;
|
return sizeof(word_chars)-1;
|
||||||
};
|
};
|
||||||
case 'h': {
|
case 'h': {
|
||||||
// #global JEGER_CHAR_very_word_chars
|
// #global JEGER_CHAR_symbol_chars
|
||||||
strcpy(target_list, JEGER_CHAR_very_word_chars);
|
strcpy(target_list, JEGER_CHAR_symbol_chars);
|
||||||
return sizeof(JEGER_CHAR_very_word_chars)-1;
|
return sizeof(JEGER_CHAR_symbol_chars)-1;
|
||||||
};
|
};
|
||||||
case 'a': {
|
case 'a': {
|
||||||
const char alpha_chars[] = JEGER_CHAR_SET_lower
|
const char alpha_chars[] = JEGER_CHAR_SET_lower
|
||||||
@ -346,7 +362,7 @@ int escape_1_to_N(const char c,
|
|||||||
|
|
||||||
static inline
|
static inline
|
||||||
int escape_to_negative(const char c,
|
int escape_to_negative(const char c,
|
||||||
compiler_state * const cs) {
|
compiler_state * const cs) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'D': {
|
case 'D': {
|
||||||
const char digit_chars[] = JEGER_CHAR_SET_digits;
|
const char digit_chars[] = JEGER_CHAR_SET_digits;
|
||||||
@ -488,6 +504,7 @@ regex_t * regex_compile(const char * const pattern) {
|
|||||||
blacklist[0] = '\0';
|
blacklist[0] = '\0';
|
||||||
cs.flags &= (IS_AT_THE_BEGINNING | FORCE_START_OF_STRING);
|
cs.flags &= (IS_AT_THE_BEGINNING | FORCE_START_OF_STRING);
|
||||||
cs.width = 1;
|
cs.width = 1;
|
||||||
|
cs.width2 = 1;
|
||||||
|
|
||||||
// Translate char
|
// Translate char
|
||||||
switch (*s) {
|
switch (*s) {
|
||||||
@ -503,7 +520,7 @@ regex_t * regex_compile(const char * const pattern) {
|
|||||||
if (compile_escape(*s, &cs)) {
|
if (compile_escape(*s, &cs)) {
|
||||||
s += 1;
|
s += 1;
|
||||||
} else if (is_hologram_escape(*s)) {
|
} else if (is_hologram_escape(*s)) {
|
||||||
;
|
s -= 1;
|
||||||
} else {
|
} else {
|
||||||
assert("Unknown escape.");
|
assert("Unknown escape.");
|
||||||
}
|
}
|
||||||
@ -518,6 +535,12 @@ regex_t * regex_compile(const char * const pattern) {
|
|||||||
} break;
|
} break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Ew */
|
||||||
|
if (*s == '\\'
|
||||||
|
&& is_hologram_escape(*(s+1))) {
|
||||||
|
++s;
|
||||||
|
}
|
||||||
|
|
||||||
// Compile char
|
// Compile char
|
||||||
switch (*s) {
|
switch (*s) {
|
||||||
// holograms
|
// holograms
|
||||||
@ -533,18 +556,47 @@ regex_t * regex_compile(const char * const pattern) {
|
|||||||
s += 1;
|
s += 1;
|
||||||
} break;
|
} break;
|
||||||
case '<': {
|
case '<': {
|
||||||
cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
|
// XXX: make this legible
|
||||||
if (cs.flags & IS_AT_THE_BEGINNING) {
|
if (cs.flags & IS_AT_THE_BEGINNING
|
||||||
ABSOLUTE_OFFSHOOT(0, JEGER_INIT_STATE+1, 0, 0, regex);
|
&& !(cs.flags & DO_CATCH)
|
||||||
|
&& !(cs.flags & IS_NEGATIVE)
|
||||||
|
&& whitelist[0] == '\0') {
|
||||||
|
// ---
|
||||||
|
cs.flags |= INCREMENT_STATE;
|
||||||
|
cs.flags |= DO_FORBID_START_OF_STRING;
|
||||||
|
strcat(whitelist, JEGER_CHAR_symbol_chars);
|
||||||
|
// ---
|
||||||
|
ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
|
||||||
|
ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
|
||||||
|
HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
|
||||||
|
// ---
|
||||||
|
++cs.state;
|
||||||
|
cs.width = 0;
|
||||||
|
cs.width2 = 0;
|
||||||
|
HOOK_ALL(0, whitelist, +1, &cs, regex);
|
||||||
|
cs.width = 1;
|
||||||
|
OFFSHOOT(0, +1, 1, 0, &cs, regex);
|
||||||
|
// ---
|
||||||
|
} else {
|
||||||
|
HOOK_ALL(0, whitelist, +1, &cs, regex);
|
||||||
|
if ((cs.flags & DO_CATCH)
|
||||||
|
|| (cs.flags & IS_NEGATIVE)) {
|
||||||
|
OFFSHOOT(+1, +2, 1, 1, &cs, regex);
|
||||||
|
} else {
|
||||||
|
cs.flags |= INCREMENT_STATE;
|
||||||
|
}
|
||||||
|
OFFSHOOT(0, +1, 1, 0, &cs, regex);
|
||||||
}
|
}
|
||||||
strcat(blacklist, JEGER_CHAR_very_word_chars);
|
cs.flags |= IS_NEGATIVE;
|
||||||
OFFSHOOT(0, 0, 1, 0, &cs, regex);
|
strcat(blacklist, JEGER_CHAR_symbol_chars);
|
||||||
s += 1;
|
s += 1;
|
||||||
} break;
|
} break;
|
||||||
case '>': {
|
case '>': {
|
||||||
|
HOOK_ALL(0, whitelist, +1, &cs, regex);
|
||||||
cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
|
cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
|
||||||
strcat(blacklist, JEGER_CHAR_very_word_chars);
|
strcat(blacklist, JEGER_CHAR_symbol_chars);
|
||||||
OFFSHOOT(0, 1, 0, 0, &cs, regex);
|
OFFSHOOT(+1, +2, 0, 0, &cs, regex);
|
||||||
|
++cs.state;
|
||||||
s += 1;
|
s += 1;
|
||||||
} break;
|
} break;
|
||||||
// quantifiers
|
// quantifiers
|
||||||
@ -605,11 +657,13 @@ regex_t * regex_compile(const char * const pattern) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Init state hookups
|
// Init state hookups
|
||||||
ABSOLUTE_OFFSHOOT(0, JEGER_INIT_STATE, 0, 0, regex);
|
if (!(cs.flags & DO_FORBID_START_OF_STRING)) {
|
||||||
|
ABSOLUTE_OFFSHOOT(JEGER_SOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
|
||||||
|
}
|
||||||
if (cs.flags & FORCE_START_OF_STRING) {
|
if (cs.flags & FORCE_START_OF_STRING) {
|
||||||
ABSOLUTE_OFFSHOOT(1, HALT_AND_CATCH_FIRE, 0, 0, regex);
|
ABSOLUTE_OFFSHOOT(JEGER_NSOS_STATE, HALT_AND_CATCH_FIRE, 0, 0, regex);
|
||||||
} else {
|
} else {
|
||||||
ABSOLUTE_OFFSHOOT(1, JEGER_INIT_STATE, 0, 0, regex);
|
ABSOLUTE_OFFSHOOT(JEGER_NSOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
|
||||||
}
|
}
|
||||||
|
|
||||||
regex->accepting_state = cs.state;
|
regex->accepting_state = cs.state;
|
||||||
@ -682,14 +736,18 @@ bool regex_assert(const regex_t * const regex,
|
|||||||
|
|
||||||
if ((delta->in == state)
|
if ((delta->in == state)
|
||||||
&& (delta->input == *s)) {
|
&& (delta->input == *s)) {
|
||||||
|
bool do_reset = false;
|
||||||
was_found = true;
|
was_found = true;
|
||||||
|
if (!match->_pos_ptr && delta->match_width) {
|
||||||
|
match->_pos_ptr = s;
|
||||||
|
do_reset = true;
|
||||||
|
}
|
||||||
const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
|
const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
|
||||||
if(r){
|
if(r){
|
||||||
if (match->position == -1) {
|
|
||||||
match->position = (s - string);
|
|
||||||
}
|
|
||||||
match->width += delta->match_width;
|
match->width += delta->match_width;
|
||||||
return r;
|
return r;
|
||||||
|
} else if (do_reset) {
|
||||||
|
match->_pos_ptr = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -729,17 +787,21 @@ match_t * regex_match(const regex_t * const regex,
|
|||||||
// Find all matches
|
// Find all matches
|
||||||
{
|
{
|
||||||
const char * s = string;
|
const char * s = string;
|
||||||
|
int initial_state;
|
||||||
do {
|
do {
|
||||||
int initial_state;
|
|
||||||
initial_state = (int)(!(is_start_of_string && (s == string)));
|
initial_state = (int)(!(is_start_of_string && (s == string)));
|
||||||
|
|
||||||
*match = (match_t){
|
*match = (match_t){
|
||||||
.position = -1,
|
._pos_ptr = NULL,
|
||||||
.width = 0,
|
.width = 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
if (regex_assert(regex, s, initial_state, match)) {
|
if (regex_assert(regex, s, initial_state, match)) {
|
||||||
match->position = (s - string);
|
if (match->_pos_ptr) {
|
||||||
|
match->position = (match->_pos_ptr - string);
|
||||||
|
} else {
|
||||||
|
match->position = (s - string);
|
||||||
|
}
|
||||||
|
|
||||||
vector_push(&matches, match);
|
vector_push(&matches, match);
|
||||||
|
|
||||||
@ -773,7 +835,7 @@ bool regex_search(const regex_t * const regex,
|
|||||||
const char * const string) {
|
const char * const string) {
|
||||||
|
|
||||||
match_t * m = regex_match(regex, string, true);
|
match_t * m = regex_match(regex, string, true);
|
||||||
const bool r = (m->position != -1);
|
const bool r = !is_sentinel(m);
|
||||||
free(m);
|
free(m);
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user