This commit is contained in:
anon 2024-12-06 20:51:16 +01:00
parent d7030bb80e
commit a6235c8a53
15 changed files with 0 additions and 1771 deletions

View File

@ -1,17 +0,0 @@
# Flags for the regression-test binary: mold linker, debug info, strict warnings.
CXXFLAGS := -fuse-ld=mold -ggdb -Wall -Wextra -Wpedantic

# `make DEBUG=1` additionally compiles the sources with -DDEBUG.
ifeq (${DEBUG}, 1)
  CXXFLAGS += -DDEBUG
endif

OUT := regtest

# None of these targets name a file they produce.
.PHONY: main run test clean

main:
	${CXX} ${CXXFLAGS} ${CPPFLAGS} source/main.cpp source/vector.c source/jeger.c -o ${OUT}

# Run the freshly built binary from this directory; a bare `regtest` would be
# looked up through PATH instead.
run:
	./${OUT}

test: run

# -f keeps `make clean` from failing when nothing has been built yet.
clean:
	rm -f ${OUT}

View File

@ -1,56 +1 @@
# Jëger
A regex engine.
### Syntax
The aim was to follow Vim's regex syntax. Esoteric special characters such as "\zs" are not implemented, however the gist is supported.
```C
match_t * regex_match(const regex_t * const regex, const char * const string, const bool start_of_string);
```
Returns a sentinel terminated array of `match_t` objects.
The sentinel object is defined as `(match_t){ .position = -1, .width = -1, };`.
```C
bool is_sentinel(const match_t * const match);
```
This is the function you must use to check whether a `match_t` is a sentinel or not.
I.e. make this the break condition while looping the results.
| Symbol | Meaning (TODO: fill in) |
| :----: | :---------------------: |
| . | |
| ? | One or zero of the previous token |
| = | Same as ? |
| * | Any number of the previous token |
| + | One or more of the previous token |
| \\> | End of word |
| ^ | Start of string |
| \t | Tab |
| \n | New line |
| \b | |
| \i | |
| \I | |
| \k | |
| \K | |
| \f | |
| \F | |
| \p | |
| \P | |
| \s | |
| \d | Digit char |
| \D | Not digit char |
| \x | Hex char|
| \X | Not hex char |
| \o | Octal char |
| \O | Not octal char |
| \w | Word char|
| \W | Not word char|
| \h | |
| \a | Ascii letter |
| \l | Lowercase ascii letter |
| \L | Not (lowercase ascii letter) |
| \u | Uppercase ascii letter |
| \U | Not (uppercase ascii letter) |
| [\<range\>] | Any of \<range\> |
| [\^\<range\>] | None of \<range\> |

View File

@ -1,24 +0,0 @@
#CPPFLAGS := -DDEBUG
#CXXFLAGS := -O0 -ggdb
CXXFLAGS := -O2

BUILD.cpp := ${CXX} ${CXXFLAGS} ${CPPFLAGS}

build: jeger gnu

# Jeger benchmark: the engine is built as two shared objects and linked in.
jeger: ../source/vector.c ../source/jeger.c jeger_racer.cpp
	g++ -shared ../source/vector.c -o vector.so
	g++ -shared ../source/jeger.c -o jeger.so
	${BUILD.cpp} -I../source/ jeger_racer.cpp vector.so jeger.so -o jeger_racer.out

# Reference benchmark against the C library's POSIX regex implementation.
gnu: gnu_racer.cpp
	${BUILD.cpp} gnu_racer.cpp -o gnu_racer.out

# Every recipe line runs in its own shell, so an `export` on a separate line is
# lost before the next command starts. The library path is therefore set on the
# very line that launches the binary needing vector.so / jeger.so.
test: build
	perf stat -r 10000 ./gnu_racer.out
	LD_LIBRARY_PATH=$$(realpath .):$$LD_LIBRARY_PATH perf stat -r 10000 ./jeger_racer.out

# -f keeps `make clean` from failing when there is nothing to remove.
clean:
	rm -f *.so *.out

.PHONY: build jeger gnu test clean

View File

@ -1,95 +0,0 @@
#if DEBUG
# include <assert.h>
#endif
#include <stddef.h>
#include <regex.h>
/* Compile `what` as a POSIX extended regex, run it once against `on` and, in
 * DEBUG builds, assert that the verdict (match / no match) equals `expect`.
 * Outside DEBUG builds the call only exercises the regex engine.
 */
static
void TEST(const char * const what,
          const char * const on,
          const bool expect){
    regex_t r;
    // A regex_t that regcomp() failed to fill in must not be handed to regexec().
    if (regcomp(&r, what, REG_EXTENDED | REG_NOSUB) != 0) {
#if DEBUG
        assert(!"regcomp() failed");
#endif
        return;
    }
    const int result = regexec(&r, on, 0, NULL, 0);
    regfree(&r);
#if DEBUG
    assert((result != -1) && (expect == !((bool)result)));
#else
    // Nothing is verified in release builds; silence unused warnings.
    (void)result;
    (void)expect;
#endif
}
/* Benchmark workload for the POSIX regex reference implementation.
 * Every line is one pattern / subject / expected-verdict triple passed to TEST();
 * the expectation is only checked when the file is built with DEBUG.
 */
signed main(){
TEST( R"del(abc)del", "abc", true);
TEST(R"del(efg1)del", "efg1", true);
TEST( R"del(nig)del", "ger", false);
TEST( R"del(ss)del", "sss", true);
TEST( R"del(sss)del", "ss", false);
TEST( R"del(ab+c)del", "abc", true);
TEST(R"del(ef+g1)del", "effffg1", true);
TEST(R"del(efg1+)del", "efg", false);
TEST(R"del(efg1+)del", "efg1", true);
TEST(R"del(efg1+)del", "efg11", true);
TEST( R"del(a+a)del", "aaa", true);
TEST( R"del(a+a)del", "aa", true);
TEST( R"del(a+a)del", "a", false);
TEST( R"del(a+a)del", "aaa", true);
TEST(R"del(a+\+)del", "aaa", false);
TEST( R"del(ab*c)del", "abc", true);
TEST(R"del(ef*g1)del", "effffg1", true);
TEST(R"del(efg1*)del", "efg", true);
TEST(R"del(efg1*)del", "efg1", true);
TEST(R"del(efg1*)del", "efg11", true);
TEST( R"del(ne.)del", "net", true);
TEST( R"del(ne.)del", "ne", false);
TEST(R"del(ne.+)del", "neoo", true);
TEST(R"del(ne.*)del", "neoo", true);
TEST(R"del(ne.*)del", "ne", true);
TEST( R"del(ne.o)del", "neto", true);
TEST(R"del(ne.+o)del", "nettto", true);
TEST(R"del(ne.+o)del", "neo", false);
TEST(R"del(ne.+o)del", "neoo", true);
TEST(R"del(ne.*o)del", "neo", true);
TEST(R"del(ne.)del", "ne\t", true);
TEST(R"del(ne\t)del", "ne", false);
TEST( "ne\t", "ne\t", true); //XXX
TEST(R"del(ne )del", "net", false);
TEST(R"del(ne)del", "ne\t", true);
TEST(R"del(\sa)del", " a", true);
TEST(R"del(\sa)del", " a ", true);
TEST(R"del(\wi)del", "hi", true);
TEST(R"del(\w+)del", "asd", true);
TEST(R"del(\w*)del", "", true);
TEST( R"del([A-Za-z]+)del", "HelloWorld", true);
TEST(R"del([A-Za-z]+g)del", "HelloWorldg", true);
TEST(R"del([A-Za-z]+g)del", "g", false);
TEST(R"del([A-Za-z]*g)del", "g", true);
TEST(R"del([A-Za-z]+1)del", "1", false);
TEST( R"del([^0-9])del", "0", false);
TEST( R"del([^A-Za-z])del", "HelloWorld", false);
TEST(R"del([^A-Za-z]+g)del", "313g", true);
TEST( R"del([^0-9])del", "HelloWorld", true);
TEST( R"del([^a])del", "ba", true);
TEST( R"del(^\^)del", "^^", true);
TEST( R"del(^\^)del", " ^", false);
TEST(R"del(^ \^)del", " ^", true);
TEST( R"del(^a*)del", "asd", true);
TEST( R"del(^)del", "", true);
TEST( R"del(\<test)del", "test", true);
TEST( R"del(test\>)del", "test", true);
TEST( R"del(\<test)del", "atest", false);
TEST( R"del(test\>)del", "testa", false);
TEST(R"del(\<test\>)del", "test", true);
}

View File

@ -1,87 +0,0 @@
#if DEBUG
# include <assert.h>
#endif
#include <jeger.h>
static
void TEST(const char * const what,
const char * const on,
const bool expect){
regex_t * r = regex_compile(what);
bool result = regex_search(r, on);
regex_free(r);
#if DEBUG
assert(expect == result);
#endif
}
/* Benchmark workload for the Jeger engine.
 * Every line is one pattern / subject / expected-verdict triple passed to TEST();
 * the expectation is only checked when the file is built with DEBUG.
 */
signed main(){
TEST( R"del(abc)del", "abc", true);
TEST(R"del(efg1)del", "efg1", true);
TEST( R"del(nig)del", "ger", false);
TEST( R"del(ss)del", "sss", true);
TEST( R"del(sss)del", "ss", false);
TEST( R"del(ab+c)del", "abc", true);
TEST(R"del(ef+g1)del", "effffg1", true);
TEST(R"del(efg1+)del", "efg", false);
TEST(R"del(efg1+)del", "efg1", true);
TEST(R"del(efg1+)del", "efg11", true);
TEST( R"del(a+a)del", "aaa", true);
TEST( R"del(a+a)del", "aa", true);
TEST( R"del(a+a)del", "a", false);
TEST( R"del(a+a)del", "aaa", true);
TEST(R"del(a+\+)del", "aaa", false);
TEST( R"del(ab*c)del", "abc", true);
TEST(R"del(ef*g1)del", "effffg1", true);
TEST(R"del(efg1*)del", "efg", true);
TEST(R"del(efg1*)del", "efg1", true);
TEST(R"del(efg1*)del", "efg11", true);
TEST( R"del(ne.)del", "net", true);
TEST( R"del(ne.)del", "ne", false);
TEST(R"del(ne.+)del", "neoo", true);
TEST(R"del(ne.*)del", "neoo", true);
TEST(R"del(ne.*)del", "ne", true);
TEST( R"del(ne.o)del", "neto", true);
TEST(R"del(ne.+o)del", "nettto", true);
TEST(R"del(ne.+o)del", "neo", false);
TEST(R"del(ne.+o)del", "neoo", true);
TEST(R"del(ne.*o)del", "neo", true);
TEST(R"del(ne.)del", "ne\t", true);
TEST(R"del(ne\t)del", "ne", false);
TEST(R"del(ne\t)del", "ne\t", true);
TEST(R"del(ne\t)del", "net", false);
TEST(R"del(ne)del", "ne\t", true);
TEST(R"del(\sa)del", " a", true);
TEST(R"del(\sa)del", " a ", true);
TEST(R"del(\wi)del", "hi", true);
TEST(R"del(\w+)del", "asd", true);
TEST(R"del(\w*)del", "", true);
TEST( R"del([A-Za-z]+)del", "HelloWorld", true);
TEST(R"del([A-Za-z]+g)del", "HelloWorldg", true);
TEST(R"del([A-Za-z]+g)del", "g", false);
TEST(R"del([A-Za-z]*g)del", "g", true);
TEST(R"del([A-Za-z]+1)del", "1", false);
TEST( R"del(^\^)del", "^^", true);
TEST( R"del(^\^)del", " ^", false);
TEST(R"del(^ \^)del", " ^", true);
TEST( R"del(^a*)del", "asd", true);
TEST( R"del(^)del", "", true);
TEST( R"del(\<test)del", "test", true);
TEST( R"del(test\>)del", "test", true);
TEST( R"del(\<test)del", "atest", false);
TEST( R"del(test\>)del", "testa", false);
TEST(R"del(\<test\>)del", "test", true);
}

View File

@ -1,55 +0,0 @@
class RegexPrinter:
    """GDB pretty-printer that renders a Jeger `regex_t` with both tables expanded."""

    def __init__(self, val):
        # `val` is the value handed over by the lookup function registered with GDB.
        self.val = val

    def _format_table(self, table, element_type_name):
        """Render the entries of one table, one tab-indented entry per line.

        `table['data']` is reinterpreted as a pointer to pointers of
        `element_type_name`; each of the `table['element_count']` entries is
        dereferenced twice before being converted to text. Entries are joined
        with ",\\n", so an empty table yields an empty string instead of
        eating into the text that precedes it.
        """
        ptr_ptr_t = gdb.lookup_type(element_type_name).pointer().pointer()
        return ",\n".join(
            "\t" + str((table['data'].cast(ptr_ptr_t) + i).dereference().dereference())
            for i in range(int(table['element_count']))
        )

    def to_string(self):
        """Build the multi-line text GDB shows for the regex."""
        return (
            "{"
            # Scalar members
            + "accepting_state = " + str(self.val['accepting_state'])
            + ", str = " + str(self.val['str']) + ",\n"
            # State transition table
            + "delta_table = {\n"
            + self._format_table(self.val['delta_table'], "delta_t")
            + "\n },\n"
            # Fallback table
            + "offshoot_table = { \n"
            + self._format_table(self.val['catch_table'], "offshoot_t")
            + "\n }\n"
            # Closure
            + "}"
        )
def regex_lookup(val):
    """Lookup hook for GDB: return a RegexPrinter for (const) `regex_t` values, else None."""
    type_name = str(val.type)
    if type_name in ('regex_t', 'const regex_t'):
        return RegexPrinter(val)
    return None
gdb.pretty_printers.append(regex_lookup)

View File

@ -1,8 +0,0 @@
# regspect N
# Removes all breakpoints, sets one on regex_match, tells GDB to ignore its
# first N-1 hits, runs the program, refreshes the TUI and prints the `regex`
# argument of the call it stopped in.
# NOTE(review): `ignore 1` addresses breakpoint number 1; presumably this only
# holds for the first breakpoint created in a GDB session - confirm.
define regspect
delete
break regex_match
ignore 1 $arg0 - 1
run
tui refresh
print * regex
end

View File

@ -1,125 +0,0 @@
# Abstraction
+---------------------+
| |
| |
| State register |
| |
| |
+---------------------+
+---------------------------------+
| State transition table |
+---------------------------------+
+---------------------------------+
| Fallback transition table |
+---------------------------------+
---
State transition table look up
+ success --> continue
+ fail --> look up fallback table
* success --> continue
* fail --> return
EOS ? --> look up fallback table
+ success --> is 0 width?
* success --> continue
* fail --> return
+ fail --> return
---
# Legend
| | Start | End |
| :--: | :---: | :-: |
| Line | SOS | EOS |
| Word | SOW | EOW |
##### HALT\_AND\_CATCH\_FIRE
H&C is a special state signalling that we have hit a dead end.
The reason why we need it, and can't just quit instantly, is backtracking.
---
##### [^example]
This is a negative range.
```
let myNegativeRange = {'e', 'x', 'a', 'm', 'p', 'l'}
```
None of the characters in `$myNegativeRange` must be accepted.
The way this is compiled is that we first hook all chars in `$myNegativeRange` to H&C,
then define an OFFSHOOT of width 1.
Put differently:
if we read something illegal we abort this branch,
if what we read was not illegal, we deduct that it must have been legal and we continue.
Handling "negatives" this way allows us to be "alphabet agnostic" in a sense.
Many implementations will presume ASCII, with its fixed 7/8 bit width
and create look up tables.
Which is fast and cute, but this strategy becomes a giant memory hog
if we ever wanted to use it on, say UTF-8 (from 256 te/c (table entries per char) to 4'294'967'295 te/c).
#### .
This is the dot operator.
It matches any 1 char.
Similar to how negative ranges are implemented,
it takes advantage of the fallback table.
It simply ignores the state transition table and rather unconditionally hooks itself to the next state.
#### ^
This is the caret operator.
It matches the SOS.
For explanation purposes multilining (match '\n') is irrelevant.
That behaves just like a literal.
What is more interesting is how SOS is recognized.
Since `regex_assert()` is recursive the current state is continuously passed along,
however at our first frame, it's not just always 0.
`regex_match()` decides depending on the current position of the string.
Basically we have the first 2 states (0, 1) reserved and always missing from the state transition table.
+ 0 - SOS
+ 1 - !SOS
Normally both are _hooked_ to state 2,
and we pretend nothing has ever happened.
But when the caret operator is compiled, it sets a special compiler flag FORCE\_START\_OF\_STRING,
which forbids the hooking of state 1 to 2,
therefore when `regex_match()` calls from, say position 2,
it passes in 1 as the starting state,
no state transition table entry will be found since that's forbidden to begin with,
no jumps are found(!),
the machine checks whether the current state (1) is the accepting state (>=2)
and finally returns failure.
#### \<
This is the SOW operator.
SOW must match:
```
^myword
[^\h]myword
```
Not only that, this combination is key,
either it has to be the SOS
or there has to be at least something which is not a symbol char.
Without the last condition "eexample" would match "\\\<example\\\>"
as the iteration of `regex_match()` reaches "example".
From a more practical perspective:
``` C
\<myword\>
// Must match
"myword"
" myword"
```

View File

@ -1,7 +0,0 @@
[ ] wchar\_t support
[ ] UTF-8 support
[ ] arbitrary memory support (this probably covers UTF-8 support)
[ ] documentation thats not shit
[ ] HOOK\_ALL / OFFSHOOT width parameter inconsistency
[ ] nesting is going real wild
[ ] states could be optimized by chopping off the init padding if not flagged required

View File

@ -1,863 +0,0 @@
#ifdef __cplusplus
# pragma GCC diagnostic ignored "-Wc++20-extensions"
#endif
#include "jeger.h"
#include <assert.h>
#include <string.h>
#include <limits.h>
#include <stdlib.h>
#if DEBUG
# include <stdio.h>
#endif
// Reserved state numbers.
// regex_match() enters at JEGER_SOS_STATE when the attempt begins at the start
// of the string and at JEGER_NSOS_STATE otherwise; the compiler hands out
// pattern states beginning with JEGER_INIT_STATE.
#define JEGER_SOS_STATE 0
#define JEGER_NSOS_STATE 1
#define JEGER_INIT_STATE 2
// ------------------
// ### Char tests ###
// ------------------
/* Report whether `c` occurs in the NUL-terminated string `str`.
 * The terminator itself never counts as a hit.
 */
static inline
bool mystrchr(const char * const str, const char c){
    const char * cursor = str;
    while (*cursor != '\0') {
        if (*cursor == c) {
            return true;
        }
        ++cursor;
    }
    return false;
}
/* True for the characters that quantify the preceding token: = ? + * */
static inline
bool is_quantifier(const char c) {
    static const char quantifiers[] = "=?+*";
    return mystrchr(quantifiers, c);
}
/* True for the characters that, when escaped, form the word-boundary operators \< and \>. */
static inline
bool is_hologram_escape(const char c) {
    static const char hologram_escapes[] = "<>";
    return mystrchr(hologram_escapes, c);
}
/* True when `c` is a quantifier or one of the other special pattern characters: \ [ ] . ^ */
bool is_magic(const char c) {
    if (is_quantifier(c)) {
        return true;
    }
    return mystrchr("\\[].^", c);
}
// -------------------
// ### Match tests ###
// -------------------
/* True when `match` is the terminator of a result array, i.e. both its
 * position and its width are -1.
 */
bool is_sentinel(const match_t * const match) {
    if (match->position != -1) {
        return false;
    }
    return match->width == -1;
}
// -----------------
// ### Char sets ###
// -----------------
// Building blocks for the character classes. Each macro is a plain string
// literal so that classes can be assembled by literal concatenation.
#define JEGER_CHAR_SET_at "@"
#define JEGER_CHAR_SET_underscore "_"
// All 26 letters; 'v' / 'V' must not be left out, otherwise \w, \a, \l, \u and
// the word-boundary operators silently treat them as non-letters.
#define JEGER_CHAR_SET_lower "abcdefghijklmnopqrstuvwxyz"
#define JEGER_CHAR_SET_upper "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define JEGER_CHAR_SET_digits "0123456789"
#define JEGER_CHAR_SET_octal_digits "01234567"
#define JEGER_CHAR_SET_lower_hex "abcdef"
#define JEGER_CHAR_SET_upper_hex "ABCDEF"
// Bytes 0241..0277 (octal)
#define JEGER_CHAR_SET_oct_241_to_277 \
    "\241\242\243\244\245" \
    "\246\247\250\251\252" \
    "\253\254\255\256\257" \
    "\260\261\262\263\264" \
    "\265\266\267\270\271" \
    "\272\273\274\275\276" \
    "\277"
// Bytes 0300..0337 (octal)
#define JEGER_CHAR_SET_oct_300_to_337 \
    "\300\301\302\303\304" \
    "\305\306\307\310\311" \
    "\312\313\314\315\316" \
    "\317\320\321\322\323" \
    "\324\325\326\327\330" \
    "\331\332\333\334\335" \
    "\336\337"
#define JEGER_CHAR_SET_file_extra "/.-_+,#$%~="
#define JEGER_CHAR_SET_whitespace " " "\t\v\n"
// Characters that make up a "word" for \h and for the \< / \> operators.
static const char JEGER_CHAR_symbol_chars[] =
    JEGER_CHAR_SET_underscore
    JEGER_CHAR_SET_lower
    JEGER_CHAR_SET_upper
;
// ----------------------
// ### Internal Types ###
// ----------------------
// One entry of the state transition table:
// in state `in`, reading the character `input` leads to state `to`.
typedef struct {
int in;
char input;
int to;
// how far regex_assert() advances in the searched string when it takes this transition
int pattern_width;
// amount added to the reported match width when this transition is part of a successful match
int match_width;
} delta_t;
// One entry of the fallback (catch) table: an input-independent jump from
// state `in` to state `to`, consulted by regex_assert() when no transition matched.
typedef struct {
int in;
int to;
// how far regex_assert() advances in the searched string when it takes this jump
int pattern_width;
// amount added to the reported match width when this jump is taken
int match_width;
} offshoot_t;
// Bit flags kept in compiler_state.flags while a pattern is compiled.
enum {
// the current token also needs a fallback-table entry (set by compile_dot())
DO_CATCH = 0x00000001 << 0,
// the current token is described by the blacklist rather than the whitelist
IS_NEGATIVE = 0x00000001 << 1,
// the token being compiled is the first one of the pattern
IS_AT_THE_BEGINNING = 0x00000001 << 2,
// set by a leading ^; makes regex_compile() hook JEGER_NSOS_STATE to HALT_AND_CATCH_FIRE
FORCE_START_OF_STRING = 0x00000001 << 3,
// set by a leading \<; suppresses the default JEGER_SOS_STATE -> JEGER_INIT_STATE hook
DO_FORBID_START_OF_STRING = 0x00000001 << 4,
// advance compiler_state.state once the current token has been compiled
INCREMENT_STATE = 0x00000001 << 5,
};
// Working state of regex_compile(), shared with the compile_* / escape_* helpers.
typedef struct {
// combination of the flag bits declared above
int flags;
// state number that relative `from` / `to` offsets are added to
int state;
// copied into delta_t.pattern_width by HOOK_ALL()
int width;
// copied into delta_t.match_width by HOOK_ALL()
int match_width;
// scratch list of accepted characters for the current token (buffer owned by regex_compile())
char * whitelist;
// scratch list of rejected characters for the current token (buffer owned by regex_compile())
char * blacklist;
} compiler_state;
// ----------------------------------
// ### Regex creation/destruction ###
// ----------------------------------
/* Results of regex_assert(). HALT_AND_CATCH_FIRE doubles as the pseudo state
 * number that transitions point at when a branch is a dead end.
 */
enum {
    ASSERTION_FAILURE = 0,
    ASSERTION_SUCCESS = 1,
    HALT_AND_CATCH_FIRE = INT_MIN,
};
/* Turn a state offset relative to `cs->state` into an absolute state number,
 * passing HALT_AND_CATCH_FIRE through untouched.
 * Expects a pointer named `cs` with a `state` member in the expanding scope.
 * The argument is parenthesised so that compound expressions expand correctly;
 * note that it is evaluated twice.
 */
#define ASSERT_HALT(a) (((a) == HALT_AND_CATCH_FIRE) ? HALT_AND_CATCH_FIRE : (cs->state + (a)))
/* Add one transition per character of `str` to the state transition table.
 * `from` and `to` are offsets relative to `cs->state`; `to` may also be
 * HALT_AND_CATCH_FIRE. The widths are taken from `cs`.
 * Aborts the process when memory runs out, as there is no way to report it.
 */
static
void HOOK_ALL(const int from,
              const char * const str,
              const int to,
              const compiler_state * const cs,
              regex_t * regex) {
    // Both endpoints are identical for every character, so resolve them once.
    const int source = cs->state + from;
    const int target = ASSERT_HALT(to);
    for (const char * s = str; *s != '\0'; s++) {
        delta_t * delta = (delta_t *)malloc(sizeof(delta_t));
        if (delta == NULL) {
            abort();
        }
        *delta = (delta_t){
            .in = source,
            .input = *s,
            .to = target,
            .pattern_width = cs->width,
            .match_width = cs->match_width,
        };
        // The table stores pointers, hence the address of the pointer is pushed.
        vector_push(&regex->delta_table,
                    &delta);
    }
}
/* Add a fallback-table entry leading from state `from` to state `to`.
 * Both state numbers are absolute (not relative to the compiler state).
 * Aborts the process when memory runs out, as there is no way to report it.
 */
static
void ABSOLUTE_OFFSHOOT(const int from,
                       const int to,
                       const int width,
                       const int match_width,
                       regex_t * regex) {
    offshoot_t * offshoot = (offshoot_t *)malloc(sizeof(offshoot_t));
    if (offshoot == NULL) {
        abort();
    }
    *offshoot = (offshoot_t){
        .in = from,
        .to = to,
        .pattern_width = width,
        .match_width = match_width,
    };
    // The table stores pointers, hence the address of the pointer is pushed.
    vector_push(&regex->catch_table,
                &offshoot);
}
/* Relative flavour of ABSOLUTE_OFFSHOOT(): `from` and `to` are offsets from
 * `cs->state`; `to` may also be HALT_AND_CATCH_FIRE, which is kept as is.
 */
static
void OFFSHOOT(const int from,
              const int to,
              const int width,
              const int match_width,
              const compiler_state * cs,
              regex_t * regex) {
    const int source = cs->state + from;
    const int target = ASSERT_HALT(to);
    ABSOLUTE_OFFSHOOT(source, target, width, match_width, regex);
}
/* Handle escapes that stand for exactly one character (\t, \n, \r, \b and the
 * escaped magic characters). The character is appended to the blacklist when
 * IS_NEGATIVE is set, to the whitelist otherwise.
 * Returns 1 when `c` was such an escape, 0 when it was not.
 */
static
int escape_1_to_1(const char c,
                  const compiler_state * const cs) {
    char literal;
    switch (c) {
        case 't': literal = '\t'; break;
        case 'n': literal = '\n'; break;
        case 'r': literal = '\r'; break;
        case 'b': literal = '\b'; break;
        // escaped magic characters stand for themselves
        case '[':
        case ']':
        case '.':
        case '^':
        case '=':
        case '?':
        case '+':
        case '*':
        case '\\':
            literal = c;
            break;
        default:
            return 0;
    }
    char * const list = (cs->flags & IS_NEGATIVE) ? cs->blacklist : cs->whitelist;
    const char piece[2] = { literal, '\0' };
    strcat(list, piece);
    return 1;
}
static
int escape_1_to_N(const char c,
const compiler_state * const cs) {
char * target_list = (cs->flags & IS_NEGATIVE) ? cs->blacklist : cs->whitelist;
switch(c) {
case 'i': {
const char identifier_chars[] = JEGER_CHAR_SET_at
JEGER_CHAR_SET_underscore
JEGER_CHAR_SET_digits
JEGER_CHAR_SET_oct_300_to_337
;
strcpy(target_list, identifier_chars);
return sizeof(identifier_chars)-1;
};
case 'I': {
const char identifier_chars[] = JEGER_CHAR_SET_at
JEGER_CHAR_SET_underscore
JEGER_CHAR_SET_oct_300_to_337
;
strcpy(target_list, identifier_chars);
return sizeof(identifier_chars)-1;
};
case 'k': {
const char keyword_chars[] = JEGER_CHAR_SET_at
JEGER_CHAR_SET_underscore
JEGER_CHAR_SET_digits
JEGER_CHAR_SET_oct_300_to_337
;
strcpy(target_list, keyword_chars);
return sizeof(keyword_chars)-1;
};
case 'K': {
const char keyword_chars[] = JEGER_CHAR_SET_at
JEGER_CHAR_SET_underscore
JEGER_CHAR_SET_oct_300_to_337
;
strcpy(target_list, keyword_chars);
return sizeof(keyword_chars)-1;
};
case 'f': {
const char filename_chars[] = JEGER_CHAR_SET_at
JEGER_CHAR_SET_digits
JEGER_CHAR_SET_file_extra
;
strcpy(target_list, filename_chars);
return sizeof(filename_chars)-1;
};
case 'F': {
const char filename_chars[] = JEGER_CHAR_SET_at
JEGER_CHAR_SET_file_extra
;
strcpy(target_list, filename_chars);
return sizeof(filename_chars)-1;
};
case 'p': {
const char printable_chars[] = JEGER_CHAR_SET_at
JEGER_CHAR_SET_oct_241_to_277
JEGER_CHAR_SET_oct_300_to_337
;
strcpy(target_list, printable_chars);
return sizeof(printable_chars)-1;
};
case 'P': {
const char printable_chars[] = JEGER_CHAR_SET_at
JEGER_CHAR_SET_oct_241_to_277
JEGER_CHAR_SET_oct_300_to_337
;
strcpy(target_list, printable_chars);
return sizeof(printable_chars)-1;
};
case 's': {
const char whitespace_chars[] = JEGER_CHAR_SET_whitespace;
strcpy(target_list, whitespace_chars);
return sizeof(whitespace_chars)-1;
};
case 'd': {
const char digit_chars[] = JEGER_CHAR_SET_digits;
strcpy(target_list, digit_chars);
return sizeof(digit_chars)-1;
};
case 'x': {
const char hex_chars[] = JEGER_CHAR_SET_digits
JEGER_CHAR_SET_lower_hex
JEGER_CHAR_SET_upper_hex
;
strcpy(target_list, hex_chars);
return sizeof(hex_chars)-1;
};
case 'o': {
const char oct_chars[] = JEGER_CHAR_SET_octal_digits;
strcpy(target_list, oct_chars);
return sizeof(oct_chars)-1;
};
case 'w': {
const char word_chars[] = JEGER_CHAR_SET_underscore
JEGER_CHAR_SET_digits
JEGER_CHAR_SET_lower
JEGER_CHAR_SET_upper
;
strcpy(target_list, word_chars);
return sizeof(word_chars)-1;
};
case 'h': {
// #global JEGER_CHAR_symbol_chars
strcpy(target_list, JEGER_CHAR_symbol_chars);
return sizeof(JEGER_CHAR_symbol_chars)-1;
};
case 'a': {
const char alpha_chars[] = JEGER_CHAR_SET_lower
JEGER_CHAR_SET_upper
;
strcpy(target_list, alpha_chars);
return sizeof(alpha_chars)-1;
};
case 'l': {
const char lower_alpha_chars[] = JEGER_CHAR_SET_lower;
strcpy(target_list, lower_alpha_chars);
return sizeof(lower_alpha_chars)-1;
};
case 'u': {
const char upper_alpha_chars[] = JEGER_CHAR_SET_upper;
strcpy(target_list, upper_alpha_chars);
return sizeof(upper_alpha_chars)-1;
};
}
return 0;
}
/* Handle the negated class escapes \D, \X, \O, \W, \L and \U: the class is
 * appended to the blacklist and IS_NEGATIVE is raised. Appending (instead of
 * overwriting) keeps characters an enclosing negative range already put on
 * the blacklist.
 * Returns the number of characters in the class, or 0 when `c` is not such an escape.
 */
static inline
int escape_to_negative(const char c,
                       compiler_state * const cs) {
    const char * set = NULL;
    switch (c) {
        case 'D':
            set = JEGER_CHAR_SET_digits;
            break;
        case 'X':
            set = JEGER_CHAR_SET_digits
                  JEGER_CHAR_SET_lower_hex
                  JEGER_CHAR_SET_upper_hex
            ;
            break;
        case 'O':
            set = JEGER_CHAR_SET_octal_digits;
            break;
        case 'W':
            set = JEGER_CHAR_SET_underscore
                  JEGER_CHAR_SET_digits
                  JEGER_CHAR_SET_lower
                  JEGER_CHAR_SET_upper
            ;
            break;
        case 'L':
            set = JEGER_CHAR_SET_lower;
            break;
        case 'U':
            set = JEGER_CHAR_SET_upper;
            break;
        default:
            return 0;
    }
    strcat(cs->blacklist, set);
    cs->flags |= IS_NEGATIVE;
    return (int)strlen(set);
}
/* Compile the dot operator: only raises DO_CATCH so that the caller emits a
 * fallback-table entry for the token. Always returns 1.
 */
static inline
int compile_dot(compiler_state * const cs) {
    cs->flags = cs->flags | DO_CATCH;
    return 1;
}
/* Try the escape handlers in turn (single character, character class,
 * negated class) and stop at the first one that recognises `c`.
 * Returns 1 when `c` was a known escape, 0 otherwise.
 */
static inline
int compile_escape(const char c,
                   compiler_state * const cs) {
    if (escape_1_to_1(c, cs)) {
        return 1;
    }
    if (escape_1_to_N(c, cs)) {
        return 1;
    }
    return escape_to_negative(c, cs) ? 1 : 0;
}
/* Compile a bracket expression ("[...]" or "[^...]") starting at `range`.
 * Members go to the whitelist, or to the blacklist for the negated form.
 * Supports escapes and "a-z" style spans.
 * Returns the number of pattern characters consumed, closing bracket included.
 * Malformed input trips an assert in debug builds; release builds stop at the
 * string terminator and return the distance to it.
 */
static
int compile_range(const char * const range,
                  compiler_state * const cs) {
    assert((range[0] == '[') && "Not a range.");
    const char * s = range + 1;
    if (*s == '^') {
        cs->flags |= IS_NEGATIVE;
        s += 1;
    }
    char * target_list = (cs->flags & IS_NEGATIVE) ? cs->blacklist : cs->whitelist;
    for (; *s != ']'; s++) {
        assert((*s != '\0') && "Unclosed range.");
        if (*s == '\0') {
            // NDEBUG builds: never scan past the terminator.
            return (int)(s - range);
        }
        const char c = *s;
        if (c == '\\') {
            s += 1;
            // Kept out of assert() so that the escape is still compiled under NDEBUG.
            const int known = compile_escape(*s, cs);
            assert(known && "Unknown escape.");
            (void)known;
            if (*s == '\0') {
                return (int)(s - range);
            }
        } else if (*(s+1) == '-') {
            const char end = *(s+2);
            assert((c < end) && "Endless range.");
            if (end == '\0') {
                return (int)((s + 2) - range);
            }
            // An int counter can step past `end` even when `end` is CHAR_MAX.
            for (int cc = c; cc <= end; cc++) {
                const char member = (char)cc;
                strncat(target_list, &member, 1);
            }
            s += 2;
        } else {
            strncat(target_list, &c, 1);
        }
    }
    return (int)((s - range) + 1);
}
/* Append to `filtered` every character of `blacklist` that is not found in
 * `whitelist`.
 * NOTE(review): the whitelist cursor is never rewound between blacklist
 * characters, so each lookup only sees the part of the whitelist that earlier
 * lookups have not walked past. This looks unintended, but the `\>` path of
 * regex_compile() appears to depend on it - confirm before changing.
 */
static
void filter_blacklist(const char * whitelist,
                      const char * blacklist,
                      char * filtered) {
    const char * allowed = whitelist;
    for (const char * banned = blacklist; *banned != '\0'; banned++) {
        // continue scanning from wherever the previous lookup stopped
        while ((*allowed != '\0') && (*allowed != *banned)) {
            allowed++;
        }
        if (*allowed == '\0') {
            strncat(filtered, banned, 1);
        }
    }
}
/* Compile `pattern` into a newly allocated regex_t.
 * The pattern is consumed token by token; every token is first translated
 * into a whitelist / blacklist of characters plus flags, then compiled into
 * state transition and fallback table entries depending on the operator that
 * follows it. Finally the reserved entry states are hooked up.
 * Returns NULL when the regex object itself cannot be allocated; release the
 * result with regex_free(). Malformed patterns trip an assert in debug builds.
 */
regex_t * regex_compile(const char * const pattern) {
    // Capacity of the per-token character lists. The largest single class
    // (\p) holds 64 characters and therefore needs more than 64 bytes.
    // NOTE(review): the lists are still appended to without bounds checks.
    enum { LIST_CAPACITY = 256 };

    regex_t * regex = (regex_t *)malloc(sizeof(regex_t));
    if (regex == NULL) {
        return NULL;
    }
    regex->str = strdup(pattern);
    if (regex->str == NULL) {
        free(regex);
        return NULL;
    }
    vector_init(&regex->delta_table, sizeof(delta_t*), 0UL);
    vector_init(&regex->catch_table, sizeof(offshoot_t*), 0UL);
    char whitelist[LIST_CAPACITY];
    char blacklist[LIST_CAPACITY];
    // Flags that survive from one token to the next
    static const int REGEX_PRESERVABLE_FLAGS = IS_AT_THE_BEGINNING
                                             | FORCE_START_OF_STRING
                                             | DO_FORBID_START_OF_STRING
    ;
    compiler_state cs = {
        .flags = IS_AT_THE_BEGINNING,
        .state = JEGER_INIT_STATE,
        .whitelist = whitelist,
        .blacklist = blacklist,
    };
    for (const char * s = pattern; *s != '\00';) {
        assert(!is_quantifier(*s) && "Pattern starts with quantifier.");
        // Reset the compiler
        whitelist[0] = '\0';
        blacklist[0] = '\0';
        cs.flags &= REGEX_PRESERVABLE_FLAGS;
        cs.width = 1;
        cs.match_width = 1;
        // Translate char
        switch (*s) {
            case '^': {
                ;
            } break;
            case '.': {
                compile_dot(&cs);
                s += 1;
            } break;
            case '\\': {
                s += 1;
                if (compile_escape(*s, &cs)) {
                    s += 1;
                } else if (is_hologram_escape(*s)) {
                    // leave the backslash for the compile step below
                    s -= 1;
                } else {
                    // a bare string literal is always true, so negate it to make the assert fire
                    assert(!"Unknown escape.");
                }
            } break;
            case '[': {
                s += compile_range(s, &cs);
            } break;
            default: { // Literal
                whitelist[0] = *s;
                whitelist[1] = '\0';
                s += 1;
            } break;
        }
        // Compile char
        switch (*s) {
            // holograms
            case '^': {
                whitelist[0] = '\n';
                whitelist[1] = '\0';
                HOOK_ALL(0, whitelist, 0, &cs, regex);
                if (cs.flags & IS_AT_THE_BEGINNING) {
                    cs.flags |= FORCE_START_OF_STRING;
                } else {
                    cs.flags |= INCREMENT_STATE;
                }
                s += 1;
            } break;
            case '\\': {
                if(is_hologram_escape(*(s+1))) {
                    ++s;
                } else {
                    goto DEFAULT;
                }
                switch(*s){
                    case '<': {
                        // XXX: make this legible
                        if (cs.flags & IS_AT_THE_BEGINNING
                        && !(cs.flags & DO_CATCH)
                        && !(cs.flags & IS_NEGATIVE)
                        && whitelist[0] == '\0') {
                            // ---
                            cs.flags |= INCREMENT_STATE;
                            cs.flags |= DO_FORBID_START_OF_STRING;
                            strcat(whitelist, JEGER_CHAR_symbol_chars);
                            // ---
                            ABSOLUTE_OFFSHOOT( JEGER_SOS_STATE, JEGER_INIT_STATE+1, 0, 0, regex);
                            ABSOLUTE_OFFSHOOT(JEGER_INIT_STATE, JEGER_INIT_STATE+2, 1, 0, regex);
                            HOOK_ALL(0, whitelist, HALT_AND_CATCH_FIRE, &cs, regex);
                            // ---
                            ++cs.state;
                            cs.width = 0;
                            cs.match_width = 0;
                            HOOK_ALL(0, whitelist, +1, &cs, regex);
                            cs.width = 1;
                            OFFSHOOT(0, +1, 1, 0, &cs, regex);
                            // ---
                        } else {
                            HOOK_ALL(0, whitelist, +1, &cs, regex);
                            if ((cs.flags & DO_CATCH)
                            || (cs.flags & IS_NEGATIVE)) {
                                OFFSHOOT(+1, +2, 1, 1, &cs, regex);
                            } else {
                                cs.flags |= INCREMENT_STATE;
                            }
                            OFFSHOOT(0, +1, 1, 0, &cs, regex);
                        }
                        cs.flags |= IS_NEGATIVE;
                        strcat(blacklist, JEGER_CHAR_symbol_chars);
                        s += 1;
                    } break;
                    case '>': {
                        HOOK_ALL(0, whitelist, +1, &cs, regex);
                        cs.flags |= IS_NEGATIVE | INCREMENT_STATE;
                        strcat(blacklist, JEGER_CHAR_symbol_chars);
                        OFFSHOOT(+1, +2, 0, 0, &cs, regex);
                        ++cs.state;
                        s += 1;
                    } break;
                }
            } break;
            // quantifiers
            case '=':
            case '?': {
                HOOK_ALL(0, whitelist, +1, &cs, regex);
                if ((cs.flags & DO_CATCH)
                || (cs.flags & IS_NEGATIVE)) {
                    OFFSHOOT(0, +1, 1, 1, &cs, regex);
                }
                s += 1;
            } break;
            case '*': {
                HOOK_ALL(0, whitelist, 0, &cs, regex);
                if ((cs.flags & DO_CATCH)
                || (cs.flags & IS_NEGATIVE)) {
                    OFFSHOOT(0, 0, 1, 1, &cs, regex);
                }
                s += 1;
            } break;
            case '+': {
                cs.flags |= INCREMENT_STATE;
                HOOK_ALL(0, whitelist, +1, &cs, regex);
                if ((cs.flags & DO_CATCH)
                || (cs.flags & IS_NEGATIVE)) {
                    OFFSHOOT(0, +1, 1, 1, &cs, regex);
                }
                HOOK_ALL(+1, whitelist, +1, &cs, regex);
                if ((cs.flags & DO_CATCH)
                || (cs.flags & IS_NEGATIVE)) {
                    OFFSHOOT(+1, +1, 1, 1, &cs, regex);
                }
                s += 1;
            } break;
            DEFAULT:
            default: { // Literal
                cs.flags |= INCREMENT_STATE;
                HOOK_ALL(0, whitelist, +1, &cs, regex);
                if ((cs.flags & DO_CATCH)
                || (cs.flags & IS_NEGATIVE)) {
                    OFFSHOOT(0, +1, 1, 1, &cs, regex);
                }
            } break;
        }
        // Compile blacklist
        if (*blacklist) {
            char filtered_blacklist[LIST_CAPACITY];
            filtered_blacklist[0] = '\0';
            filter_blacklist(whitelist, blacklist, filtered_blacklist);
            HOOK_ALL(0, filtered_blacklist, HALT_AND_CATCH_FIRE, &cs, regex);
        }
        if (cs.flags & INCREMENT_STATE) {
            ++cs.state;
        }
        // Purge SOS flag
        cs.flags &= (~IS_AT_THE_BEGINNING);
    }
    // Init state hookups
    if (!(cs.flags & DO_FORBID_START_OF_STRING)) {
        ABSOLUTE_OFFSHOOT(JEGER_SOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
    }
    if (cs.flags & FORCE_START_OF_STRING) {
        ABSOLUTE_OFFSHOOT(JEGER_NSOS_STATE, HALT_AND_CATCH_FIRE, 0, 0, regex);
    } else {
        ABSOLUTE_OFFSHOOT(JEGER_NSOS_STATE, JEGER_INIT_STATE, 0, 0, regex);
    }
    regex->accepting_state = cs.state;
    return regex;
}
/* Release a regex created by regex_compile(), including every table entry.
 * Passing NULL is allowed and does nothing. Always returns 0.
 */
int regex_free(regex_t * const regex) {
    if (regex == NULL) {
        return 0;
    }
    // The tables hold pointers to individually allocated entries; the vectors
    // only own the pointer storage, so the entries are released here.
    for (size_t i = 0; i < regex->delta_table.element_count; i++) {
        free(*(delta_t**)vector_get(&regex->delta_table, i));
    }
    for (size_t i = 0; i < regex->catch_table.element_count; i++) {
        free(*(offshoot_t**)vector_get(&regex->catch_table, i));
    }
    free(regex->str);
    vector_free(&regex->delta_table);
    vector_free(&regex->catch_table);
    free(regex);
    return 0;
}
// -----------------
// ### Searching ###
// -----------------
static
const offshoot_t * catch_table_lookup(const regex_t * const regex,
const int * const state) {
for (size_t i = 0; i < regex->catch_table.element_count; i++){
const offshoot_t * const offshoot = *(offshoot_t**)vector_get(&regex->catch_table, i);
if (offshoot->in == *state) {
return offshoot;
}
}
return NULL;
}
/* Run the compiled automaton on `string`, starting in `state`.
 * Returns ASSERTION_SUCCESS when the accepting state is reached,
 * ASSERTION_FAILURE when it is not, and HALT_AND_CATCH_FIRE when entered with
 * the dead-end state. Transitions are followed recursively (one frame per
 * transition taken), fallback-table jumps iteratively inside the same frame.
 * `match` collects the width of the match and, through `_pos_ptr`, the place
 * where it began.
 */
static
int regex_assert(const regex_t * const regex,
const char * const string,
int state,
match_t * const match) {
if (state == HALT_AND_CATCH_FIRE) {
return HALT_AND_CATCH_FIRE;
}
// set once the end of the string is reached; from then on only zero-width fallbacks may be taken
bool last_stand = false;
// whether a transition for the current state and character exists and did not end in plain failure
bool was_found;
const char * s = string;
LOOP: {
was_found = false;
if (*s == '\0') {
last_stand = true;
goto PERFORM_CATCH_LOOKUP;
}
// Jump search for the correct state
// NOTE(review): this presumes delta_table is ordered by `in` - confirm against the compiler
const int jump = 10;
size_t i = jump;
while (i < regex->delta_table.element_count) {
const delta_t * const delta = *(delta_t**)vector_get(&regex->delta_table, i);
if (delta->in >= state) {
break;
}
i += jump;
}
// step back one stride so that the linear scan starts before the first candidate
i -= jump;
// Linear search finish up
for (; i < regex->delta_table.element_count; i++) {
const delta_t * const delta = *(delta_t**)vector_get(&regex->delta_table, i);
if (delta->in > state) {
break;
}
if ((delta->in == state)
&& (delta->input == *s)) {
bool do_reset = false;
was_found = true;
// remember where the match begins; undone below if this branch does not succeed
if (!match->_pos_ptr && delta->match_width) {
match->_pos_ptr = s;
do_reset = true;
}
const int r = regex_assert(regex, s + delta->pattern_width, delta->to, match);
if(r == ASSERTION_SUCCESS){
match->width += delta->match_width;
return r;
} else {
// a plain failure re-enables the fallback lookup; HALT_AND_CATCH_FIRE leaves was_found set and thereby blocks it
if (r == ASSERTION_FAILURE) {
was_found = false;
}
if (do_reset) {
match->_pos_ptr = NULL;
}
}
}
}
}
PERFORM_CATCH_LOOKUP: {
if (!was_found) {
const offshoot_t * const my_catch = catch_table_lookup(regex, &state);
// at the end of the string only fallbacks that consume nothing are allowed
if (my_catch && (!my_catch->pattern_width || !last_stand)) {
state = my_catch->to;
s += my_catch->pattern_width;
match->width += my_catch->match_width;
goto LOOP;
}
}
}
return ((state == regex->accepting_state) ? ASSERTION_SUCCESS : ASSERTION_FAILURE);
}
match_t * regex_match(const regex_t * const regex,
const char * const string,
const bool is_start_of_string) {
vector_t matches;
vector_init(&matches, sizeof(match_t), 0);
match_t * match = (match_t *)malloc(sizeof(match_t));
/* Non-existent regex does not match anything.
* Not to be confused with an empty regex.
*/
if (regex == NULL) {
goto FINISH;
}
// Find all matches
{
const char * s = string;
int initial_state;
do {
initial_state = (int)(!(is_start_of_string && (s == string)));
*match = (match_t){
._pos_ptr = NULL,
.width = 0,
};
if (regex_assert(regex, s, initial_state, match) == 1) {
//printf("true: %s\n", s);
if (match->_pos_ptr) {
match->position = (match->_pos_ptr - string);
} else {
match->position = (s - string);
}
vector_push(&matches, match);
s += ((match->width > 0) ? match->width : 1);
match = (match_t *)malloc(sizeof(match_t));
} else {
//printf("false: %s\n", s);
++s;
}
} while (*s != '\0');
}
FINISH:
// Insert sentinel
*match = (match_t){
.position = -1,
.width = -1,
};
vector_push(&matches, match);
// Hide internal vector usage
const size_t data_size = matches.element_size * matches.element_count;
match_t * r = (match_t *)malloc(data_size);
memcpy(r, matches.data, data_size);
vector_free(&matches);
return r;
}
/* Tell whether `regex` matches anywhere in `string`, with the first character
 * of `string` treated as the start of the text.
 */
bool regex_search(const regex_t * const regex,
                  const char * const string) {
    match_t * const results = regex_match(regex, string, true);
    // the array holds at least one real match unless it begins with the sentinel
    const bool found = !is_sentinel(&results[0]);
    free(results);
    return found;
}

View File

@ -1,33 +0,0 @@
// Public interface of the Jeger regex engine.
#ifndef JEGER_H
#define JEGER_H
#include <stdbool.h>
#include "vector.h"
// NOTE(review): no definition of this variable is visible in jeger.c - confirm where it lives
extern bool is_case_on;
// A compiled regular expression; create with regex_compile(), release with regex_free().
typedef struct {
// state the automaton has to end in for a match
int accepting_state;
// copy of the pattern the regex was compiled from
char * str;
vector_t delta_table; // <delta_t>
vector_t catch_table; // <offshoot_t>
} regex_t;
// One match as reported by regex_match().
typedef struct {
union {
// offset of the match inside the searched string (what callers read)
int position;
// used internally while matching: pointer to where the match begins
const char * _pos_ptr;
};
// width of the match
int width;
} match_t;
// Compiles `pattern`; the result is released with regex_free().
extern regex_t * regex_compile(const char * const pattern);
// Releases a compiled regex; returns 0.
extern int regex_free(regex_t * const regex);
// True when `regex` matches anywhere in `string`.
extern bool regex_search(const regex_t * const regex, const char * const string);
// Returns a malloc()ed array of matches ending with a sentinel element; release it with free().
extern match_t * regex_match(const regex_t * const regex, const char * const string, const bool start_of_string);
// True when `c` has a special meaning inside a pattern.
extern bool is_magic(const char c);
// True when `match` is the sentinel that ends the array returned by regex_match().
extern bool is_sentinel(const match_t * const match);
#endif

View File

@ -1,204 +0,0 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "test.hpp"
signed main() {
TEST( R"del(abc)del", "abc", true);
TEST(R"del(efg1)del", "efg1", true);
TEST( R"del(nig)del", "ger", false);
TEST( R"del(ss)del", "sss", true);
TEST( R"del(sss)del", "ss", false);
puts("");
TEST( R"del(ab+c)del", "abc", true);
TEST(R"del(ef+g1)del", "effffg1", true);
TEST(R"del(efg1+)del", "efg", false);
TEST(R"del(efg1+)del", "efg1", true);
TEST(R"del(efg1+)del", "efg11", true);
puts("");
TEST( R"del(a+a)del", "aaa", true);
TEST( R"del(a+a)del", "aa", true);
TEST( R"del(a+a)del", "a", false);
TEST( R"del(a+a)del", "aaa", true);
TEST(R"del(a+\+)del", "aaa", false);
puts("");
TEST( R"del(ab*c)del", "abc", true);
TEST(R"del(ef*g1)del", "effffg1", true);
TEST(R"del(efg1*)del", "efg", true);
TEST(R"del(efg1*)del", "efg1", true);
TEST(R"del(efg1*)del", "efg11", true);
puts("");
TEST( R"del(ne.)del", "net", true);
TEST( R"del(ne.)del", "ne", false);
TEST(R"del(ne.+)del", "neoo", true);
TEST(R"del(ne.*)del", "neoo", true);
TEST(R"del(ne.*)del", "ne", true);
puts("");
TEST( R"del(ne.o)del", "neto", true);
TEST(R"del(ne.+o)del", "nettto", true);
TEST(R"del(ne.+o)del", "neo", false);
TEST(R"del(ne.+o)del", "neoo", true);
TEST(R"del(ne.*o)del", "neo", true);
puts("");
TEST(R"del(ne.)del", "ne\t", true);
TEST(R"del(ne\t)del", "ne", false);
TEST(R"del(ne\t)del", "ne\t", true);
TEST(R"del(ne\t)del", "net", false);
TEST(R"del(ne)del", "ne\t", true);
puts("");
TEST(R"del(\sa)del", " a", true);
TEST(R"del(\sa)del", " a ", true);
TEST(R"del(\wi)del", "hi", true);
TEST(R"del(\w+)del", "asd", true);
TEST(R"del(\w*)del", "", true);
puts("");
TEST( R"del([A-Za-z]+)del", "HelloWorld", true);
TEST(R"del([A-Za-z]+g)del", "HelloWorldg", true);
TEST(R"del([A-Za-z]+g)del", "g", false);
TEST(R"del([A-Za-z]*g)del", "g", true);
TEST(R"del([A-Za-z]+1)del", "1", false);
puts("");
TEST( R"del([^0-9])del", "0", false);
TEST( R"del([^A-Za-z])del", "HelloWorld", false);
TEST(R"del([^A-Za-z]+g)del", "313g", true);
TEST( R"del([^0-9])del", "HelloWorld", true);
TEST( R"del([^a])del", "ba", true);
puts("");
TEST( R"del([^0-9]*[^a-z])del", "a0", true);
TEST( R"del([^0-9]*[^a-z])del", "0a", false);
TEST( R"del([^0-9]?[^a-z])del", "a", false);
TEST( R"del([^0-9]?[^a-z])del", "0", false);
TEST(R"del([^0-9]+[^a-z]*)del", "aa", true);
puts("");
TEST( R"del(^\^)del", "^^", true);
TEST( R"del(^\^)del", " ^", false);
TEST(R"del(^ \^)del", " ^", true);
TEST( R"del(^a*)del", "asd", true);
TEST( R"del(^)del", "", true);
puts("");
TEST( R"del(\<test)del", "test", true);
TEST( R"del(test\>)del", "test", true);
TEST( R"del(\<test)del", "ttest", false);
TEST( R"del(test\>)del", "testa", false);
TEST(R"del(\<test\>)del", "test", true);
puts("");
TEST( R"del(\<test)del", " test ", true);
TEST( R"del(test\>)del", " test ", true);
TEST( R"del(\<test)del", " ttest ", false);
TEST( R"del(test\>)del", "testa ", false);
TEST(R"del(\<test\>)del", " test ", true);
puts("");
TEST( R"del(\<int\>)del", "printf", false);
TEST(R"del(.\<print\>.)del", " print ", true);
TEST(R"del(.\<print\>.)del", "fprint", false);
TEST(R"del(.\<print\>.)del", "printf", false);
TEST(R"del(.\<print\>.)del", "fprintf", false);
puts("");
TEST(R"del(\<while\>)del", "while", true);
TEST(R"del(\<while\>)del", " while ", true);
TEST(R"del(\<while\>)del", "9while ", true);
TEST(R"del(\<while\>)del", "for while {", true);
TEST(R"del(\<while\>)del", "for while{", true);
puts("");
TEST(R"del(/\*[\d\D]*\*/)del", "908", false);
TEST(R"del(/\*[\d\D]*\*/)del", "/*908*/", true);
TEST(R"del(/\*[\d\D]*\*/)del", "/*asd*/", true);
TEST(R"del(/\*[\d\D]*\*/)del", "/* asd */", true);
TEST(R"del(/\*[\d\D]*\*/)del", "/* as* */", true);
if (test_counter == passed_tests) {
fputs("\033[32m", stdout);
} else {
fputs("\033[31m", stdout);
}
printf("\nPassed %d out of %d tests.\033[0m\n", passed_tests, test_counter);
printf("\tPositives: %d/%d\n", positive_successes, positives);
printf("\tNegatives: %d/%d\n", negative_successes, negatives);
puts("");
puts("");
TEST2( R"del(abc)del", "abc", match_t{ 0, strlen("abc")});
TEST2(R"del(efg1)del", "efg1", match_t{ 0, strlen("efg1")});
TEST2( R"del(nig)del", "ger", match_t{-1, -1});
TEST2( R"del(ss)del", "sss", match_t{ 0, 2});
TEST2( R"del(sss)del", "ss", match_t{-1, -1});
puts("");
puts("");
TEST2( R"del(ab+c)del", "abc", match_t{ 0, strlen("abc")});
TEST2(R"del(ef+g1)del", "effffg1", match_t{ 0, strlen("effffg1")});
TEST2(R"del(efg1+)del", "efg", match_t{-1, -1});
TEST2(R"del(efg1+)del", "efg1", match_t{ 0, strlen("efg1")});
TEST2(R"del(efg1+)del", "efg11", match_t{ 0, strlen("efg11")});
puts("");
puts("");
TEST2( R"del(a+a)del", " aaa", match_t{ 1, strlen("aaa")});
TEST2( R"del(a+a)del", " aa", match_t{ 1, strlen("aa")});
TEST2( R"del(a+a)del", " a", match_t{-1, -1});
TEST2( R"del(a+a)del", " aaa", match_t{ 3, strlen("aaa")});
TEST2(R"del(a+\+)del", "aaa+", match_t{ 0, strlen("aaa+")});
puts("");
puts("");
TEST2(R"del(\<while\>)del", "while", match_t{0, strlen("while")});
TEST2(R"del(\<while\>)del", " while", match_t{1, strlen("while")});
TEST2(R"del(\<while\>)del", "for while", match_t{4, strlen("while")});
TEST2(R"del(\<while\>)del", "for9while", match_t{4, strlen("while")});
TEST2(R"del(\<while\>)del", "for9while ", match_t{4, strlen("while")});
puts("");
puts("");
TEST2(R"del(\+)del", "akjh ab+ snabd", match_t{ strlen("akjh ab+")-1, 1});
TEST2(R"del(\*)del", "a*jh abn snabd", match_t{ strlen("a*")-1, 1});
TEST2(R"del(\=)del", "ak=h abn snabd", match_t{ strlen("ak=")-1, 1});
TEST2(R"del(\?)del", "akjh abn s?abd", match_t{ strlen("akjh abn s?")-1, 1});
TEST2(R"del(\+)del", "akjh abn snab+", match_t{strlen("akjh abn snab+")-1, 1});
if(test_counter2 == passed_tests2) {
fputs("\033[32m", stdout);
} else {
fputs("\033[31m", stdout);
}
printf("\nPassed %d out of %d tests.\033[0m\n", passed_tests2, test_counter2);
return 0;
}

View File

@ -1,101 +0,0 @@
#include "jeger.h"
// Bookkeeping for TEST(): boolean "does it match" checks.
static int test_counter = 0; // number of TEST() calls so far
static int passed_tests = 0; // TEST() calls whose result equalled the expectation
static int positives = 0; // TEST() calls that expected a match
static int positive_successes = 0; // passed TEST() calls that expected a match
static int negatives = 0; // TEST() calls that expected no match
static int negative_successes = 0; // passed TEST() calls that expected no match
// Bookkeeping for TEST2(): position/width checks.
static int test_counter2 = 0; // number of TEST2() calls so far
static int passed_tests2 = 0; // TEST2() calls whose first match equalled the expectation
// Render `match` as text into a newly allocated string.
//
// destination: receives the allocated string; the caller free()s it.
//              Set to NULL when the allocation/formatting fails.
// match:       entry to render as "<address> {position, width}";
//              NULL yields a fixed "N/A" placeholder text.
static
void asprint_match_t(      char    * *       destination,
                     const match_t * const   match) {
    int written;
    if (match) {
        written = asprintf(destination, "%p {%d, %d}", (void *)match, match->position, match->width);
    } else {
        written = asprintf(destination, "0x000000000000 {N/A, N/A}");
    }
    if (written < 0) {
        // On failure asprintf leaves *destination unspecified; make it a
        // value the caller can safely hand to free().
        *destination = NULL;
    }
}
// Print the coloured "Success"/"Failure" prefix of a result line, followed by
// the two-digit test number `n` and a bold " - " separator. No newline is
// written; the caller completes the line.
static
void print_leader(const bool passed, const int n) {
    if (!passed) {
        printf("\033[31;1mFailure\033[0m. %02d\033[1m - \033[0m", n);
        return;
    }
    printf("\033[32;1mSuccess\033[0m. %02d\033[1m - \033[0m", n);
}
// Flush stdout whenever test_counter is a multiple of five (which includes
// the very first call, when it is still zero).
static
void do_flush(void) {
    if ((test_counter % 5) != 0) {
        return;
    }
    fflush(stdout);
}
static
void TEST(const char * const what,
const char * const on,
const bool expect) {
do_flush();
++test_counter;
regex_t * r = regex_compile(what);
bool result = regex_search(r, on);
regex_free(r);
bool passed = (result == expect);
expect ? ++positives : ++negatives;
print_leader(passed, test_counter);
char * quoted_what, * quoted_on;
asprintf(&quoted_what, "'%s'", what);
asprintf(&quoted_on, "'%s'", on);
printf("%14s\033[1m vs \033[0m%14s\033[1m:\033[0m Result = %d, Expected = %d\n", quoted_what, quoted_on, result, expect);
free(quoted_what);
free(quoted_on);
if (passed) {
++passed_tests;
expect ? ++positive_successes : ++negative_successes;
}
}
static
void TEST2(const char * const what,
const char * const on,
const match_t expect){
do_flush();
++test_counter2;
regex_t * r = regex_compile(what);
match_t * result = regex_match(r, on, true);
bool passed = (result->position == expect.position
&& result->width == expect.width
);
print_leader(passed, test_counter2);
char * quoted_what, * quoted_on;
asprintf(&quoted_what, "'%s'", what);
asprintf(&quoted_on, "'%s'", on);
char * result_string, * expect_string;
asprint_match_t(&result_string, result);
asprint_match_t(&expect_string, &expect);
printf("%s\033[1m vs \033[0m%s\033[1m:\033[0m\n\tResult = %s\n\tExpected = %s\n", quoted_what, quoted_on, result_string, expect_string);
free(quoted_what);
free(quoted_on);
free(result_string);
free(expect_string);
free(result);
if (passed) {
++passed_tests2;
}
}

View File

@ -1,64 +0,0 @@
#include "vector.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/* Initialise `vector` to hold `element_count` zero-filled elements of
 * `element_size` bytes each.
 *
 * element_size must be non-zero. An element_count of 0 yields an empty
 * vector whose data pointer is NULL; vector_push() grows it from there.
 */
void vector_init(vector_t * vector,
                 size_t     element_size,
                 size_t     element_count) {
    assert(vector);
    assert(element_size);

    vector->element_size  = element_size;
    vector->element_count = element_count;
    vector->data          = NULL;

    // calloc(0, n) is allowed to return NULL, so allocating (and treating
    // NULL as a failure) is only meaningful when storage was requested.
    if (element_count > 0) {
        vector->data = (char *)calloc(element_count, element_size);
        assert(vector->data);
    }
}
/* Append a copy of one element to the end of `vector`.
 *
 * data: points at element_size bytes to copy; must not be NULL.
 *
 * The storage is reallocated to the exact new size on every call, so
 * pointers previously obtained from vector_get() may become invalid.
 * Running out of memory aborts the program.
 */
void vector_push(vector_t * vector,
                 void     * data) {
    assert(vector);
    assert(data);

    const size_t grown_count = vector->element_count + 1;

    // Keep the old pointer and count untouched until the allocation is known
    // to have succeeded.
    char * const grown = (char *)realloc(vector->data,
                                         vector->element_size * grown_count);
    assert(grown);
    if (grown == NULL) {
        abort(); // same out-of-memory policy when asserts are compiled out
    }

    vector->data          = grown;
    vector->element_count = grown_count;
    memcpy(&grown[(grown_count - 1) * vector->element_size],
           data,
           vector->element_size);
}
/* Remove the last element of `vector`; does nothing when it is empty.
 *
 * The storage is not shrunk here: vector_push() reallocates to the exact
 * size on the next append and vector_free() releases whatever is left.
 */
void vector_pop(vector_t * vector) {
    assert(vector);
    if (vector->element_count > 0) {
        vector->element_count -= 1;
    }
}
/* Return a pointer to the element at index `element`.
 *
 * The index must be below element_count (asserted). The pointer refers
 * directly into the vector's storage.
 */
void * vector_get(const vector_t * const vector,
                  const size_t           element) {
    assert(vector);
    assert(element < vector->element_count);

    const size_t byte_offset = vector->element_size * element;
    return vector->data + byte_offset;
}
/* Overwrite the element at index `element` with a copy of `data`.
 *
 * data:    points at element_size bytes to copy; must not be NULL.
 * element: index of an existing element, i.e. below element_count.
 *          The vector is never grown by this function.
 */
void vector_set(vector_t * vector,
                void     * data,
                size_t     element) {
    assert(vector);
    assert(data);
    // The check used to be inverted (>=), which rejected every valid index
    // and let only out-of-bounds writes through.
    assert(element < vector->element_count);
    memcpy(&vector->data[vector->element_size * element],
           data,
           vector->element_size);
}
/* Release the storage owned by `vector`.
 *
 * The vector_t object itself is not freed. Afterwards the vector is empty
 * (NULL data, zero elements), so calling vector_free() again is harmless.
 */
void vector_free(vector_t * vector) {
    assert(vector);
    free(vector->data);
    // Do not leave a dangling pointer or a stale count behind
    vector->data          = NULL;
    vector->element_count = 0;
}

View File

@ -1,33 +0,0 @@
#ifndef VECTOR_H
#define VECTOR_H
#include <stddef.h>
// TODO: Handle error warnings?
// TODO: Implement more useful functions?
// Growable array of fixed-size elements stored back to back in one buffer.
typedef struct {
char * data; // element storage; element i starts at byte offset i * element_size
size_t element_size; // size of one element in bytes (vector_init() requires it to be non-zero)
size_t element_count; // number of elements currently stored
} vector_t;
// Initialise `vector` with `element_count` zero-filled elements of
// `element_size` bytes each; `element_size` must be non-zero.
extern void vector_init(vector_t * vector,
size_t element_size,
size_t element_count);
// Append a copy of the element_size bytes at `data`. The storage is
// reallocated on every push, so pointers obtained from vector_get() may be
// invalidated.
extern void vector_push(vector_t * vector,
void * data);
// NOTE(review): intended to drop the last element; check vector.c for the
// implementation status before relying on it.
extern void vector_pop(vector_t * vector);
// Return a pointer into the storage for the element at index `element`,
// which must be below element_count (asserted).
extern void * vector_get(const vector_t * const vector,
const size_t element);
// Copy element_size bytes from `data` over the slot at index `element`.
// The storage is not grown.
extern void vector_set(vector_t * vector,
void * data,
size_t element);
// Release the element storage; the vector_t object itself is not freed.
extern void vector_free(vector_t * vector);
#endif