attempt to handle multi-state rules
This commit is contained in:
parent
abe69a7221
commit
bc485a8b82
2
Makefile
2
Makefile
@ -19,7 +19,7 @@ ${OUTPUT}: object/main.o object/opts.o object/generator.o object/jeger.yy.o
|
||||
${LINK.cpp} -o ${OUTPUT} $^
|
||||
|
||||
test:
|
||||
./${OUTPUT} -t -d test/brainfuck.l 2>&1 | tool/hl_table
|
||||
./${OUTPUT} -d -t test/brainfuck.l 2>&1 | tool/hl_table
|
||||
cat jeger.yy.c | tool/hl_table
|
||||
gcc -o bf.out jeger.yy.c -ggdb
|
||||
./bf.out test/hw.bf
|
||||
|
@ -37,8 +37,14 @@ void put_header(FILE * f, const int alphabet_size, const int no_match) {
|
||||
fputs("#define AS_SYMBOL(c) c\n", /* (c-'a')\n */ f);
|
||||
|
||||
if (do_trace) {
|
||||
DEFINE_STR(TRACE, "fprintf(stderr, \"--accepting rule at line %d (\\\"%.*s\\\")\\n\", __LINE__, mlen, ss);");
|
||||
DEFINE_STR(TRACE_DEFAULT, "fprintf(stderr, \"--accepting default rule (\"%c\")\\n\", *ss);");
|
||||
DEFINE_STR(
|
||||
TRACE(l),
|
||||
"fprintf(stderr, \"--accepting rule at line %d (\\\"%.*s\\\")\\n\", l, mlen, ss);"
|
||||
);
|
||||
DEFINE_STR(
|
||||
TRACE_DEFAULT,
|
||||
"fprintf(stderr, \"--accepting default rule (\\\"%c\\\")\\n\", *ss);"
|
||||
);
|
||||
} else {
|
||||
DEFINE_STR(TRACE, "");
|
||||
DEFINE_STR(TRACE_DEFAULT, "");
|
||||
@ -79,14 +85,6 @@ void put_table(FILE * f, const int * table, char * * prefixes, int n_cases, int
|
||||
|
||||
static
|
||||
void put_state_table(FILE * f, int * states) {
|
||||
// XXX do i even need this table?
|
||||
fprintf(f, "int state_table[%d] = {\n", n_states);
|
||||
for (int i = 0; i < n_states; i++) {
|
||||
if (states[i] == -1) { break; } // XXX
|
||||
fprintf(f, "\t[%d] = %d,\n", i, states[i]);
|
||||
}
|
||||
fputs("};\n\n", f);
|
||||
|
||||
for (int i = 0; i < n_states; i++) {
|
||||
fprintf(
|
||||
f,
|
||||
@ -182,8 +180,10 @@ void make_and_put_table(FILE * f) {
|
||||
= TOKEN_OFFSET+1 + rule_index
|
||||
;
|
||||
|
||||
put_table(stderr, (int*)table, prefixes, n_rules, alphabet_size);
|
||||
fputs("/* ================== */\n", stderr);
|
||||
if (do_debug > 1) {
|
||||
put_table(stderr, (int*)table, prefixes, n_rules, alphabet_size);
|
||||
fputs("/* ================== */\n", stderr);
|
||||
}
|
||||
}
|
||||
|
||||
const int n_cases = next_free_slot;
|
||||
@ -198,14 +198,24 @@ void put_functions(FILE * f) {
|
||||
fputs(yy_lookup_str, f);
|
||||
|
||||
fputs(yy_lex_str_start, f);
|
||||
fprintf(
|
||||
f,
|
||||
"\tcase %d: {\n"
|
||||
"TRACE_DEFAULT;\n"
|
||||
"\t} break;\n",
|
||||
TOKEN_OFFSET
|
||||
);
|
||||
for (rule_t * rule = rules; rule->code != NULL; rule++) {
|
||||
fprintf(
|
||||
f,
|
||||
"\tcase %ld: {\n"
|
||||
"TRACE;\n"
|
||||
"TRACE(%d);\n"
|
||||
"%s\n"
|
||||
"\t} break;\n",
|
||||
TOKEN_OFFSET + 1 + (rule - rules), rule->code);
|
||||
TOKEN_OFFSET + 1 + (rule - rules),
|
||||
rule->line,
|
||||
rule->code
|
||||
);
|
||||
}
|
||||
fputs(yy_lex_str_end, f);
|
||||
}
|
||||
|
@ -8,6 +8,7 @@ typedef struct {
|
||||
int state;
|
||||
char * pattern;
|
||||
char * code;
|
||||
int line;
|
||||
} rule_t;
|
||||
|
||||
typedef enum {
|
||||
|
107
source/jeger.l
107
source/jeger.l
@ -25,11 +25,6 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
typedef struct {
|
||||
char * pattern;
|
||||
char * code;
|
||||
} rule_t2;
|
||||
|
||||
static void set_alphanet_range(char s, char e) {
|
||||
// XXX not implemented
|
||||
}
|
||||
@ -45,23 +40,23 @@
|
||||
va_end(va);
|
||||
}
|
||||
|
||||
string definition_section_code_buffer_str;
|
||||
string code_section_code_buffer_str;
|
||||
|
||||
static map<string, vector<rule_t2>> rules_map;
|
||||
static map<string, vector<rule_t2>>::iterator current_state;
|
||||
static string patter_buffer;
|
||||
static string code_buffer;
|
||||
|
||||
static int nest_counter = 0;
|
||||
|
||||
static int source_state;
|
||||
static string * source_buffer;
|
||||
|
||||
static string definition_section_code_buffer_str;
|
||||
static string code_section_code_buffer_str;
|
||||
|
||||
static map<string, vector<rule_t>> rules_map;
|
||||
static vector<map<string, vector<rule_t>>::iterator> current_states;
|
||||
static string patter_buffer;
|
||||
static string code_buffer;
|
||||
static int line_buffer;
|
||||
%}
|
||||
%x IN_DEFINITION_SECTION IN_RULE_SECTION IN_CODE_SECTION
|
||||
%x IN_DEFINITION_SECTION_CODE
|
||||
%x IN_RULE_LIST IN_OPTION_LIST
|
||||
%x IN_STATE_DEFINITION
|
||||
%x IN_STATE_HEAD IN_STATE_DEFINITION
|
||||
%x IN_CODE IN_STRING IN_COMMENT IN_MULTILINE_COMMENT
|
||||
|
||||
rule_name [A-Z_][A-Z0-9_]*
|
||||
@ -91,12 +86,12 @@ value \"[-a-z]+\"
|
||||
}
|
||||
\/\* {
|
||||
definition_section_code_buffer_str += yytext;
|
||||
source_state = IN_DEFINITION_SECTION;
|
||||
source_state = IN_DEFINITION_SECTION;
|
||||
source_buffer = &definition_section_code_buffer_str;
|
||||
BEGIN IN_MULTILINE_COMMENT;
|
||||
}
|
||||
. {
|
||||
yyerror("baaaa");
|
||||
yyerror("Unknown character encountered inside definition section ('%c') (temp warning)", yytext[0]);
|
||||
}
|
||||
\n { ; }
|
||||
}
|
||||
@ -162,26 +157,41 @@ prefix={value} {
|
||||
\%\% {
|
||||
BEGIN IN_CODE_SECTION;
|
||||
}
|
||||
\<{rule_name}\>\{ {
|
||||
string state_name(yytext+1, yyleng-3);
|
||||
current_state = rules_map.find(state_name);
|
||||
if (current_state == rules_map.end()) {
|
||||
yyerror("State '%s' was never declared.", state_name.c_str());
|
||||
}
|
||||
|
||||
patter_buffer = "";
|
||||
code_buffer = "";
|
||||
|
||||
BEGIN IN_STATE_DEFINITION;
|
||||
\< {
|
||||
BEGIN IN_STATE_HEAD;
|
||||
}
|
||||
. {
|
||||
yyerror("Rule section giberish (temp warning).");
|
||||
yyerror("Rule section giberish (temp warning) ('%s').", yytext); // XXX
|
||||
}
|
||||
\n { ; }
|
||||
}
|
||||
|
||||
<IN_STATE_HEAD>{
|
||||
{rule_name} {
|
||||
string state_name(yytext);
|
||||
map<string, vector<rule_t>>::iterator current_state = rules_map.find(state_name);
|
||||
if (current_state == rules_map.end()) {
|
||||
yyerror("State '%s' was never declared.", state_name.c_str());
|
||||
} else {
|
||||
current_states.push_back(current_state);
|
||||
}
|
||||
}
|
||||
\>\{ {
|
||||
patter_buffer = "";
|
||||
code_buffer = "";
|
||||
|
||||
BEGIN IN_STATE_DEFINITION;
|
||||
}
|
||||
, |
|
||||
{wsnl} { ; }
|
||||
. {
|
||||
yyerror("Unknown character inside state head (%c).", yytext[0]);
|
||||
}
|
||||
}
|
||||
|
||||
<IN_STATE_DEFINITION>{
|
||||
\} {
|
||||
current_states.clear();
|
||||
BEGIN IN_RULE_SECTION;
|
||||
}
|
||||
. {
|
||||
@ -192,6 +202,7 @@ prefix={value} {
|
||||
}
|
||||
{wsnl}+\{ {
|
||||
BEGIN IN_CODE;
|
||||
line_buffer = yylineno;
|
||||
nest_counter = 0;
|
||||
}
|
||||
\n { ; }
|
||||
@ -205,13 +216,17 @@ prefix={value} {
|
||||
\} {
|
||||
--nest_counter;
|
||||
if (nest_counter == -1) {
|
||||
current_state->second.push_back((rule_t2) {
|
||||
.pattern = strdup(patter_buffer.c_str()),
|
||||
.code = strdup(code_buffer.c_str()),
|
||||
});
|
||||
for (const auto &current_state : current_states) {
|
||||
current_state->second.push_back((rule_t) {
|
||||
.state = -1, // NOTE: initialized elsewhere
|
||||
.pattern = strdup(patter_buffer.c_str()),
|
||||
.code = strdup(code_buffer.c_str()),
|
||||
.line = line_buffer,
|
||||
});
|
||||
}
|
||||
|
||||
patter_buffer = "";
|
||||
code_buffer = "";
|
||||
code_buffer = "";
|
||||
|
||||
BEGIN IN_STATE_DEFINITION;
|
||||
} else {
|
||||
@ -279,30 +294,35 @@ prefix={value} {
|
||||
|
||||
static
|
||||
void dump_parse_results(void) {
|
||||
puts(definition_section_code_buffer_str.c_str());
|
||||
puts("----------");
|
||||
fputs("--- Definition section code buffer ---\n", stderr);
|
||||
fputs(definition_section_code_buffer_str.c_str(), stderr);
|
||||
fputs("\n----------\n", stderr);
|
||||
|
||||
fputs("--- Patterns ---\n", stderr);
|
||||
for (const auto &i : rules_map) {
|
||||
printf("%s:\n", i.first.c_str());
|
||||
fprintf(stderr, "%s:\n", i.first.c_str());
|
||||
for (const auto &h : i.second) {
|
||||
printf("\tpattern:\n%s\n" "\tcode:\n%s\n", h.pattern, h.code);
|
||||
fprintf(stderr, "\tpattern:\n%s\n" "\tcode:\n%s\n", h.pattern, h.code);
|
||||
}
|
||||
puts("--");
|
||||
fputs("--\n", stderr);
|
||||
}
|
||||
|
||||
puts("----------");
|
||||
puts(code_section_code_buffer_str.c_str());
|
||||
fputs("--- Code section code buffer ---\n", stderr);
|
||||
fputs(code_section_code_buffer_str.c_str(), stderr);
|
||||
fputs("\n----------\n", stderr);
|
||||
}
|
||||
|
||||
static
|
||||
void dump_rules(void) {
|
||||
for (rule_t * rule = rules; rule->pattern != NULL; rule++) {
|
||||
printf("{ .state = %d, .pattern = %s, }\n",
|
||||
fprintf(
|
||||
stderr,
|
||||
"{ .state = %d, .pattern = `%s` }\n",
|
||||
rule->state,
|
||||
rule->pattern
|
||||
);
|
||||
}
|
||||
puts("{ .state = 0, .pattern = NULL, }");
|
||||
fputs("{ .state = 0, .pattern = NULL }\n", stderr);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
@ -335,6 +355,7 @@ int parse(const char * filename) {
|
||||
.state = state,
|
||||
.pattern = rule.pattern,
|
||||
.code = rule.code,
|
||||
.line = rule.line,
|
||||
};
|
||||
}
|
||||
++state;
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
static
|
||||
char * to_output_name(const char * filename) {
|
||||
return strdup("jeger.yy.c");
|
||||
return strdup("jeger.yy.c"); // XXX temp
|
||||
}
|
||||
|
||||
signed main(const int argc, const char * argv[]) {
|
||||
@ -28,6 +28,7 @@ signed main(const int argc, const char * argv[]) {
|
||||
|
||||
deinit_parser();
|
||||
deinit_jeger();
|
||||
deinit_opts();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include "jeger.h"
|
||||
|
||||
bool do_trace = false;
|
||||
bool do_debug = false;
|
||||
int do_debug = 0;
|
||||
|
||||
char * output_filename = NULL;
|
||||
char * input_filename = NULL;
|
||||
@ -47,7 +47,7 @@ int parse_arguments(const int argc, const char * * argv) {
|
||||
do_trace = true;
|
||||
} break;
|
||||
case 'd': {
|
||||
do_debug = true;
|
||||
++do_debug;
|
||||
} break;
|
||||
default: {
|
||||
usage();
|
||||
@ -61,3 +61,8 @@ int parse_arguments(const int argc, const char * * argv) {
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void deinit_opts(void) {
|
||||
free(output_filename);
|
||||
free(input_filename);
|
||||
}
|
||||
|
@ -4,11 +4,12 @@
|
||||
#include <stdbool.h>
|
||||
|
||||
extern bool do_trace;
|
||||
extern bool do_debug;
|
||||
extern int do_debug; // NOTE: has multiple levels
|
||||
extern char * output_filename;
|
||||
extern char * input_filename;
|
||||
|
||||
extern void usage(void);
|
||||
extern int parse_arguments(const int argc, const char * * argv);
|
||||
extern void deinit_opts(void);
|
||||
|
||||
#endif
|
||||
|
@ -25,7 +25,7 @@ int yylex(const char * s) {\n\
|
||||
*ss != '\\0';\n\
|
||||
ss += ((mlen ? mlen : 1) * direction)\n\
|
||||
) {\n\
|
||||
int match = mlookup(ss, state_table[state]);\n\
|
||||
int match = mlookup(ss, state);\n\
|
||||
switch (match) {\n\
|
||||
";
|
||||
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <stdio.h>
|
||||
char data[30000];
|
||||
char * data_ptr = data;
|
||||
int nesting = 0;
|
||||
%}
|
||||
|
||||
%x INITIAL IN_SKIP_FORWARD IN_SKIP_BACKWARD
|
||||
@ -26,7 +27,7 @@
|
||||
}
|
||||
}
|
||||
\] {
|
||||
if (!*data_ptr) {
|
||||
if (*data_ptr) {
|
||||
REVERSE;
|
||||
BEGIN IN_SKIP_BACKWARD;
|
||||
}
|
||||
@ -34,15 +35,30 @@
|
||||
}
|
||||
|
||||
<IN_SKIP_FORWARD>{
|
||||
\] { BEGIN INITIAL; }
|
||||
\[ { ++nesting; }
|
||||
\] {
|
||||
if (!nesting) {
|
||||
BEGIN INITIAL;
|
||||
} else {
|
||||
--nesting;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
<IN_SKIP_BACKWARD>{
|
||||
\[ { REVERSE; BEGIN INITIAL; }
|
||||
\] { ++nesting; }
|
||||
\[ {
|
||||
if (!nesting) {
|
||||
REVERSE;
|
||||
BEGIN INITIAL;
|
||||
} else {
|
||||
--nesting;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
<INITIAL,IN_SKIP_FORWARD,IN_SKIP_BACKWARD>{
|
||||
.|\n { ; }
|
||||
.|\n { ; }
|
||||
}
|
||||
%%
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user