attempt to handle multi-state rules
This commit is contained in:
parent
abe69a7221
commit
bc485a8b82
2
Makefile
2
Makefile
@ -19,7 +19,7 @@ ${OUTPUT}: object/main.o object/opts.o object/generator.o object/jeger.yy.o
|
|||||||
${LINK.cpp} -o ${OUTPUT} $^
|
${LINK.cpp} -o ${OUTPUT} $^
|
||||||
|
|
||||||
test:
|
test:
|
||||||
./${OUTPUT} -t -d test/brainfuck.l 2>&1 | tool/hl_table
|
./${OUTPUT} -d -t test/brainfuck.l 2>&1 | tool/hl_table
|
||||||
cat jeger.yy.c | tool/hl_table
|
cat jeger.yy.c | tool/hl_table
|
||||||
gcc -o bf.out jeger.yy.c -ggdb
|
gcc -o bf.out jeger.yy.c -ggdb
|
||||||
./bf.out test/hw.bf
|
./bf.out test/hw.bf
|
||||||
|
@ -37,8 +37,14 @@ void put_header(FILE * f, const int alphabet_size, const int no_match) {
|
|||||||
fputs("#define AS_SYMBOL(c) c\n", /* (c-'a')\n */ f);
|
fputs("#define AS_SYMBOL(c) c\n", /* (c-'a')\n */ f);
|
||||||
|
|
||||||
if (do_trace) {
|
if (do_trace) {
|
||||||
DEFINE_STR(TRACE, "fprintf(stderr, \"--accepting rule at line %d (\\\"%.*s\\\")\\n\", __LINE__, mlen, ss);");
|
DEFINE_STR(
|
||||||
DEFINE_STR(TRACE_DEFAULT, "fprintf(stderr, \"--accepting default rule (\"%c\")\\n\", *ss);");
|
TRACE(l),
|
||||||
|
"fprintf(stderr, \"--accepting rule at line %d (\\\"%.*s\\\")\\n\", l, mlen, ss);"
|
||||||
|
);
|
||||||
|
DEFINE_STR(
|
||||||
|
TRACE_DEFAULT,
|
||||||
|
"fprintf(stderr, \"--accepting default rule (\\\"%c\\\")\\n\", *ss);"
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
DEFINE_STR(TRACE, "");
|
DEFINE_STR(TRACE, "");
|
||||||
DEFINE_STR(TRACE_DEFAULT, "");
|
DEFINE_STR(TRACE_DEFAULT, "");
|
||||||
@ -79,14 +85,6 @@ void put_table(FILE * f, const int * table, char * * prefixes, int n_cases, int
|
|||||||
|
|
||||||
static
|
static
|
||||||
void put_state_table(FILE * f, int * states) {
|
void put_state_table(FILE * f, int * states) {
|
||||||
// XXX do i even need this table?
|
|
||||||
fprintf(f, "int state_table[%d] = {\n", n_states);
|
|
||||||
for (int i = 0; i < n_states; i++) {
|
|
||||||
if (states[i] == -1) { break; } // XXX
|
|
||||||
fprintf(f, "\t[%d] = %d,\n", i, states[i]);
|
|
||||||
}
|
|
||||||
fputs("};\n\n", f);
|
|
||||||
|
|
||||||
for (int i = 0; i < n_states; i++) {
|
for (int i = 0; i < n_states; i++) {
|
||||||
fprintf(
|
fprintf(
|
||||||
f,
|
f,
|
||||||
@ -182,8 +180,10 @@ void make_and_put_table(FILE * f) {
|
|||||||
= TOKEN_OFFSET+1 + rule_index
|
= TOKEN_OFFSET+1 + rule_index
|
||||||
;
|
;
|
||||||
|
|
||||||
put_table(stderr, (int*)table, prefixes, n_rules, alphabet_size);
|
if (do_debug > 1) {
|
||||||
fputs("/* ================== */\n", stderr);
|
put_table(stderr, (int*)table, prefixes, n_rules, alphabet_size);
|
||||||
|
fputs("/* ================== */\n", stderr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const int n_cases = next_free_slot;
|
const int n_cases = next_free_slot;
|
||||||
@ -198,14 +198,24 @@ void put_functions(FILE * f) {
|
|||||||
fputs(yy_lookup_str, f);
|
fputs(yy_lookup_str, f);
|
||||||
|
|
||||||
fputs(yy_lex_str_start, f);
|
fputs(yy_lex_str_start, f);
|
||||||
|
fprintf(
|
||||||
|
f,
|
||||||
|
"\tcase %d: {\n"
|
||||||
|
"TRACE_DEFAULT;\n"
|
||||||
|
"\t} break;\n",
|
||||||
|
TOKEN_OFFSET
|
||||||
|
);
|
||||||
for (rule_t * rule = rules; rule->code != NULL; rule++) {
|
for (rule_t * rule = rules; rule->code != NULL; rule++) {
|
||||||
fprintf(
|
fprintf(
|
||||||
f,
|
f,
|
||||||
"\tcase %ld: {\n"
|
"\tcase %ld: {\n"
|
||||||
"TRACE;\n"
|
"TRACE(%d);\n"
|
||||||
"%s\n"
|
"%s\n"
|
||||||
"\t} break;\n",
|
"\t} break;\n",
|
||||||
TOKEN_OFFSET + 1 + (rule - rules), rule->code);
|
TOKEN_OFFSET + 1 + (rule - rules),
|
||||||
|
rule->line,
|
||||||
|
rule->code
|
||||||
|
);
|
||||||
}
|
}
|
||||||
fputs(yy_lex_str_end, f);
|
fputs(yy_lex_str_end, f);
|
||||||
}
|
}
|
||||||
|
@ -8,6 +8,7 @@ typedef struct {
|
|||||||
int state;
|
int state;
|
||||||
char * pattern;
|
char * pattern;
|
||||||
char * code;
|
char * code;
|
||||||
|
int line;
|
||||||
} rule_t;
|
} rule_t;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
107
source/jeger.l
107
source/jeger.l
@ -25,11 +25,6 @@
|
|||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
char * pattern;
|
|
||||||
char * code;
|
|
||||||
} rule_t2;
|
|
||||||
|
|
||||||
static void set_alphanet_range(char s, char e) {
|
static void set_alphanet_range(char s, char e) {
|
||||||
// XXX not implemented
|
// XXX not implemented
|
||||||
}
|
}
|
||||||
@ -45,23 +40,23 @@
|
|||||||
va_end(va);
|
va_end(va);
|
||||||
}
|
}
|
||||||
|
|
||||||
string definition_section_code_buffer_str;
|
|
||||||
string code_section_code_buffer_str;
|
|
||||||
|
|
||||||
static map<string, vector<rule_t2>> rules_map;
|
|
||||||
static map<string, vector<rule_t2>>::iterator current_state;
|
|
||||||
static string patter_buffer;
|
|
||||||
static string code_buffer;
|
|
||||||
|
|
||||||
static int nest_counter = 0;
|
static int nest_counter = 0;
|
||||||
|
|
||||||
static int source_state;
|
static int source_state;
|
||||||
static string * source_buffer;
|
static string * source_buffer;
|
||||||
|
|
||||||
|
static string definition_section_code_buffer_str;
|
||||||
|
static string code_section_code_buffer_str;
|
||||||
|
|
||||||
|
static map<string, vector<rule_t>> rules_map;
|
||||||
|
static vector<map<string, vector<rule_t>>::iterator> current_states;
|
||||||
|
static string patter_buffer;
|
||||||
|
static string code_buffer;
|
||||||
|
static int line_buffer;
|
||||||
%}
|
%}
|
||||||
%x IN_DEFINITION_SECTION IN_RULE_SECTION IN_CODE_SECTION
|
%x IN_DEFINITION_SECTION IN_RULE_SECTION IN_CODE_SECTION
|
||||||
%x IN_DEFINITION_SECTION_CODE
|
%x IN_DEFINITION_SECTION_CODE
|
||||||
%x IN_RULE_LIST IN_OPTION_LIST
|
%x IN_RULE_LIST IN_OPTION_LIST
|
||||||
%x IN_STATE_DEFINITION
|
%x IN_STATE_HEAD IN_STATE_DEFINITION
|
||||||
%x IN_CODE IN_STRING IN_COMMENT IN_MULTILINE_COMMENT
|
%x IN_CODE IN_STRING IN_COMMENT IN_MULTILINE_COMMENT
|
||||||
|
|
||||||
rule_name [A-Z_][A-Z0-9_]*
|
rule_name [A-Z_][A-Z0-9_]*
|
||||||
@ -91,12 +86,12 @@ value \"[-a-z]+\"
|
|||||||
}
|
}
|
||||||
\/\* {
|
\/\* {
|
||||||
definition_section_code_buffer_str += yytext;
|
definition_section_code_buffer_str += yytext;
|
||||||
source_state = IN_DEFINITION_SECTION;
|
source_state = IN_DEFINITION_SECTION;
|
||||||
source_buffer = &definition_section_code_buffer_str;
|
source_buffer = &definition_section_code_buffer_str;
|
||||||
BEGIN IN_MULTILINE_COMMENT;
|
BEGIN IN_MULTILINE_COMMENT;
|
||||||
}
|
}
|
||||||
. {
|
. {
|
||||||
yyerror("baaaa");
|
yyerror("Unknown character encountered inside definition section ('%c') (temp warning)", yytext[0]);
|
||||||
}
|
}
|
||||||
\n { ; }
|
\n { ; }
|
||||||
}
|
}
|
||||||
@ -162,26 +157,41 @@ prefix={value} {
|
|||||||
\%\% {
|
\%\% {
|
||||||
BEGIN IN_CODE_SECTION;
|
BEGIN IN_CODE_SECTION;
|
||||||
}
|
}
|
||||||
\<{rule_name}\>\{ {
|
\< {
|
||||||
string state_name(yytext+1, yyleng-3);
|
BEGIN IN_STATE_HEAD;
|
||||||
current_state = rules_map.find(state_name);
|
|
||||||
if (current_state == rules_map.end()) {
|
|
||||||
yyerror("State '%s' was never declared.", state_name.c_str());
|
|
||||||
}
|
|
||||||
|
|
||||||
patter_buffer = "";
|
|
||||||
code_buffer = "";
|
|
||||||
|
|
||||||
BEGIN IN_STATE_DEFINITION;
|
|
||||||
}
|
}
|
||||||
. {
|
. {
|
||||||
yyerror("Rule section giberish (temp warning).");
|
yyerror("Rule section giberish (temp warning) ('%s').", yytext); // XXX
|
||||||
}
|
}
|
||||||
\n { ; }
|
\n { ; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
<IN_STATE_HEAD>{
|
||||||
|
{rule_name} {
|
||||||
|
string state_name(yytext);
|
||||||
|
map<string, vector<rule_t>>::iterator current_state = rules_map.find(state_name);
|
||||||
|
if (current_state == rules_map.end()) {
|
||||||
|
yyerror("State '%s' was never declared.", state_name.c_str());
|
||||||
|
} else {
|
||||||
|
current_states.push_back(current_state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
\>\{ {
|
||||||
|
patter_buffer = "";
|
||||||
|
code_buffer = "";
|
||||||
|
|
||||||
|
BEGIN IN_STATE_DEFINITION;
|
||||||
|
}
|
||||||
|
, |
|
||||||
|
{wsnl} { ; }
|
||||||
|
. {
|
||||||
|
yyerror("Unknown character inside state head (%c).", yytext[0]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
<IN_STATE_DEFINITION>{
|
<IN_STATE_DEFINITION>{
|
||||||
\} {
|
\} {
|
||||||
|
current_states.clear();
|
||||||
BEGIN IN_RULE_SECTION;
|
BEGIN IN_RULE_SECTION;
|
||||||
}
|
}
|
||||||
. {
|
. {
|
||||||
@ -192,6 +202,7 @@ prefix={value} {
|
|||||||
}
|
}
|
||||||
{wsnl}+\{ {
|
{wsnl}+\{ {
|
||||||
BEGIN IN_CODE;
|
BEGIN IN_CODE;
|
||||||
|
line_buffer = yylineno;
|
||||||
nest_counter = 0;
|
nest_counter = 0;
|
||||||
}
|
}
|
||||||
\n { ; }
|
\n { ; }
|
||||||
@ -205,13 +216,17 @@ prefix={value} {
|
|||||||
\} {
|
\} {
|
||||||
--nest_counter;
|
--nest_counter;
|
||||||
if (nest_counter == -1) {
|
if (nest_counter == -1) {
|
||||||
current_state->second.push_back((rule_t2) {
|
for (const auto &current_state : current_states) {
|
||||||
.pattern = strdup(patter_buffer.c_str()),
|
current_state->second.push_back((rule_t) {
|
||||||
.code = strdup(code_buffer.c_str()),
|
.state = -1, // NOTE: initialized elsewhere
|
||||||
});
|
.pattern = strdup(patter_buffer.c_str()),
|
||||||
|
.code = strdup(code_buffer.c_str()),
|
||||||
|
.line = line_buffer,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
patter_buffer = "";
|
patter_buffer = "";
|
||||||
code_buffer = "";
|
code_buffer = "";
|
||||||
|
|
||||||
BEGIN IN_STATE_DEFINITION;
|
BEGIN IN_STATE_DEFINITION;
|
||||||
} else {
|
} else {
|
||||||
@ -279,30 +294,35 @@ prefix={value} {
|
|||||||
|
|
||||||
static
|
static
|
||||||
void dump_parse_results(void) {
|
void dump_parse_results(void) {
|
||||||
puts(definition_section_code_buffer_str.c_str());
|
fputs("--- Definition section code buffer ---\n", stderr);
|
||||||
puts("----------");
|
fputs(definition_section_code_buffer_str.c_str(), stderr);
|
||||||
|
fputs("\n----------\n", stderr);
|
||||||
|
|
||||||
|
fputs("--- Patterns ---\n", stderr);
|
||||||
for (const auto &i : rules_map) {
|
for (const auto &i : rules_map) {
|
||||||
printf("%s:\n", i.first.c_str());
|
fprintf(stderr, "%s:\n", i.first.c_str());
|
||||||
for (const auto &h : i.second) {
|
for (const auto &h : i.second) {
|
||||||
printf("\tpattern:\n%s\n" "\tcode:\n%s\n", h.pattern, h.code);
|
fprintf(stderr, "\tpattern:\n%s\n" "\tcode:\n%s\n", h.pattern, h.code);
|
||||||
}
|
}
|
||||||
puts("--");
|
fputs("--\n", stderr);
|
||||||
}
|
}
|
||||||
|
|
||||||
puts("----------");
|
fputs("--- Code section code buffer ---\n", stderr);
|
||||||
puts(code_section_code_buffer_str.c_str());
|
fputs(code_section_code_buffer_str.c_str(), stderr);
|
||||||
|
fputs("\n----------\n", stderr);
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void dump_rules(void) {
|
void dump_rules(void) {
|
||||||
for (rule_t * rule = rules; rule->pattern != NULL; rule++) {
|
for (rule_t * rule = rules; rule->pattern != NULL; rule++) {
|
||||||
printf("{ .state = %d, .pattern = %s, }\n",
|
fprintf(
|
||||||
|
stderr,
|
||||||
|
"{ .state = %d, .pattern = `%s` }\n",
|
||||||
rule->state,
|
rule->state,
|
||||||
rule->pattern
|
rule->pattern
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
puts("{ .state = 0, .pattern = NULL, }");
|
fputs("{ .state = 0, .pattern = NULL }\n", stderr);
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
@ -335,6 +355,7 @@ int parse(const char * filename) {
|
|||||||
.state = state,
|
.state = state,
|
||||||
.pattern = rule.pattern,
|
.pattern = rule.pattern,
|
||||||
.code = rule.code,
|
.code = rule.code,
|
||||||
|
.line = rule.line,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
++state;
|
++state;
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
static
|
static
|
||||||
char * to_output_name(const char * filename) {
|
char * to_output_name(const char * filename) {
|
||||||
return strdup("jeger.yy.c");
|
return strdup("jeger.yy.c"); // XXX temp
|
||||||
}
|
}
|
||||||
|
|
||||||
signed main(const int argc, const char * argv[]) {
|
signed main(const int argc, const char * argv[]) {
|
||||||
@ -28,6 +28,7 @@ signed main(const int argc, const char * argv[]) {
|
|||||||
|
|
||||||
deinit_parser();
|
deinit_parser();
|
||||||
deinit_jeger();
|
deinit_jeger();
|
||||||
|
deinit_opts();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
#include "jeger.h"
|
#include "jeger.h"
|
||||||
|
|
||||||
bool do_trace = false;
|
bool do_trace = false;
|
||||||
bool do_debug = false;
|
int do_debug = 0;
|
||||||
|
|
||||||
char * output_filename = NULL;
|
char * output_filename = NULL;
|
||||||
char * input_filename = NULL;
|
char * input_filename = NULL;
|
||||||
@ -47,7 +47,7 @@ int parse_arguments(const int argc, const char * * argv) {
|
|||||||
do_trace = true;
|
do_trace = true;
|
||||||
} break;
|
} break;
|
||||||
case 'd': {
|
case 'd': {
|
||||||
do_debug = true;
|
++do_debug;
|
||||||
} break;
|
} break;
|
||||||
default: {
|
default: {
|
||||||
usage();
|
usage();
|
||||||
@ -61,3 +61,8 @@ int parse_arguments(const int argc, const char * * argv) {
|
|||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void deinit_opts(void) {
|
||||||
|
free(output_filename);
|
||||||
|
free(input_filename);
|
||||||
|
}
|
||||||
|
@ -4,11 +4,12 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
|
|
||||||
extern bool do_trace;
|
extern bool do_trace;
|
||||||
extern bool do_debug;
|
extern int do_debug; // NOTE: has multiple levels
|
||||||
extern char * output_filename;
|
extern char * output_filename;
|
||||||
extern char * input_filename;
|
extern char * input_filename;
|
||||||
|
|
||||||
extern void usage(void);
|
extern void usage(void);
|
||||||
extern int parse_arguments(const int argc, const char * * argv);
|
extern int parse_arguments(const int argc, const char * * argv);
|
||||||
|
extern void deinit_opts(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -25,7 +25,7 @@ int yylex(const char * s) {\n\
|
|||||||
*ss != '\\0';\n\
|
*ss != '\\0';\n\
|
||||||
ss += ((mlen ? mlen : 1) * direction)\n\
|
ss += ((mlen ? mlen : 1) * direction)\n\
|
||||||
) {\n\
|
) {\n\
|
||||||
int match = mlookup(ss, state_table[state]);\n\
|
int match = mlookup(ss, state);\n\
|
||||||
switch (match) {\n\
|
switch (match) {\n\
|
||||||
";
|
";
|
||||||
|
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
char data[30000];
|
char data[30000];
|
||||||
char * data_ptr = data;
|
char * data_ptr = data;
|
||||||
|
int nesting = 0;
|
||||||
%}
|
%}
|
||||||
|
|
||||||
%x INITIAL IN_SKIP_FORWARD IN_SKIP_BACKWARD
|
%x INITIAL IN_SKIP_FORWARD IN_SKIP_BACKWARD
|
||||||
@ -26,7 +27,7 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
\] {
|
\] {
|
||||||
if (!*data_ptr) {
|
if (*data_ptr) {
|
||||||
REVERSE;
|
REVERSE;
|
||||||
BEGIN IN_SKIP_BACKWARD;
|
BEGIN IN_SKIP_BACKWARD;
|
||||||
}
|
}
|
||||||
@ -34,15 +35,30 @@
|
|||||||
}
|
}
|
||||||
|
|
||||||
<IN_SKIP_FORWARD>{
|
<IN_SKIP_FORWARD>{
|
||||||
\] { BEGIN INITIAL; }
|
\[ { ++nesting; }
|
||||||
|
\] {
|
||||||
|
if (!nesting) {
|
||||||
|
BEGIN INITIAL;
|
||||||
|
} else {
|
||||||
|
--nesting;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
<IN_SKIP_BACKWARD>{
|
<IN_SKIP_BACKWARD>{
|
||||||
\[ { REVERSE; BEGIN INITIAL; }
|
\] { ++nesting; }
|
||||||
|
\[ {
|
||||||
|
if (!nesting) {
|
||||||
|
REVERSE;
|
||||||
|
BEGIN INITIAL;
|
||||||
|
} else {
|
||||||
|
--nesting;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
<INITIAL,IN_SKIP_FORWARD,IN_SKIP_BACKWARD>{
|
<INITIAL,IN_SKIP_FORWARD,IN_SKIP_BACKWARD>{
|
||||||
.|\n { ; }
|
.|\n { ; }
|
||||||
}
|
}
|
||||||
%%
|
%%
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user