attempt to handle multi-state rules

This commit is contained in:
anon 2024-12-13 17:31:21 +01:00
parent abe69a7221
commit bc485a8b82
9 changed files with 122 additions and 67 deletions

View File

@ -19,7 +19,7 @@ ${OUTPUT}: object/main.o object/opts.o object/generator.o object/jeger.yy.o
${LINK.cpp} -o ${OUTPUT} $^
test:
./${OUTPUT} -t -d test/brainfuck.l 2>&1 | tool/hl_table
./${OUTPUT} -d -t test/brainfuck.l 2>&1 | tool/hl_table
cat jeger.yy.c | tool/hl_table
gcc -o bf.out jeger.yy.c -ggdb
./bf.out test/hw.bf

View File

@ -37,8 +37,14 @@ void put_header(FILE * f, const int alphabet_size, const int no_match) {
fputs("#define AS_SYMBOL(c) c\n", /* (c-'a')\n */ f);
if (do_trace) {
DEFINE_STR(TRACE, "fprintf(stderr, \"--accepting rule at line %d (\\\"%.*s\\\")\\n\", __LINE__, mlen, ss);");
DEFINE_STR(TRACE_DEFAULT, "fprintf(stderr, \"--accepting default rule (\"%c\")\\n\", *ss);");
DEFINE_STR(
TRACE(l),
"fprintf(stderr, \"--accepting rule at line %d (\\\"%.*s\\\")\\n\", l, mlen, ss);"
);
DEFINE_STR(
TRACE_DEFAULT,
"fprintf(stderr, \"--accepting default rule (\\\"%c\\\")\\n\", *ss);"
);
} else {
DEFINE_STR(TRACE, "");
DEFINE_STR(TRACE_DEFAULT, "");
@ -79,14 +85,6 @@ void put_table(FILE * f, const int * table, char * * prefixes, int n_cases, int
static
void put_state_table(FILE * f, int * states) {
// XXX do i even need this table?
fprintf(f, "int state_table[%d] = {\n", n_states);
for (int i = 0; i < n_states; i++) {
if (states[i] == -1) { break; } // XXX
fprintf(f, "\t[%d] = %d,\n", i, states[i]);
}
fputs("};\n\n", f);
for (int i = 0; i < n_states; i++) {
fprintf(
f,
@ -182,8 +180,10 @@ void make_and_put_table(FILE * f) {
= TOKEN_OFFSET+1 + rule_index
;
put_table(stderr, (int*)table, prefixes, n_rules, alphabet_size);
fputs("/* ================== */\n", stderr);
if (do_debug > 1) {
put_table(stderr, (int*)table, prefixes, n_rules, alphabet_size);
fputs("/* ================== */\n", stderr);
}
}
const int n_cases = next_free_slot;
@ -198,14 +198,24 @@ void put_functions(FILE * f) {
fputs(yy_lookup_str, f);
fputs(yy_lex_str_start, f);
fprintf(
f,
"\tcase %d: {\n"
"TRACE_DEFAULT;\n"
"\t} break;\n",
TOKEN_OFFSET
);
for (rule_t * rule = rules; rule->code != NULL; rule++) {
fprintf(
f,
"\tcase %ld: {\n"
"TRACE;\n"
"TRACE(%d);\n"
"%s\n"
"\t} break;\n",
TOKEN_OFFSET + 1 + (rule - rules), rule->code);
TOKEN_OFFSET + 1 + (rule - rules),
rule->line,
rule->code
);
}
fputs(yy_lex_str_end, f);
}

View File

@ -8,6 +8,7 @@ typedef struct {
int state;
char * pattern;
char * code;
int line;
} rule_t;
typedef enum {

View File

@ -25,11 +25,6 @@
using namespace std;
typedef struct {
char * pattern;
char * code;
} rule_t2;
// Stub: accepts a start (`s`) and end (`e`) character and currently does
// nothing with them. NOTE(review): presumably meant to restrict the scanner
// alphabet to that range once implemented — confirm against the callers.
static void set_alphanet_range(char s, char e) {
	// XXX not implemented
	(void)s;
	(void)e;
}
@ -45,23 +40,23 @@
va_end(va);
}
string definition_section_code_buffer_str;
string code_section_code_buffer_str;
static map<string, vector<rule_t2>> rules_map;
static map<string, vector<rule_t2>>::iterator current_state;
static string patter_buffer;
static string code_buffer;
static int nest_counter = 0;
static int source_state;
static string * source_buffer;
static string definition_section_code_buffer_str;
static string code_section_code_buffer_str;
static map<string, vector<rule_t>> rules_map;
static vector<map<string, vector<rule_t>>::iterator> current_states;
static string patter_buffer;
static string code_buffer;
static int line_buffer;
%}
%x IN_DEFINITION_SECTION IN_RULE_SECTION IN_CODE_SECTION
%x IN_DEFINITION_SECTION_CODE
%x IN_RULE_LIST IN_OPTION_LIST
%x IN_STATE_DEFINITION
%x IN_STATE_HEAD IN_STATE_DEFINITION
%x IN_CODE IN_STRING IN_COMMENT IN_MULTILINE_COMMENT
rule_name [A-Z_][A-Z0-9_]*
@ -91,12 +86,12 @@ value \"[-a-z]+\"
}
\/\* {
definition_section_code_buffer_str += yytext;
source_state = IN_DEFINITION_SECTION;
source_state = IN_DEFINITION_SECTION;
source_buffer = &definition_section_code_buffer_str;
BEGIN IN_MULTILINE_COMMENT;
}
. {
yyerror("baaaa");
yyerror("Unknown character encountered inside definition section ('%c') (temp warning)", yytext[0]);
}
\n { ; }
}
@ -162,26 +157,41 @@ prefix={value} {
\%\% {
BEGIN IN_CODE_SECTION;
}
\<{rule_name}\>\{ {
string state_name(yytext+1, yyleng-3);
current_state = rules_map.find(state_name);
if (current_state == rules_map.end()) {
yyerror("State '%s' was never declared.", state_name.c_str());
}
patter_buffer = "";
code_buffer = "";
BEGIN IN_STATE_DEFINITION;
\< {
BEGIN IN_STATE_HEAD;
}
. {
yyerror("Rule section giberish (temp warning).");
yyerror("Rule section giberish (temp warning) ('%s').", yytext); // XXX
}
\n { ; }
}
<IN_STATE_HEAD>{
{rule_name} {
	/* One state name inside the state head: look it up among the known
	 * states and, when found, remember it so that the rules which follow
	 * are attached to it. Unknown names are reported and not recorded. */
	const string name(yytext);
	auto found = rules_map.find(name);
	if (found != rules_map.end()) {
		current_states.push_back(found);
	} else {
		yyerror("State '%s' was never declared.", name.c_str());
	}
}
\>\{ {
	/* End of the head: start from empty pattern and code buffers and
	 * switch to scanning the body of the state definition. */
	code_buffer   = "";
	patter_buffer = "";
	BEGIN IN_STATE_DEFINITION;
}
, |
{wsnl} { ; }
. {
	yyerror("Unknown character inside state head (%c).", yytext[0]);
}
}
<IN_STATE_DEFINITION>{
\} {
current_states.clear();
BEGIN IN_RULE_SECTION;
}
. {
@ -192,6 +202,7 @@ prefix={value} {
}
{wsnl}+\{ {
BEGIN IN_CODE;
line_buffer = yylineno;
nest_counter = 0;
}
\n { ; }
@ -205,13 +216,17 @@ prefix={value} {
\} {
--nest_counter;
if (nest_counter == -1) {
current_state->second.push_back((rule_t2) {
.pattern = strdup(patter_buffer.c_str()),
.code = strdup(code_buffer.c_str()),
});
for (const auto &current_state : current_states) {
current_state->second.push_back((rule_t) {
.state = -1, // NOTE: initialized elsewhere
.pattern = strdup(patter_buffer.c_str()),
.code = strdup(code_buffer.c_str()),
.line = line_buffer,
});
}
patter_buffer = "";
code_buffer = "";
code_buffer = "";
BEGIN IN_STATE_DEFINITION;
} else {
@ -279,30 +294,35 @@ prefix={value} {
// Debug helper: writes what the parser collected to stderr, in three parts:
//   1. the definition section code buffer,
//   2. for every entry of rules_map, its name followed by the pattern and
//      code of each rule stored under it,
//   3. the code section code buffer.
// Each section is written exactly once and only to stderr, so stdout is left
// untouched by the dump.
static
void dump_parse_results(void) {
	fputs("--- Definition section code buffer ---\n", stderr);
	fputs(definition_section_code_buffer_str.c_str(), stderr);
	fputs("\n----------\n", stderr);

	fputs("--- Patterns ---\n", stderr);
	for (const auto &i : rules_map) {
		fprintf(stderr, "%s:\n", i.first.c_str());
		for (const auto &h : i.second) {
			fprintf(stderr, "\tpattern:\n%s\n" "\tcode:\n%s\n", h.pattern, h.code);
		}
		fputs("--\n", stderr);
	}

	fputs("--- Code section code buffer ---\n", stderr);
	fputs(code_section_code_buffer_str.c_str(), stderr);
	fputs("\n----------\n", stderr);
}
// Debug helper: prints one line per entry of the `rules` array to stderr,
// showing its state and pattern. Iteration stops at the first entry whose
// pattern is NULL; a fixed terminator line is printed afterwards.
static
void dump_rules(void) {
	for (rule_t * rule = rules; rule->pattern != NULL; rule++) {
		fprintf(
			stderr,
			"{ .state = %d, .pattern = `%s` }\n",
			rule->state,
			rule->pattern
		);
	}
	fputs("{ .state = 0, .pattern = NULL }\n", stderr);
}
extern "C"
@ -335,6 +355,7 @@ int parse(const char * filename) {
.state = state,
.pattern = rule.pattern,
.code = rule.code,
.line = rule.line,
};
}
++state;

View File

@ -7,7 +7,7 @@
// Returns the name of the file the generated scanner is written to, as a
// strdup()-allocated string (NULL if strdup fails).
// `filename` is not consulted yet: the result is hard-coded for now.
static
char * to_output_name(const char * filename) {
	(void)filename;
	return strdup("jeger.yy.c"); // XXX temp
}
signed main(const int argc, const char * argv[]) {
@ -28,6 +28,7 @@ signed main(const int argc, const char * argv[]) {
deinit_parser();
deinit_jeger();
deinit_opts();
return 0;
}

View File

@ -7,7 +7,7 @@
#include "jeger.h"
bool do_trace = false;
bool do_debug = false;
int do_debug = 0;
char * output_filename = NULL;
char * input_filename = NULL;
@ -47,7 +47,7 @@ int parse_arguments(const int argc, const char * * argv) {
do_trace = true;
} break;
case 'd': {
do_debug = true;
++do_debug;
} break;
default: {
usage();
@ -61,3 +61,8 @@ int parse_arguments(const int argc, const char * * argv) {
return 0;
}
/* Frees the output_filename and input_filename option strings.
 * Both pointers are reset to NULL afterwards so they do not dangle and so
 * that calling this function more than once is harmless (free(NULL) is a
 * no-op).
 */
void deinit_opts(void) {
	free(output_filename);
	output_filename = NULL;

	free(input_filename);
	input_filename = NULL;
}

View File

@ -4,11 +4,12 @@
#include <stdbool.h>
extern bool do_trace;
extern bool do_debug;
extern int do_debug; // NOTE: has multiple levels
extern char * output_filename;
extern char * input_filename;
extern void usage(void);
extern int parse_arguments(const int argc, const char * * argv);
extern void deinit_opts(void);
#endif

View File

@ -25,7 +25,7 @@ int yylex(const char * s) {\n\
*ss != '\\0';\n\
ss += ((mlen ? mlen : 1) * direction)\n\
) {\n\
int match = mlookup(ss, state_table[state]);\n\
int match = mlookup(ss, state);\n\
switch (match) {\n\
";

View File

@ -8,6 +8,7 @@
#include <stdio.h>
char data[30000];
char * data_ptr = data;
int nesting = 0;
%}
%x INITIAL IN_SKIP_FORWARD IN_SKIP_BACKWARD
@ -26,7 +27,7 @@
}
}
\] {
if (!*data_ptr) {
if (*data_ptr) {
REVERSE;
BEGIN IN_SKIP_BACKWARD;
}
@ -34,15 +35,30 @@
}
<IN_SKIP_FORWARD>{
\] { BEGIN INITIAL; }
\[ { ++nesting; }
\] {
if (!nesting) {
BEGIN INITIAL;
} else {
--nesting;
}
}
}
<IN_SKIP_BACKWARD>{
\[ { REVERSE; BEGIN INITIAL; }
\] { ++nesting; }
\[ {
if (!nesting) {
REVERSE;
BEGIN INITIAL;
} else {
--nesting;
}
}
}
<INITIAL,IN_SKIP_FORWARD,IN_SKIP_BACKWARD>{
.|\n { ; }
.|\n { ; }
}
%%