output compiles

This commit is contained in:
anon 2024-12-12 09:42:40 +01:00
parent c3c3a4edde
commit ef95a216be
8 changed files with 253 additions and 119 deletions

View File

@ -4,7 +4,14 @@ SOURCE.d := source/
OBJECT.d := object/
CFLAGS += -Wall -Wpedantic -I${SOURCE.d}/
CPPFLAGS += ${CFLAGS}
ifeq (${DEBUG}, 1)
LFLAGS += --debug --trace
CFLAGS += -O0 -ggdb -fno-inline
CPPFLAGS += -DDEBUG=1
endif
CXXFLAGS += ${CFLAGS} -std=gnu++20
OUTPUT := jeger
@ -14,13 +21,15 @@ ${OUTPUT}: object/main.o object/generator.o object/jeger.yy.o
test:
./${OUTPUT} test/brainfuck.l 2>&1 | perl -pe "s/(\[.{1,4}\] = 128)/\x1b[90m\1\x1b[0m/g"
cat jeger.yy.c
gcc jeger.yy.c
clean:
-rm ${OBJECT.d}/*.yy.*
-rm ${OBJECT.d}/*.o
-rm ${OUTPUT}
object/%.yy.cpp: source/%.l
flex -o $@ $<
flex ${LFLAGS} -o $@ $<
object/%.o: source/%.c
${COMPILE.c} $< -o $@

View File

@ -1,4 +1,5 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
@ -6,26 +7,31 @@
#include "jeger.h"
#include "snippets.inc"
// XXX
//#define AS_SYMBOL(c) (c-'a')
#define AS_SYMBOL(c) ((int)c)
#define TOKEN_OFFSET 128 /* XXX */
// ---
rule_t * rules;
int n_rules = 0;
char * * state_names;
int n_states = 0;
int alphabet_size = 128;
rule_t * patterns;
char * definition_section_code_buffer;
char * code_section_code_buffer;
static int n_states = 0;
static inline
void put_header(FILE * f, const int alphabet_size, const int n_states, const int no_match) {
void put_header(FILE * f, const int alphabet_size, const int no_match) {
#define DEFINE_INT(m, n) fprintf(f, "#define " #m " %d\n", n);
#define DEFINE_STR(m, s) fprintf(f, "#define " #m " %s\n", s);
DEFINE_INT(ALPHABET_SIZE, alphabet_size);
DEFINE_INT(N_STATES, n_states);
DEFINE_INT(N_RULES, n_rules);
DEFINE_INT(NO_MATCH, no_match);
DEFINE_STR(BEGIN, "state = ");
DEFINE_STR(REVERSE, "(direction *= -1)");
fputs("#define AS_SYMBOL(c) c\n", /* (c-'a')\n */ f);
@ -35,15 +41,17 @@ void put_header(FILE * f, const int alphabet_size, const int n_states, const int
// DEFINE_STR(TRACE, "");
// DEFINE_STR(TRACE_DEFAULT, "");
// XXX we want no globals
fputs("int mlen;\n", f);
fputs("int direction = 1;\n", f);
fputs("\n", f);
}
static inline
void put_table(FILE * f, const int * table, char * * prefixes, int n_states, int alphabet_size) {
fputs("int table[N_STATES][ALPHABET_SIZE] = {\n", f);
for (int i = 0; i < n_states; i++) {
fputs("int table[N_RULES][ALPHABET_SIZE] = {\n", f);
for (int i = 0; i < n_rules; i++) {
fprintf(f, "\t[%d] = {", i);
for (int h = 0; h < alphabet_size; h++) {
/* NOTE: we have to awkwardly escape "\" and "'",
@ -67,13 +75,25 @@ void put_table(FILE * f, const int * table, char * * prefixes, int n_states, int
}
static
void put_state_table(FILE * f, int * states, int n) {
fprintf(f, "int state_table[%d] = {\n", n);
for (int i = 0; i < n; i++) {
if (states[i] == -1) { break; }
void put_state_table(FILE * f, int * states) {
// XXX do i even need this table?
fprintf(f, "int state_table[%d] = {\n", n_states);
for (int i = 0; i < n_states; i++) {
if (states[i] == -1) { break; } // XXX
fprintf(f, "\t[%d] = %d,\n", i, states[i]);
}
fputs("};\n\n", f);
for (int i = 0; i < n_states; i++) {
fprintf(
f,
"#define %s %d\n",
state_names[i],
states[i]
);
}
fputs("\n", f);
}
static
@ -87,54 +107,37 @@ int get_most_common_prefix(const char * pattern, char * * prefixes, int current_
return r;
}
/* Computes a row-count estimate for the transition table.
 *
 * Walks `patterns` up to (not including) the entry whose `pattern` is NULL.
 * Every rule contributes the length of its pattern string; in addition,
 * each rule whose `state` is greater than every `state` seen so far
 * contributes one more.
 */
static
int get_max_number_of_states(const rule_t * patterns) {
    int total = 0;
    int highest_state_seen = -1;

    const rule_t * cursor = patterns;
    while (cursor->pattern != NULL) {
        total += strlen(cursor->pattern);

        // a state id larger than any before it accounts for one extra slot
        if (highest_state_seen < cursor->state) {
            highest_state_seen = cursor->state;
            total += 1;
        }

        ++cursor;
    }

    return total;
}
static
void make_and_put_table(FILE * f) {
// Init
n_states = get_max_number_of_states(patterns);
int states[n_states];
INITIALIZE_ARRAY(states, n_states, -1);
states[0] = 0;
char * prefixes[n_states];
INITIALIZE_ARRAY(prefixes, n_states, NULL);
char * prefixes[n_rules];
INITIALIZE_ARRAY(prefixes, n_rules, NULL);
int table[n_states][alphabet_size];
INITIALIZE_MATRIX(table, n_states, alphabet_size, TOKEN_OFFSET);
int table[n_rules][alphabet_size];
INITIALIZE_MATRIX(table, n_rules, alphabet_size, TOKEN_OFFSET);
// Construct table
int next_free_slot = 1;
for (
int pattern_index = 0;
patterns[pattern_index].pattern != NULL;
pattern_index++
int rule_index = 0;
rules[rule_index].pattern != NULL;
rule_index++
) {
const rule_t * pattern = &patterns[pattern_index];
const rule_t * rule = &rules[rule_index];
int current_state_start = states[pattern->state];
int current_state_start = states[rule->state];
if (current_state_start == -1) {
current_state_start = next_free_slot;
states[pattern->state] = next_free_slot;
states[rule->state] = next_free_slot;
++next_free_slot;
}
int most_common_prefix_state = get_most_common_prefix(
pattern->pattern,
rule->pattern,
prefixes,
current_state_start
);
@ -142,30 +145,30 @@ void make_and_put_table(FILE * f) {
prefixes[current_state_start] = strdup("");
int most_common_prefix_index = strlen(prefixes[most_common_prefix_state]);
const char * last_char = pattern->pattern + most_common_prefix_index;
const char * last_char = rule->pattern + most_common_prefix_index;
table
[most_common_prefix_state]
[AS_SYMBOL(pattern->pattern[most_common_prefix_index])]
[AS_SYMBOL(rule->pattern[most_common_prefix_index])]
= next_free_slot
;
for (
int i = most_common_prefix_index+1;
pattern->pattern[i] != '\0';
rule->pattern[i] != '\0';
i++, next_free_slot++
) {
table
[next_free_slot]
[AS_SYMBOL(pattern->pattern[i])]
[AS_SYMBOL(rule->pattern[i])]
= next_free_slot + 1
;
prefixes[next_free_slot] = strndup(pattern->pattern, i);
last_char = pattern->pattern + i;
prefixes[next_free_slot] = strndup(rule->pattern, i);
last_char = rule->pattern + i;
}
int last_position = (last_char == pattern->pattern
|| most_common_prefix_index == last_char - pattern->pattern)
int last_position = (last_char == rule->pattern
|| most_common_prefix_index == last_char - rule->pattern)
? most_common_prefix_state
: next_free_slot-1
;
@ -173,21 +176,16 @@ void make_and_put_table(FILE * f) {
table
[last_position]
[AS_SYMBOL(*last_char)]
= TOKEN_OFFSET+1 + pattern_index
= TOKEN_OFFSET+1 + rule_index
;
put_table(stderr, (int*)table, prefixes, n_states, alphabet_size);
put_table(stderr, (int*)table, prefixes, n_rules, alphabet_size);
fputs("/* ================== */\n", stderr);
}
/* `get_max_number_of_states()` most likely over estimated,
* so we cut back the table to the number of rows that were actually used.
*/
n_states = next_free_slot;
// Output
put_table(f, (int*)table, prefixes, n_states, alphabet_size);
put_state_table(f, states, n_states);
put_table(f, (int*)table, prefixes, n_rules, alphabet_size);
put_state_table(f, states);
}
static
@ -195,16 +193,29 @@ void put_functions(FILE * f) {
fputs(yy_lookup_str, f);
fputs(yy_lex_str_start, f);
for (rule_t * rule = patterns; rule->code != NULL; rule++) {
fprintf(f, "\tcase %ld: {\n" "%s\n" "\t} break;\n", rule - patterns, rule->code);
for (rule_t * rule = rules; rule->code != NULL; rule++) {
fprintf(f, "\tcase %ld: {\n" "%s\n" "\t} break;\n", rule - rules, rule->code);
}
fputs(yy_lex_str_end, f);
}
/* Releases the rule and state-name storage held in the generator globals.
 *
 * Frees every state name, every rule's pattern and code string, and then the
 * `state_names` and `rules` arrays themselves. Both pointers are reset to
 * NULL and both counters to 0, so a repeated call is harmless
 * (the loops do not run and `free(NULL)` is a no-op).
 *
 * NOTE(review): the section code buffers are not released here;
 * confirm who owns them.
 */
void deinit_jeger(void) {
    for (int i = 0; i < n_states; i++) {
        free(state_names[i]);
    }
    // the array holding the names is heap allocated too
    free(state_names);
    state_names = NULL;

    for (int i = 0; i < n_rules; i++) {
        free(rules[i].pattern);
        free(rules[i].code);
    }
    // release the rule array itself (including its terminator slot)
    free(rules);
    rules = NULL;

    n_rules = 0;
    n_states = 0;
}
void generate(const char * filename) {
FILE * f = fopen(filename, "w");
put_header(f, alphabet_size, n_states, TOKEN_OFFSET);
put_header(f, alphabet_size, TOKEN_OFFSET);
make_and_put_table(f);
fputs(definition_section_code_buffer, f);

View File

@ -1,18 +1,37 @@
#ifndef JEGER_H
#define JEGER_H
#include <stdbool.h>
// Structs
// A single lexer rule: the pattern to match, the code to run on a match,
// and the start state the rule belongs to.
// Rule arrays are terminated by an entry whose `pattern` is NULL.
typedef struct {
// index of the owning state; used to index the per-state start table
int state;
// pattern text, walked character by character when the table is built
char * pattern;
// action code, emitted verbatim into the generated `case` body
char * code;
} rule_t;
extern rule_t * patterns;
typedef enum {
STATIC_TABLE,
SWITCH_TABLE,
} table_t;
// Globals
extern rule_t * rules;
extern int n_rules;
extern char * * state_names;
extern int n_states;
extern int alphabet_size;
extern table_t table_type;
extern char * prefix;
extern bool do_setup_lineno;
extern char * definition_section_code_buffer;
extern char * code_section_code_buffer;
// Functions
extern void generate(const char * filename);
extern void deinit_jeger(void);
#endif

View File

@ -2,9 +2,14 @@
/* NOTE:
it's technically very bad taste to implement
a minimalist lex subset with flex.
it was a devtime optimization.
maybe it should be reimplemented in pure C when possible.
*/
extern "C" {
#include "jeger.h"
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -13,13 +18,22 @@
#include <vector>
#include <string>
char * prefix = strdup("yy");
table_t table_type = STATIC_TABLE;
bool do_setup_lineno = false;
using namespace std;
void set_alphanet_range(char s, char e) {
typedef struct {
char * pattern;
char * code;
} rule_t2;
static void set_alphanet_range(char s, char e) {
// XXX not implemented
}
void yyerror(const char * fmt, ...) {
static void yyerror(const char * fmt, ...) {
va_list va;
va_start(va, fmt);
@ -30,29 +44,18 @@
va_end(va);
}
typedef enum {
STATIC_TABLE,
SWITCH_TABLE,
} table_t;
typedef struct {
char * pattern;
char * code;
} rule_t2;
string definition_section_code_buffer_str;
string code_section_code_buffer_str;
map<string, vector<rule_t2>> rules;
map<string, vector<rule_t2>>::iterator current_state;
string patter_buffer;
string code_buffer;
static map<string, vector<rule_t2>> rules_map;
static map<string, vector<rule_t2>>::iterator current_state;
static string patter_buffer;
static string code_buffer;
char * prefix = strdup("yy");
table_t table_type = STATIC_TABLE;
bool do_setup_lineno = false;
static int nest_counter = 0;
int nest_counter = 0;
static int source_state;
static string * source_buffer;
%}
%x IN_DEFINITION_SECTION IN_RULE_SECTION IN_CODE_SECTION
%x IN_DEFINITION_SECTION_CODE
@ -68,6 +71,7 @@ value \"[-a-z]+\"
%option yylineno
%option nodefault
%option noyywrap
%option nounput
%%
BEGIN IN_DEFINITION_SECTION;
@ -76,10 +80,6 @@ value \"[-a-z]+\"
BEGIN IN_RULE_SECTION;
}
^\%\{ {
if (definition_section_code_buffer_str != "") {
return 1;
}
BEGIN IN_DEFINITION_SECTION_CODE;
}
\%x {
@ -88,6 +88,12 @@ value \"[-a-z]+\"
\%option {
BEGIN IN_OPTION_LIST;
}
\/\* {
definition_section_code_buffer_str += yytext;
source_state = IN_DEFINITION_SECTION;
source_buffer = &definition_section_code_buffer_str;
BEGIN IN_MULTILINE_COMMENT;
}
. {
yyerror("baaaa");
}
@ -96,7 +102,7 @@ value \"[-a-z]+\"
<IN_RULE_LIST>{
{rule_name} {
rules[yytext] = {};
rules_map[yytext] = {};
}
{ws}* { ; }
\n {
@ -157,8 +163,8 @@ prefix={value} {
}
\<{rule_name}\>\{ {
string state_name(yytext+1, yyleng-3);
current_state = rules.find(state_name);
if (current_state == rules.end()) {
current_state = rules_map.find(state_name);
if (current_state == rules_map.end()) {
yyerror("State '%s' was never declared.", state_name.c_str());
}
@ -168,15 +174,21 @@ prefix={value} {
BEGIN IN_STATE_DEFINITION;
}
. {
yyerror("baaa");
yyerror("Rule section giberish (temp warning).");
}
\n { ; }
}
<IN_STATE_DEFINITION>{
\} {
BEGIN IN_RULE_SECTION;
}
. {
patter_buffer += yytext;
}
\\. {
patter_buffer += yytext + 1;
}
{wsnl}+\{ {
BEGIN IN_CODE;
nest_counter = 0;
@ -186,6 +198,7 @@ prefix={value} {
<IN_CODE>{
\{ {
code_buffer += yytext;
++nest_counter;
}
\} {
@ -196,16 +209,26 @@ prefix={value} {
.code = strdup(code_buffer.c_str()),
});
BEGIN IN_RULE_SECTION;
patter_buffer = "";
code_buffer = "";
BEGIN IN_STATE_DEFINITION;
} else {
code_buffer += yytext;
}
}
\" {
code_buffer += yytext;
BEGIN IN_STRING;
}
\/\/ {
code_buffer += yytext;
BEGIN IN_COMMENT;
}
\/\* {
code_buffer += yytext;
source_state = IN_CODE;
source_buffer = &code_buffer;
BEGIN IN_MULTILINE_COMMENT;
}
.|\n {
@ -214,20 +237,35 @@ prefix={value} {
}
<IN_STRING>{
\\\\ { ; }
\\\" { ; }
\" { BEGIN IN_CODE; }
.|\n { ; } /* XXX we are eating strings */
\" {
code_buffer += yytext;
BEGIN IN_CODE;
}
\\\\ |
\\\" |
.|\n {
code_buffer += yytext;
}
}
<IN_COMMENT>{
. { ; }
\n { BEGIN IN_CODE; }
. {
code_buffer += yytext;
}
\n {
code_buffer += yytext;
BEGIN IN_CODE;
}
}
<IN_MULTILINE_COMMENT>{
.|\n { ; }
\*\/ { BEGIN IN_CODE; }
.|\n {
*source_buffer += yytext;
}
\*\/ {
*source_buffer += yytext;
BEGIN source_state;
}
}
<IN_CODE_SECTION>{
@ -238,12 +276,13 @@ prefix={value} {
%%
#if DEBUG == 1
static
void dump_parse_results(void) {
puts(definition_section_code_buffer_str.c_str());
puts("----------");
for (const auto &i : rules) {
for (const auto &i : rules_map) {
printf("%s:\n", i.first.c_str());
for (const auto &h : i.second) {
printf("\tpattern:\n%s\n" "\tcode:\n%s\n", h.pattern, h.code);
@ -255,35 +294,75 @@ void dump_parse_results(void) {
puts(code_section_code_buffer_str.c_str());
}
/* Debug helper: prints every entry of the global `rules` array to stdout,
 * one initializer-like line per rule, followed by a line standing in for
 * the NULL-pattern terminator entry.
 */
static
void dump_rules(void) {
    for (int i = 0; rules[i].pattern != NULL; i++) {
        printf("{ .state = %d, .pattern = %s, }\n",
            rules[i].state,
            rules[i].pattern
        );
    }

    // the terminator is printed as a fixed line rather than read from the array
    puts("{ .state = 0, .pattern = NULL, }");
}
#else
static inline void dump_parse_results(void) { ; }
static inline void dump_rules(void) { ; }
#endif
extern "C"
int parse(const char * filename) {
// Init
int r = 0;
FILE * f = fopen(filename, "r");
if (!f) { return 2; }
yyin = f;
// Parse
r = yylex();
if (r) { return r; }
dump_parse_results();
// Set up globals
n_rules = 0;
for (const auto &rule_it : rules_map) {
n_rules += rule_it.second.size();
}
patterns = (rule_t*)malloc(sizeof(rule_t)*(rules.size()+1));
rules = (rule_t*)malloc(sizeof(rule_t)*(n_rules+1));
rules[n_rules] = (rule_t) { 0, NULL, NULL };
int i = 0;
for (const auto &rule_it : rules) {
int index = 0;
int state = 0;
for (const auto &rule_it : rules_map) {
for (const auto &rule : rule_it.second) {
patterns[i++] = (rule_t) {
.state = i,
rules[index++] = (rule_t) {
.state = state,
.pattern = rule.pattern,
.code = rule.code,
};
}
++state;
}
n_states = rules_map.size();
state_names = (char**)malloc(sizeof(char*) * n_states);
int i = 0;
for (const auto &r : rules_map) {
state_names[i++] = strdup(r.first.c_str());
}
patterns[rules.size()] = (rule_t) { 0, NULL, NULL };
definition_section_code_buffer = strdup(definition_section_code_buffer_str.c_str());
code_section_code_buffer = strdup(code_section_code_buffer_str.c_str());
// Debug
dump_parse_results();
dump_rules();
return r;
}
/* Tears down the scanner by calling the flex-provided yylex_destroy().
 * Exposed with C linkage so the C side can call it.
 * Always returns 0; the result of yylex_destroy() is ignored.
 */
extern "C"
int deinit_parser(void) {
yylex_destroy();
return 0;
}

View File

@ -16,5 +16,8 @@ signed main(const int argc, char * argv[]) {
parse(argv[1]);
generate("jeger.yy.c");
deinit_parser();
deinit_jeger();
return 0;
}

View File

@ -2,5 +2,6 @@
#define PARSE_H
extern int parse(const char * filename);
extern int deinit_parser(void);
#endif

View File

@ -20,7 +20,11 @@ int mlookup(const char * s, int state) {\n\
const char * yy_lex_str_start = "\n\
int yylex(const char * s) {\n\
int state = 0;\n\
for (const char * ss = s; *ss != '\\0'; ss += (mlen ? mlen : 1)) {\n\
for (\n\
const char * ss = s;\n\
*ss != '\\0';\n\
ss += ((mlen ? mlen : 1) * direction)\n\
) {\n\
int match = mlookup(ss, state_table[state]);\n\
if (match != NO_MATCH) {\n\
\n\

View File

@ -1,17 +1,19 @@
/* @BAKE
jeger --debug --trace -o $*.c $@
#jeger --debug --trace -o $*.c $@
jeger $@
gcc -o $* $*.c -ggdb
@STOP
*/
%{
#include <stdio.h>
char data[30000];
char * data_ptr = data;
%}
%x IN_SKIP_FORWARD IN_SKIP_BACKWARD
%option noyywrap nodefault
%x INITIAL IN_SKIP_FORWARD IN_SKIP_BACKWARD
%%
<INITIAL>{
\> { ++data_ptr; }
\< { --data_ptr; }
\+ { ++(*data_ptr); }
@ -29,7 +31,7 @@
BEGIN IN_SKIP_BACKWARD;
}
}
.|\n { ; }
}
<IN_SKIP_FORWARD>{
\] { BEGIN INITIAL; }
@ -39,7 +41,7 @@
\[ { REVERSE; BEGIN INITIAL; }
}
<IN_SKIP_FORWARD,IN_SKIP_BACKWARD>{
<INITIAL,IN_SKIP_FORWARD,IN_SKIP_BACKWARD>{
.|\n { ; }
}
%%
@ -50,13 +52,19 @@ signed main(int argc, char * argv[]) {
return 1;
}
yyin = fopen(argv[1], "r");
if (!yyin) {
return 2;
}
FILE * yyin = fopen(argv[1], "r");
if (!yyin) { return 2; }
yylex();
yylex_destroy();
fseek(yyin, 0, SEEK_END);
int yylen = ftell(yyin);
rewind(yyin);
char yystr[yylen+1];
yystr[yylen] = '\00';
fread(yystr, yylen, sizeof(char), yyin);
yylex(yystr);
fclose(yyin);
return 0;
}