output compiles
This commit is contained in:
parent
c3c3a4edde
commit
ef95a216be
13
Makefile
13
Makefile
@ -4,7 +4,14 @@ SOURCE.d := source/
|
|||||||
OBJECT.d := object/
|
OBJECT.d := object/
|
||||||
|
|
||||||
CFLAGS += -Wall -Wpedantic -I${SOURCE.d}/
|
CFLAGS += -Wall -Wpedantic -I${SOURCE.d}/
|
||||||
CPPFLAGS += ${CFLAGS}
|
|
||||||
|
ifeq (${DEBUG}, 1)
|
||||||
|
LFLAGS += --debug --trace
|
||||||
|
CFLAGS += -O0 -ggdb -fno-inline
|
||||||
|
CPPFLAGS += -DDEBUG=1
|
||||||
|
endif
|
||||||
|
|
||||||
|
CXXFLAGS += ${CFLAGS} -std=gnu++20
|
||||||
|
|
||||||
OUTPUT := jeger
|
OUTPUT := jeger
|
||||||
|
|
||||||
@ -14,13 +21,15 @@ ${OUTPUT}: object/main.o object/generator.o object/jeger.yy.o
|
|||||||
test:
|
test:
|
||||||
./${OUTPUT} test/brainfuck.l 2>&1 | perl -pe "s/(\[.{1,4}\] = 128)/\x1b[90m\1\x1b[0m/g"
|
./${OUTPUT} test/brainfuck.l 2>&1 | perl -pe "s/(\[.{1,4}\] = 128)/\x1b[90m\1\x1b[0m/g"
|
||||||
cat jeger.yy.c
|
cat jeger.yy.c
|
||||||
|
gcc jeger.yy.c
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
|
-rm ${OBJECT.d}/*.yy.*
|
||||||
-rm ${OBJECT.d}/*.o
|
-rm ${OBJECT.d}/*.o
|
||||||
-rm ${OUTPUT}
|
-rm ${OUTPUT}
|
||||||
|
|
||||||
object/%.yy.cpp: source/%.l
|
object/%.yy.cpp: source/%.l
|
||||||
flex -o $@ $<
|
flex ${LFLAGS} -o $@ $<
|
||||||
|
|
||||||
object/%.o: source/%.c
|
object/%.o: source/%.c
|
||||||
${COMPILE.c} $< -o $@
|
${COMPILE.c} $< -o $@
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|
||||||
@ -6,26 +7,31 @@
|
|||||||
#include "jeger.h"
|
#include "jeger.h"
|
||||||
#include "snippets.inc"
|
#include "snippets.inc"
|
||||||
|
|
||||||
|
// XXX
|
||||||
//#define AS_SYMBOL(c) (c-'a')
|
//#define AS_SYMBOL(c) (c-'a')
|
||||||
#define AS_SYMBOL(c) ((int)c)
|
#define AS_SYMBOL(c) ((int)c)
|
||||||
#define TOKEN_OFFSET 128 /* XXX */
|
#define TOKEN_OFFSET 128 /* XXX */
|
||||||
|
// ---
|
||||||
|
|
||||||
|
rule_t * rules;
|
||||||
|
int n_rules = 0;
|
||||||
|
char * * state_names;
|
||||||
|
int n_states = 0;
|
||||||
int alphabet_size = 128;
|
int alphabet_size = 128;
|
||||||
rule_t * patterns;
|
|
||||||
|
|
||||||
char * definition_section_code_buffer;
|
char * definition_section_code_buffer;
|
||||||
char * code_section_code_buffer;
|
char * code_section_code_buffer;
|
||||||
|
|
||||||
static int n_states = 0;
|
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
void put_header(FILE * f, const int alphabet_size, const int n_states, const int no_match) {
|
void put_header(FILE * f, const int alphabet_size, const int no_match) {
|
||||||
#define DEFINE_INT(m, n) fprintf(f, "#define " #m " %d\n", n);
|
#define DEFINE_INT(m, n) fprintf(f, "#define " #m " %d\n", n);
|
||||||
#define DEFINE_STR(m, s) fprintf(f, "#define " #m " %s\n", s);
|
#define DEFINE_STR(m, s) fprintf(f, "#define " #m " %s\n", s);
|
||||||
|
|
||||||
DEFINE_INT(ALPHABET_SIZE, alphabet_size);
|
DEFINE_INT(ALPHABET_SIZE, alphabet_size);
|
||||||
DEFINE_INT(N_STATES, n_states);
|
DEFINE_INT(N_RULES, n_rules);
|
||||||
DEFINE_INT(NO_MATCH, no_match);
|
DEFINE_INT(NO_MATCH, no_match);
|
||||||
|
DEFINE_STR(BEGIN, "state = ");
|
||||||
DEFINE_STR(REVERSE, "(direction *= -1)");
|
DEFINE_STR(REVERSE, "(direction *= -1)");
|
||||||
fputs("#define AS_SYMBOL(c) c\n", /* (c-'a')\n */ f);
|
fputs("#define AS_SYMBOL(c) c\n", /* (c-'a')\n */ f);
|
||||||
|
|
||||||
@ -35,15 +41,17 @@ void put_header(FILE * f, const int alphabet_size, const int n_states, const int
|
|||||||
// DEFINE_STR(TRACE, "");
|
// DEFINE_STR(TRACE, "");
|
||||||
// DEFINE_STR(TRACE_DEFAULT, "");
|
// DEFINE_STR(TRACE_DEFAULT, "");
|
||||||
|
|
||||||
|
// XXX we want no globals
|
||||||
fputs("int mlen;\n", f);
|
fputs("int mlen;\n", f);
|
||||||
|
fputs("int direction = 1;\n", f);
|
||||||
|
|
||||||
fputs("\n", f);
|
fputs("\n", f);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline
|
static inline
|
||||||
void put_table(FILE * f, const int * table, char * * prefixes, int n_states, int alphabet_size) {
|
void put_table(FILE * f, const int * table, char * * prefixes, int n_states, int alphabet_size) {
|
||||||
fputs("int table[N_STATES][ALPHABET_SIZE] = {\n", f);
|
fputs("int table[N_RULES][ALPHABET_SIZE] = {\n", f);
|
||||||
for (int i = 0; i < n_states; i++) {
|
for (int i = 0; i < n_rules; i++) {
|
||||||
fprintf(f, "\t[%d] = {", i);
|
fprintf(f, "\t[%d] = {", i);
|
||||||
for (int h = 0; h < alphabet_size; h++) {
|
for (int h = 0; h < alphabet_size; h++) {
|
||||||
/* NOTE: we have to awkwardly escate "\" and "'",
|
/* NOTE: we have to awkwardly escate "\" and "'",
|
||||||
@ -67,13 +75,25 @@ void put_table(FILE * f, const int * table, char * * prefixes, int n_states, int
|
|||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void put_state_table(FILE * f, int * states, int n) {
|
void put_state_table(FILE * f, int * states) {
|
||||||
fprintf(f, "int state_table[%d] = {\n", n);
|
// XXX do i even need this table?
|
||||||
for (int i = 0; i < n; i++) {
|
fprintf(f, "int state_table[%d] = {\n", n_states);
|
||||||
if (states[i] == -1) { break; }
|
for (int i = 0; i < n_states; i++) {
|
||||||
|
if (states[i] == -1) { break; } // XXX
|
||||||
fprintf(f, "\t[%d] = %d,\n", i, states[i]);
|
fprintf(f, "\t[%d] = %d,\n", i, states[i]);
|
||||||
}
|
}
|
||||||
fputs("};\n\n", f);
|
fputs("};\n\n", f);
|
||||||
|
|
||||||
|
for (int i = 0; i < n_states; i++) {
|
||||||
|
fprintf(
|
||||||
|
f,
|
||||||
|
"#define %s %d\n",
|
||||||
|
state_names[i],
|
||||||
|
states[i]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fputs("\n", f);
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -87,54 +107,37 @@ int get_most_common_prefix(const char * pattern, char * * prefixes, int current_
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
|
||||||
int get_max_number_of_states(const rule_t * patterns) {
|
|
||||||
int r = 0;
|
|
||||||
int state_max_accumulator = -1;
|
|
||||||
for (int i = 0; patterns[i].pattern != NULL; i++) {
|
|
||||||
r += strlen(patterns[i].pattern);
|
|
||||||
if (patterns[i].state > state_max_accumulator) {
|
|
||||||
state_max_accumulator = patterns[i].state;
|
|
||||||
++r;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
static
|
static
|
||||||
void make_and_put_table(FILE * f) {
|
void make_and_put_table(FILE * f) {
|
||||||
// Init
|
// Init
|
||||||
n_states = get_max_number_of_states(patterns);
|
|
||||||
|
|
||||||
int states[n_states];
|
int states[n_states];
|
||||||
INITIALIZE_ARRAY(states, n_states, -1);
|
INITIALIZE_ARRAY(states, n_states, -1);
|
||||||
states[0] = 0;
|
states[0] = 0;
|
||||||
|
|
||||||
char * prefixes[n_states];
|
char * prefixes[n_rules];
|
||||||
INITIALIZE_ARRAY(prefixes, n_states, NULL);
|
INITIALIZE_ARRAY(prefixes, n_rules, NULL);
|
||||||
|
|
||||||
int table[n_states][alphabet_size];
|
int table[n_rules][alphabet_size];
|
||||||
INITIALIZE_MATRIX(table, n_states, alphabet_size, TOKEN_OFFSET);
|
INITIALIZE_MATRIX(table, n_rules, alphabet_size, TOKEN_OFFSET);
|
||||||
|
|
||||||
// Construct table
|
// Construct table
|
||||||
int next_free_slot = 1;
|
int next_free_slot = 1;
|
||||||
for (
|
for (
|
||||||
int pattern_index = 0;
|
int rule_index = 0;
|
||||||
patterns[pattern_index].pattern != NULL;
|
rules[rule_index].pattern != NULL;
|
||||||
pattern_index++
|
rule_index++
|
||||||
) {
|
) {
|
||||||
const rule_t * pattern = &patterns[pattern_index];
|
const rule_t * rule = &rules[rule_index];
|
||||||
|
|
||||||
int current_state_start = states[pattern->state];
|
int current_state_start = states[rule->state];
|
||||||
if (current_state_start == -1) {
|
if (current_state_start == -1) {
|
||||||
current_state_start = next_free_slot;
|
current_state_start = next_free_slot;
|
||||||
states[pattern->state] = next_free_slot;
|
states[rule->state] = next_free_slot;
|
||||||
++next_free_slot;
|
++next_free_slot;
|
||||||
}
|
}
|
||||||
|
|
||||||
int most_common_prefix_state = get_most_common_prefix(
|
int most_common_prefix_state = get_most_common_prefix(
|
||||||
pattern->pattern,
|
rule->pattern,
|
||||||
prefixes,
|
prefixes,
|
||||||
current_state_start
|
current_state_start
|
||||||
);
|
);
|
||||||
@ -142,30 +145,30 @@ void make_and_put_table(FILE * f) {
|
|||||||
prefixes[current_state_start] = strdup("");
|
prefixes[current_state_start] = strdup("");
|
||||||
|
|
||||||
int most_common_prefix_index = strlen(prefixes[most_common_prefix_state]);
|
int most_common_prefix_index = strlen(prefixes[most_common_prefix_state]);
|
||||||
const char * last_char = pattern->pattern + most_common_prefix_index;
|
const char * last_char = rule->pattern + most_common_prefix_index;
|
||||||
|
|
||||||
table
|
table
|
||||||
[most_common_prefix_state]
|
[most_common_prefix_state]
|
||||||
[AS_SYMBOL(pattern->pattern[most_common_prefix_index])]
|
[AS_SYMBOL(rule->pattern[most_common_prefix_index])]
|
||||||
= next_free_slot
|
= next_free_slot
|
||||||
;
|
;
|
||||||
|
|
||||||
for (
|
for (
|
||||||
int i = most_common_prefix_index+1;
|
int i = most_common_prefix_index+1;
|
||||||
pattern->pattern[i] != '\0';
|
rule->pattern[i] != '\0';
|
||||||
i++, next_free_slot++
|
i++, next_free_slot++
|
||||||
) {
|
) {
|
||||||
table
|
table
|
||||||
[next_free_slot]
|
[next_free_slot]
|
||||||
[AS_SYMBOL(pattern->pattern[i])]
|
[AS_SYMBOL(rule->pattern[i])]
|
||||||
= next_free_slot + 1
|
= next_free_slot + 1
|
||||||
;
|
;
|
||||||
prefixes[next_free_slot] = strndup(pattern->pattern, i);
|
prefixes[next_free_slot] = strndup(rule->pattern, i);
|
||||||
last_char = pattern->pattern + i;
|
last_char = rule->pattern + i;
|
||||||
}
|
}
|
||||||
|
|
||||||
int last_position = (last_char == pattern->pattern
|
int last_position = (last_char == rule->pattern
|
||||||
|| most_common_prefix_index == last_char - pattern->pattern)
|
|| most_common_prefix_index == last_char - rule->pattern)
|
||||||
? most_common_prefix_state
|
? most_common_prefix_state
|
||||||
: next_free_slot-1
|
: next_free_slot-1
|
||||||
;
|
;
|
||||||
@ -173,21 +176,16 @@ void make_and_put_table(FILE * f) {
|
|||||||
table
|
table
|
||||||
[last_position]
|
[last_position]
|
||||||
[AS_SYMBOL(*last_char)]
|
[AS_SYMBOL(*last_char)]
|
||||||
= TOKEN_OFFSET+1 + pattern_index
|
= TOKEN_OFFSET+1 + rule_index
|
||||||
;
|
;
|
||||||
|
|
||||||
put_table(stderr, (int*)table, prefixes, n_states, alphabet_size);
|
put_table(stderr, (int*)table, prefixes, n_rules, alphabet_size);
|
||||||
fputs("/* ================== */\n", stderr);
|
fputs("/* ================== */\n", stderr);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* `get_max_number_of_states()` most likely over estimated,
|
|
||||||
* so we cut back the table to the number of rows that were actually used.
|
|
||||||
*/
|
|
||||||
n_states = next_free_slot;
|
|
||||||
|
|
||||||
// Output
|
// Output
|
||||||
put_table(f, (int*)table, prefixes, n_states, alphabet_size);
|
put_table(f, (int*)table, prefixes, n_rules, alphabet_size);
|
||||||
put_state_table(f, states, n_states);
|
put_state_table(f, states);
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
@ -195,16 +193,29 @@ void put_functions(FILE * f) {
|
|||||||
fputs(yy_lookup_str, f);
|
fputs(yy_lookup_str, f);
|
||||||
|
|
||||||
fputs(yy_lex_str_start, f);
|
fputs(yy_lex_str_start, f);
|
||||||
for (rule_t * rule = patterns; rule->code != NULL; rule++) {
|
for (rule_t * rule = rules; rule->code != NULL; rule++) {
|
||||||
fprintf(f, "\tcase %ld: {\n" "%s\n" "\t} break;\n", rule - patterns, rule->code);
|
fprintf(f, "\tcase %ld: {\n" "%s\n" "\t} break;\n", rule - rules, rule->code);
|
||||||
}
|
}
|
||||||
fputs(yy_lex_str_end, f);
|
fputs(yy_lex_str_end, f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void deinit_jeger(void) {
|
||||||
|
for (int i = 0; i < n_states; i++) {
|
||||||
|
free(state_names[i]);
|
||||||
|
}
|
||||||
|
for (int i = 0; i < n_rules; i++) {
|
||||||
|
free(rules[i].pattern);
|
||||||
|
free(rules[i].code);
|
||||||
|
}
|
||||||
|
|
||||||
|
n_rules = 0;
|
||||||
|
n_states = 0;
|
||||||
|
}
|
||||||
|
|
||||||
void generate(const char * filename) {
|
void generate(const char * filename) {
|
||||||
FILE * f = fopen(filename, "w");
|
FILE * f = fopen(filename, "w");
|
||||||
|
|
||||||
put_header(f, alphabet_size, n_states, TOKEN_OFFSET);
|
put_header(f, alphabet_size, TOKEN_OFFSET);
|
||||||
make_and_put_table(f);
|
make_and_put_table(f);
|
||||||
|
|
||||||
fputs(definition_section_code_buffer, f);
|
fputs(definition_section_code_buffer, f);
|
||||||
|
@ -1,18 +1,37 @@
|
|||||||
#ifndef JEGER_H
|
#ifndef JEGER_H
|
||||||
#define JEGER_H
|
#define JEGER_H
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
// Structs
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int state;
|
int state;
|
||||||
char * pattern;
|
char * pattern;
|
||||||
char * code;
|
char * code;
|
||||||
} rule_t;
|
} rule_t;
|
||||||
|
|
||||||
extern rule_t * patterns;
|
typedef enum {
|
||||||
|
STATIC_TABLE,
|
||||||
|
SWITCH_TABLE,
|
||||||
|
} table_t;
|
||||||
|
|
||||||
|
// Globals
|
||||||
|
extern rule_t * rules;
|
||||||
|
extern int n_rules;
|
||||||
|
extern char * * state_names;
|
||||||
|
extern int n_states;
|
||||||
extern int alphabet_size;
|
extern int alphabet_size;
|
||||||
|
|
||||||
|
extern table_t table_type;
|
||||||
|
|
||||||
|
extern char * prefix;
|
||||||
|
extern bool do_setup_lineno;
|
||||||
|
|
||||||
extern char * definition_section_code_buffer;
|
extern char * definition_section_code_buffer;
|
||||||
extern char * code_section_code_buffer;
|
extern char * code_section_code_buffer;
|
||||||
|
|
||||||
|
// Functions
|
||||||
extern void generate(const char * filename);
|
extern void generate(const char * filename);
|
||||||
|
extern void deinit_jeger(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
169
source/jeger.l
169
source/jeger.l
@ -2,9 +2,14 @@
|
|||||||
/* NOTE:
|
/* NOTE:
|
||||||
its technically very bad taste to implement
|
its technically very bad taste to implement
|
||||||
a minimalist lex subset with flex.
|
a minimalist lex subset with flex.
|
||||||
|
it was a devtime optimization.
|
||||||
maybe it should be reimplemented in pure C when possible.
|
maybe it should be reimplemented in pure C when possible.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
#include "jeger.h"
|
#include "jeger.h"
|
||||||
|
}
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
@ -13,13 +18,22 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
char * prefix = strdup("yy");
|
||||||
|
table_t table_type = STATIC_TABLE;
|
||||||
|
bool do_setup_lineno = false;
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
void set_alphanet_range(char s, char e) {
|
typedef struct {
|
||||||
|
char * pattern;
|
||||||
|
char * code;
|
||||||
|
} rule_t2;
|
||||||
|
|
||||||
|
static void set_alphanet_range(char s, char e) {
|
||||||
// XXX not implemented
|
// XXX not implemented
|
||||||
}
|
}
|
||||||
|
|
||||||
void yyerror(const char * fmt, ...) {
|
static void yyerror(const char * fmt, ...) {
|
||||||
va_list va;
|
va_list va;
|
||||||
va_start(va, fmt);
|
va_start(va, fmt);
|
||||||
|
|
||||||
@ -30,29 +44,18 @@
|
|||||||
va_end(va);
|
va_end(va);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
STATIC_TABLE,
|
|
||||||
SWITCH_TABLE,
|
|
||||||
} table_t;
|
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
char * pattern;
|
|
||||||
char * code;
|
|
||||||
} rule_t2;
|
|
||||||
|
|
||||||
string definition_section_code_buffer_str;
|
string definition_section_code_buffer_str;
|
||||||
string code_section_code_buffer_str;
|
string code_section_code_buffer_str;
|
||||||
|
|
||||||
map<string, vector<rule_t2>> rules;
|
static map<string, vector<rule_t2>> rules_map;
|
||||||
map<string, vector<rule_t2>>::iterator current_state;
|
static map<string, vector<rule_t2>>::iterator current_state;
|
||||||
string patter_buffer;
|
static string patter_buffer;
|
||||||
string code_buffer;
|
static string code_buffer;
|
||||||
|
|
||||||
char * prefix = strdup("yy");
|
static int nest_counter = 0;
|
||||||
table_t table_type = STATIC_TABLE;
|
|
||||||
bool do_setup_lineno = false;
|
|
||||||
|
|
||||||
int nest_counter = 0;
|
static int source_state;
|
||||||
|
static string * source_buffer;
|
||||||
%}
|
%}
|
||||||
%x IN_DEFINITION_SECTION IN_RULE_SECTION IN_CODE_SECTION
|
%x IN_DEFINITION_SECTION IN_RULE_SECTION IN_CODE_SECTION
|
||||||
%x IN_DEFINITION_SECTION_CODE
|
%x IN_DEFINITION_SECTION_CODE
|
||||||
@ -68,6 +71,7 @@ value \"[-a-z]+\"
|
|||||||
%option yylineno
|
%option yylineno
|
||||||
%option nodefault
|
%option nodefault
|
||||||
%option noyywrap
|
%option noyywrap
|
||||||
|
%option nounput
|
||||||
%%
|
%%
|
||||||
BEGIN IN_DEFINITION_SECTION;
|
BEGIN IN_DEFINITION_SECTION;
|
||||||
|
|
||||||
@ -76,10 +80,6 @@ value \"[-a-z]+\"
|
|||||||
BEGIN IN_RULE_SECTION;
|
BEGIN IN_RULE_SECTION;
|
||||||
}
|
}
|
||||||
^\%\{ {
|
^\%\{ {
|
||||||
if (definition_section_code_buffer_str != "") {
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
BEGIN IN_DEFINITION_SECTION_CODE;
|
BEGIN IN_DEFINITION_SECTION_CODE;
|
||||||
}
|
}
|
||||||
\%x {
|
\%x {
|
||||||
@ -88,6 +88,12 @@ value \"[-a-z]+\"
|
|||||||
\%option {
|
\%option {
|
||||||
BEGIN IN_OPTION_LIST;
|
BEGIN IN_OPTION_LIST;
|
||||||
}
|
}
|
||||||
|
\/\* {
|
||||||
|
definition_section_code_buffer_str += yytext;
|
||||||
|
source_state = IN_DEFINITION_SECTION;
|
||||||
|
source_buffer = &definition_section_code_buffer_str;
|
||||||
|
BEGIN IN_MULTILINE_COMMENT;
|
||||||
|
}
|
||||||
. {
|
. {
|
||||||
yyerror("baaaa");
|
yyerror("baaaa");
|
||||||
}
|
}
|
||||||
@ -96,7 +102,7 @@ value \"[-a-z]+\"
|
|||||||
|
|
||||||
<IN_RULE_LIST>{
|
<IN_RULE_LIST>{
|
||||||
{rule_name} {
|
{rule_name} {
|
||||||
rules[yytext] = {};
|
rules_map[yytext] = {};
|
||||||
}
|
}
|
||||||
{ws}* { ; }
|
{ws}* { ; }
|
||||||
\n {
|
\n {
|
||||||
@ -157,8 +163,8 @@ prefix={value} {
|
|||||||
}
|
}
|
||||||
\<{rule_name}\>\{ {
|
\<{rule_name}\>\{ {
|
||||||
string state_name(yytext+1, yyleng-3);
|
string state_name(yytext+1, yyleng-3);
|
||||||
current_state = rules.find(state_name);
|
current_state = rules_map.find(state_name);
|
||||||
if (current_state == rules.end()) {
|
if (current_state == rules_map.end()) {
|
||||||
yyerror("State '%s' was never declared.", state_name.c_str());
|
yyerror("State '%s' was never declared.", state_name.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -168,15 +174,21 @@ prefix={value} {
|
|||||||
BEGIN IN_STATE_DEFINITION;
|
BEGIN IN_STATE_DEFINITION;
|
||||||
}
|
}
|
||||||
. {
|
. {
|
||||||
yyerror("baaa");
|
yyerror("Rule section giberish (temp warning).");
|
||||||
}
|
}
|
||||||
\n { ; }
|
\n { ; }
|
||||||
}
|
}
|
||||||
|
|
||||||
<IN_STATE_DEFINITION>{
|
<IN_STATE_DEFINITION>{
|
||||||
|
\} {
|
||||||
|
BEGIN IN_RULE_SECTION;
|
||||||
|
}
|
||||||
. {
|
. {
|
||||||
patter_buffer += yytext;
|
patter_buffer += yytext;
|
||||||
}
|
}
|
||||||
|
\\. {
|
||||||
|
patter_buffer += yytext + 1;
|
||||||
|
}
|
||||||
{wsnl}+\{ {
|
{wsnl}+\{ {
|
||||||
BEGIN IN_CODE;
|
BEGIN IN_CODE;
|
||||||
nest_counter = 0;
|
nest_counter = 0;
|
||||||
@ -186,6 +198,7 @@ prefix={value} {
|
|||||||
|
|
||||||
<IN_CODE>{
|
<IN_CODE>{
|
||||||
\{ {
|
\{ {
|
||||||
|
code_buffer += yytext;
|
||||||
++nest_counter;
|
++nest_counter;
|
||||||
}
|
}
|
||||||
\} {
|
\} {
|
||||||
@ -196,16 +209,26 @@ prefix={value} {
|
|||||||
.code = strdup(code_buffer.c_str()),
|
.code = strdup(code_buffer.c_str()),
|
||||||
});
|
});
|
||||||
|
|
||||||
BEGIN IN_RULE_SECTION;
|
patter_buffer = "";
|
||||||
|
code_buffer = "";
|
||||||
|
|
||||||
|
BEGIN IN_STATE_DEFINITION;
|
||||||
|
} else {
|
||||||
|
code_buffer += yytext;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
\" {
|
\" {
|
||||||
|
code_buffer += yytext;
|
||||||
BEGIN IN_STRING;
|
BEGIN IN_STRING;
|
||||||
}
|
}
|
||||||
\/\/ {
|
\/\/ {
|
||||||
|
code_buffer += yytext;
|
||||||
BEGIN IN_COMMENT;
|
BEGIN IN_COMMENT;
|
||||||
}
|
}
|
||||||
\/\* {
|
\/\* {
|
||||||
|
code_buffer += yytext;
|
||||||
|
source_state = IN_CODE;
|
||||||
|
source_buffer = &code_buffer;
|
||||||
BEGIN IN_MULTILINE_COMMENT;
|
BEGIN IN_MULTILINE_COMMENT;
|
||||||
}
|
}
|
||||||
.|\n {
|
.|\n {
|
||||||
@ -214,20 +237,35 @@ prefix={value} {
|
|||||||
}
|
}
|
||||||
|
|
||||||
<IN_STRING>{
|
<IN_STRING>{
|
||||||
\\\\ { ; }
|
\" {
|
||||||
\\\" { ; }
|
code_buffer += yytext;
|
||||||
\" { BEGIN IN_CODE; }
|
BEGIN IN_CODE;
|
||||||
.|\n { ; } /* XXX we are eating strings */
|
}
|
||||||
|
\\\\ |
|
||||||
|
\\\" |
|
||||||
|
.|\n {
|
||||||
|
code_buffer += yytext;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
<IN_COMMENT>{
|
<IN_COMMENT>{
|
||||||
. { ; }
|
. {
|
||||||
\n { BEGIN IN_CODE; }
|
code_buffer += yytext;
|
||||||
|
}
|
||||||
|
\n {
|
||||||
|
code_buffer += yytext;
|
||||||
|
BEGIN IN_CODE;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
<IN_MULTILINE_COMMENT>{
|
<IN_MULTILINE_COMMENT>{
|
||||||
.|\n { ; }
|
.|\n {
|
||||||
\*\/ { BEGIN IN_CODE; }
|
*source_buffer += yytext;
|
||||||
|
}
|
||||||
|
\*\/ {
|
||||||
|
*source_buffer += yytext;
|
||||||
|
BEGIN source_state;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
<IN_CODE_SECTION>{
|
<IN_CODE_SECTION>{
|
||||||
@ -238,12 +276,13 @@ prefix={value} {
|
|||||||
|
|
||||||
%%
|
%%
|
||||||
|
|
||||||
|
#if DEBUG == 1
|
||||||
static
|
static
|
||||||
void dump_parse_results(void) {
|
void dump_parse_results(void) {
|
||||||
puts(definition_section_code_buffer_str.c_str());
|
puts(definition_section_code_buffer_str.c_str());
|
||||||
puts("----------");
|
puts("----------");
|
||||||
|
|
||||||
for (const auto &i : rules) {
|
for (const auto &i : rules_map) {
|
||||||
printf("%s:\n", i.first.c_str());
|
printf("%s:\n", i.first.c_str());
|
||||||
for (const auto &h : i.second) {
|
for (const auto &h : i.second) {
|
||||||
printf("\tpattern:\n%s\n" "\tcode:\n%s\n", h.pattern, h.code);
|
printf("\tpattern:\n%s\n" "\tcode:\n%s\n", h.pattern, h.code);
|
||||||
@ -255,35 +294,75 @@ void dump_parse_results(void) {
|
|||||||
puts(code_section_code_buffer_str.c_str());
|
puts(code_section_code_buffer_str.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
void dump_rules(void) {
|
||||||
|
for (rule_t * rule = rules; rule->pattern != NULL; rule++) {
|
||||||
|
printf("{ .state = %d, .pattern = %s, }\n",
|
||||||
|
rule->state,
|
||||||
|
rule->pattern
|
||||||
|
);
|
||||||
|
}
|
||||||
|
puts("{ .state = 0, .pattern = NULL, }");
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static inline void dump_parse_results(void) { ; }
|
||||||
|
static inline void dump_rules(void) { ; }
|
||||||
|
#endif
|
||||||
|
|
||||||
extern "C"
|
extern "C"
|
||||||
int parse(const char * filename) {
|
int parse(const char * filename) {
|
||||||
|
// Init
|
||||||
int r = 0;
|
int r = 0;
|
||||||
FILE * f = fopen(filename, "r");
|
FILE * f = fopen(filename, "r");
|
||||||
if (!f) { return 2; }
|
if (!f) { return 2; }
|
||||||
|
|
||||||
yyin = f;
|
yyin = f;
|
||||||
|
|
||||||
|
// Parse
|
||||||
r = yylex();
|
r = yylex();
|
||||||
if (r) { return r; }
|
if (r) { return r; }
|
||||||
|
|
||||||
dump_parse_results();
|
// Set up globals
|
||||||
|
n_rules = 0;
|
||||||
|
for (const auto &rule_it : rules_map) {
|
||||||
|
n_rules += rule_it.second.size();
|
||||||
|
}
|
||||||
|
|
||||||
patterns = (rule_t*)malloc(sizeof(rule_t)*(rules.size()+1));
|
rules = (rule_t*)malloc(sizeof(rule_t)*(n_rules+1));
|
||||||
|
rules[n_rules] = (rule_t) { 0, NULL, NULL };
|
||||||
|
|
||||||
int i = 0;
|
int index = 0;
|
||||||
for (const auto &rule_it : rules) {
|
int state = 0;
|
||||||
|
for (const auto &rule_it : rules_map) {
|
||||||
for (const auto &rule : rule_it.second) {
|
for (const auto &rule : rule_it.second) {
|
||||||
patterns[i++] = (rule_t) {
|
rules[index++] = (rule_t) {
|
||||||
.state = i,
|
.state = state,
|
||||||
.pattern = rule.pattern,
|
.pattern = rule.pattern,
|
||||||
.code = rule.code,
|
.code = rule.code,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
++state;
|
||||||
|
}
|
||||||
|
|
||||||
|
n_states = rules_map.size();
|
||||||
|
state_names = (char**)malloc(sizeof(char*) * n_states);
|
||||||
|
int i = 0;
|
||||||
|
for (const auto &r : rules_map) {
|
||||||
|
state_names[i++] = strdup(r.first.c_str());
|
||||||
}
|
}
|
||||||
patterns[rules.size()] = (rule_t) { 0, NULL, NULL };
|
|
||||||
|
|
||||||
definition_section_code_buffer = strdup(definition_section_code_buffer_str.c_str());
|
definition_section_code_buffer = strdup(definition_section_code_buffer_str.c_str());
|
||||||
code_section_code_buffer = strdup(code_section_code_buffer_str.c_str());
|
code_section_code_buffer = strdup(code_section_code_buffer_str.c_str());
|
||||||
|
|
||||||
|
// Debug
|
||||||
|
dump_parse_results();
|
||||||
|
dump_rules();
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern "C"
|
||||||
|
int deinit_parser(void) {
|
||||||
|
yylex_destroy();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
@ -16,5 +16,8 @@ signed main(const int argc, char * argv[]) {
|
|||||||
parse(argv[1]);
|
parse(argv[1]);
|
||||||
generate("jeger.yy.c");
|
generate("jeger.yy.c");
|
||||||
|
|
||||||
|
deinit_parser();
|
||||||
|
deinit_jeger();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -2,5 +2,6 @@
|
|||||||
#define PARSE_H
|
#define PARSE_H
|
||||||
|
|
||||||
extern int parse(const char * filename);
|
extern int parse(const char * filename);
|
||||||
|
extern int deinit_parser(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -20,7 +20,11 @@ int mlookup(const char * s, int state) {\n\
|
|||||||
const char * yy_lex_str_start = "\n\
|
const char * yy_lex_str_start = "\n\
|
||||||
int yylex(const char * s) {\n\
|
int yylex(const char * s) {\n\
|
||||||
int state = 0;\n\
|
int state = 0;\n\
|
||||||
for (const char * ss = s; *ss != '\\0'; ss += (mlen ? mlen : 1)) {\n\
|
for (\n\
|
||||||
|
const char * ss = s;\n\
|
||||||
|
*ss != '\\0';\n\
|
||||||
|
ss += ((mlen ? mlen : 1) * direction)\n\
|
||||||
|
) {\n\
|
||||||
int match = mlookup(ss, state_table[state]);\n\
|
int match = mlookup(ss, state_table[state]);\n\
|
||||||
if (match != NO_MATCH) {\n\
|
if (match != NO_MATCH) {\n\
|
||||||
\n\
|
\n\
|
||||||
|
@ -1,17 +1,19 @@
|
|||||||
/* @BAKE
|
/* @BAKE
|
||||||
jeger --debug --trace -o $*.c $@
|
#jeger --debug --trace -o $*.c $@
|
||||||
|
jeger $@
|
||||||
gcc -o $* $*.c -ggdb
|
gcc -o $* $*.c -ggdb
|
||||||
@STOP
|
@STOP
|
||||||
*/
|
*/
|
||||||
%{
|
%{
|
||||||
|
#include <stdio.h>
|
||||||
char data[30000];
|
char data[30000];
|
||||||
char * data_ptr = data;
|
char * data_ptr = data;
|
||||||
%}
|
%}
|
||||||
|
|
||||||
%x IN_SKIP_FORWARD IN_SKIP_BACKWARD
|
%x INITIAL IN_SKIP_FORWARD IN_SKIP_BACKWARD
|
||||||
|
|
||||||
%option noyywrap nodefault
|
|
||||||
%%
|
%%
|
||||||
|
|
||||||
|
<INITIAL>{
|
||||||
\> { ++data_ptr; }
|
\> { ++data_ptr; }
|
||||||
\< { --data_ptr; }
|
\< { --data_ptr; }
|
||||||
\+ { ++(*data_ptr); }
|
\+ { ++(*data_ptr); }
|
||||||
@ -29,7 +31,7 @@
|
|||||||
BEGIN IN_SKIP_BACKWARD;
|
BEGIN IN_SKIP_BACKWARD;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
.|\n { ; }
|
}
|
||||||
|
|
||||||
<IN_SKIP_FORWARD>{
|
<IN_SKIP_FORWARD>{
|
||||||
\] { BEGIN INITIAL; }
|
\] { BEGIN INITIAL; }
|
||||||
@ -39,7 +41,7 @@
|
|||||||
\[ { REVERSE; BEGIN INITIAL; }
|
\[ { REVERSE; BEGIN INITIAL; }
|
||||||
}
|
}
|
||||||
|
|
||||||
<IN_SKIP_FORWARD,IN_SKIP_BACKWARD>{
|
<INITIAL,IN_SKIP_FORWARD,IN_SKIP_BACKWARD>{
|
||||||
.|\n { ; }
|
.|\n { ; }
|
||||||
}
|
}
|
||||||
%%
|
%%
|
||||||
@ -50,13 +52,19 @@ signed main(int argc, char * argv[]) {
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
yyin = fopen(argv[1], "r");
|
FILE * yyin = fopen(argv[1], "r");
|
||||||
if (!yyin) {
|
if (!yyin) { return 2; }
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
yylex();
|
fseek(yyin, 0, SEEK_END);
|
||||||
yylex_destroy();
|
int yylen = ftell(yyin);
|
||||||
|
rewind(yyin);
|
||||||
|
char yystr[yylen+1];
|
||||||
|
yystr[yylen] = '\00';
|
||||||
|
fread(yystr, yylen, sizeof(char), yyin);
|
||||||
|
|
||||||
|
yylex(yystr);
|
||||||
|
|
||||||
|
fclose(yyin);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user