generating a rough approximation of code
This commit is contained in:
parent
6cdf57f468
commit
c3c3a4edde
5
Makefile
5
Makefile
@ -9,10 +9,11 @@ CPPFLAGS += ${CFLAGS}
|
||||
OUTPUT := jeger
|
||||
|
||||
${OUTPUT}: object/main.o object/generator.o object/jeger.yy.o
|
||||
${LINK.cpp} -o ${OUTPUT} $?
|
||||
${LINK.cpp} -o ${OUTPUT} $^
|
||||
|
||||
test:
|
||||
./${OUTPUT} source/jeger.l
|
||||
./${OUTPUT} test/brainfuck.l 2>&1 | perl -pe "s/(\[.{1,4}\] = 128)/\x1b[90m\1\x1b[0m/g"
|
||||
cat jeger.yy.c
|
||||
|
||||
clean:
|
||||
-rm ${OBJECT.d}/*.o
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#include "util.h"
|
||||
#include "jeger.h"
|
||||
#include "snippets.inc"
|
||||
|
||||
//#define AS_SYMBOL(c) (c-'a')
|
||||
#define AS_SYMBOL(c) ((int)c)
|
||||
@ -12,27 +13,29 @@
|
||||
int alphabet_size = 128;
|
||||
rule_t * patterns;
|
||||
|
||||
char * definition_section_code_buffer;
|
||||
char * code_section_code_buffer;
|
||||
|
||||
static int n_states = 0;
|
||||
|
||||
/* Write the preamble of the generated scanner to `f`.
 *
 * Emits, one per line, the `#define`s the generated code relies on
 * (ALPHABET_SIZE, N_STATES, NO_MATCH, REVERSE, AS_SYMBOL, TRACE,
 * TRACE_DEFAULT), followed by the declaration of the global `mlen`
 * and a blank line.
 *
 *   f             - output stream; must be open for writing
 *   alphabet_size - value emitted for ALPHABET_SIZE
 *   n_states      - value emitted for N_STATES
 *   no_match      - value emitted for NO_MATCH
 *
 * Every macro is emitted exactly once.
 */
static inline
void put_header(FILE * f, const int alphabet_size, const int n_states, const int no_match) {
    /* Each helper prints a single `#define <name> <value>` line;
     * the macro name is stringized from the first argument.
     */
    #define DEFINE_INT(m, n) fprintf(f, "#define " #m " %d\n", n);
    #define DEFINE_STR(m, s) fprintf(f, "#define " #m " %s\n", s);

    DEFINE_INT(ALPHABET_SIZE, alphabet_size);
    DEFINE_INT(N_STATES, n_states);
    DEFINE_INT(NO_MATCH, no_match);
    DEFINE_STR(REVERSE, "(direction *= -1)");
    fputs("#define AS_SYMBOL(c) c\n", /* (c-'a')\n */ f);

    // XXX make this conditional
    /* The quotes that are part of the trace message itself must reach the
     * generated file as \" (hence \\\" here); otherwise they terminate
     * the format string early and the generated scanner does not compile.
     */
    DEFINE_STR(TRACE, "fprintf(stderr, \"--accepting rule at line %d (\\\"%.*s\\\")\\n\", __LINE__, mlen, ss);");
    DEFINE_STR(TRACE_DEFAULT, "fprintf(stderr, \"--accepting default rule (\\\"%c\\\")\\n\", *ss);");
    // DEFINE_STR(TRACE, "");
    // DEFINE_STR(TRACE_DEFAULT, "");

    fputs("int mlen;\n", f);

    fputs("\n", f);
}
|
||||
@ -43,6 +46,9 @@ void put_table(FILE * f, const int * table, char * * prefixes, int n_states, int
|
||||
for (int i = 0; i < n_states; i++) {
|
||||
fprintf(f, "\t[%d] = {", i);
|
||||
for (int h = 0; h < alphabet_size; h++) {
|
||||
/* NOTE: we have to awkwardly escape "\" and "'",
|
||||
* then also print printable characters as themselves
|
||||
*/
|
||||
if (h == '\\') {
|
||||
fprintf(f, "['\\\\'] = %d, ", table[i*alphabet_size + h]);
|
||||
} else
|
||||
@ -60,15 +66,17 @@ void put_table(FILE * f, const int * table, char * * prefixes, int n_states, int
|
||||
fputs("};\n", f);
|
||||
}
|
||||
|
||||
/* Emit the `state_table` array definition of the generated scanner to `f`.
 *
 *   f      - output stream; must be open for writing
 *   states - array of at least `n` entries
 *   n      - declared length of the emitted array
 *
 * Entries are written as designated initializers (`[i] = value,`).
 * Output stops at the first entry equal to -1; that entry and everything
 * after it are left out of the initializer list.
 * NOTE(review): -1 presumably marks an unused slot -- confirm with the caller.
 *
 * All output goes to `f`; nothing is written to stdout.
 */
static
void put_state_table(FILE * f, int * states, int n) {
    fprintf(f, "int state_table[%d] = {\n", n);

    for (int i = 0; i < n; i++) {
        if (states[i] == -1) { break; }
        fprintf(f, "\t[%d] = %d,\n", i, states[i]);
    }

    fputs("};\n\n", f);
}
|
||||
|
||||
static
|
||||
int get_most_common_prefix(const char * pattern, char * * prefixes, int current_state_start) {
|
||||
int r = current_state_start;
|
||||
for (int i = current_state_start; prefixes[i] != NULL; i++) {
|
||||
@ -79,6 +87,7 @@ int get_most_common_prefix(const char * pattern, char * * prefixes, int current_
|
||||
return r;
|
||||
}
|
||||
|
||||
static
|
||||
int get_max_number_of_states(const rule_t * patterns) {
|
||||
int r = 0;
|
||||
int state_max_accumulator = -1;
|
||||
@ -93,9 +102,10 @@ int get_max_number_of_states(const rule_t * patterns) {
|
||||
return r;
|
||||
}
|
||||
|
||||
void generate(const char * filename) {
|
||||
static
|
||||
void make_and_put_table(FILE * f) {
|
||||
// Init
|
||||
int n_states = get_max_number_of_states(patterns);
|
||||
n_states = get_max_number_of_states(patterns);
|
||||
|
||||
int states[n_states];
|
||||
INITIALIZE_ARRAY(states, n_states, -1);
|
||||
@ -176,7 +186,28 @@ void generate(const char * filename) {
|
||||
n_states = next_free_slot;
|
||||
|
||||
// Output
|
||||
put_header(stdout, alphabet_size, n_states, TOKEN_OFFSET);
|
||||
put_table(stdout, (int*)table, prefixes, n_states, alphabet_size);
|
||||
put_state_table(states, n_states);
|
||||
put_table(f, (int*)table, prefixes, n_states, alphabet_size);
|
||||
put_state_table(f, states, n_states);
|
||||
}
|
||||
|
||||
static
|
||||
void put_functions(FILE * f) {
|
||||
fputs(yy_lookup_str, f);
|
||||
|
||||
fputs(yy_lex_str_start, f);
|
||||
for (rule_t * rule = patterns; rule->code != NULL; rule++) {
|
||||
fprintf(f, "\tcase %ld: {\n" "%s\n" "\t} break;\n", rule - patterns, rule->code);
|
||||
}
|
||||
fputs(yy_lex_str_end, f);
|
||||
}
|
||||
|
||||
void generate(const char * filename) {
|
||||
FILE * f = fopen(filename, "w");
|
||||
|
||||
put_header(f, alphabet_size, n_states, TOKEN_OFFSET);
|
||||
make_and_put_table(f);
|
||||
|
||||
fputs(definition_section_code_buffer, f);
|
||||
put_functions(f);
|
||||
fputs(code_section_code_buffer, f);
|
||||
}
|
||||
|
@ -10,6 +10,9 @@ typedef struct {
|
||||
extern rule_t * patterns;
|
||||
extern int alphabet_size;
|
||||
|
||||
extern char * definition_section_code_buffer;
|
||||
extern char * code_section_code_buffer;
|
||||
|
||||
extern void generate(const char * filename);
|
||||
|
||||
#endif
|
||||
|
@ -40,8 +40,8 @@
|
||||
char * code;
|
||||
} rule_t2;
|
||||
|
||||
string definition_section_code_buffer;
|
||||
string code_section_code_buffer;
|
||||
string definition_section_code_buffer_str;
|
||||
string code_section_code_buffer_str;
|
||||
|
||||
map<string, vector<rule_t2>> rules;
|
||||
map<string, vector<rule_t2>>::iterator current_state;
|
||||
@ -76,7 +76,7 @@ value \"[-a-z]+\"
|
||||
BEGIN IN_RULE_SECTION;
|
||||
}
|
||||
^\%\{ {
|
||||
if (definition_section_code_buffer != "") {
|
||||
if (definition_section_code_buffer_str != "") {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@ -144,7 +144,7 @@ prefix={value} {
|
||||
|
||||
<IN_DEFINITION_SECTION_CODE>{
|
||||
.|\n {
|
||||
definition_section_code_buffer += yytext;
|
||||
definition_section_code_buffer_str += yytext;
|
||||
}
|
||||
^\%\} {
|
||||
BEGIN IN_DEFINITION_SECTION;
|
||||
@ -232,7 +232,7 @@ prefix={value} {
|
||||
|
||||
<IN_CODE_SECTION>{
|
||||
(.|\n)* {
|
||||
code_section_code_buffer += yytext;
|
||||
code_section_code_buffer_str += yytext;
|
||||
}
|
||||
}
|
||||
|
||||
@ -240,7 +240,7 @@ prefix={value} {
|
||||
|
||||
static
|
||||
void dump_parse_results(void) {
|
||||
puts(definition_section_code_buffer.c_str());
|
||||
puts(definition_section_code_buffer_str.c_str());
|
||||
puts("----------");
|
||||
|
||||
for (const auto &i : rules) {
|
||||
@ -252,7 +252,7 @@ void dump_parse_results(void) {
|
||||
}
|
||||
|
||||
puts("----------");
|
||||
puts(code_section_code_buffer.c_str());
|
||||
puts(code_section_code_buffer_str.c_str());
|
||||
}
|
||||
|
||||
extern "C"
|
||||
@ -282,6 +282,8 @@ int parse(const char * filename) {
|
||||
}
|
||||
patterns[rules.size()] = (rule_t) { 0, NULL, NULL };
|
||||
|
||||
definition_section_code_buffer = strdup(definition_section_code_buffer_str.c_str());
|
||||
code_section_code_buffer = strdup(code_section_code_buffer_str.c_str());
|
||||
|
||||
return r;
|
||||
}
|
||||
|
@ -1,123 +0,0 @@
|
||||
// @BAKE gcc -o $*.out $@ -ggdb
|
||||
#include <stdio.h>
|
||||
|
||||
int had_seperation = 1;
|
||||
|
||||
#include "generated.h"
|
||||
|
||||
int mlen;
|
||||
|
||||
static inline
|
||||
int mlookup(const char * s, int state) {
|
||||
for (int i = 0; s[i] != '\0'; i++) {
|
||||
state = table[state][AS_SYMBOL(s[i])];
|
||||
if (state == NO_MATCH) {
|
||||
break;
|
||||
} else
|
||||
if (state > NO_MATCH) {
|
||||
mlen = i+1;
|
||||
return state;
|
||||
}
|
||||
}
|
||||
|
||||
mlen = 0;
|
||||
return NO_MATCH;
|
||||
}
|
||||
|
||||
#define N_KEYWORDS 34
|
||||
#define N_SEPARATORS 10
|
||||
#define RETARDATION_OFFSET (NO_MATCH+N_KEYWORDS+N_SEPARATORS)
|
||||
|
||||
#if 0
|
||||
# define TRACE fprintf(stderr, "--accepting rule at line %d (\"%.*s\")\n", __LINE__, mlen, ss);
|
||||
# define TRACE_DEFAULT fprintf(stderr, "--accepting default rule (\"%c\")\n", *ss);
|
||||
#else
|
||||
# define TRACE
|
||||
# define TRACE_DEFAULT
|
||||
#endif
|
||||
|
||||
int mlex(const char * s) {
|
||||
int state = 0;
|
||||
for (const char * ss = s; *ss != '\0'; ss += (mlen ? mlen : 1)) {
|
||||
int match = mlookup(ss, state_table[state]);
|
||||
if (match != NO_MATCH) {
|
||||
|
||||
} else {
|
||||
|
||||
}
|
||||
switch (match) {
|
||||
case NO_MATCH: {
|
||||
TRACE_DEFAULT;
|
||||
putchar(*ss);
|
||||
had_seperation = 0;
|
||||
} break;
|
||||
// keyword
|
||||
case NO_MATCH+1 ... NO_MATCH+N_KEYWORDS: {
|
||||
TRACE;
|
||||
if (had_seperation) {
|
||||
printf("\033[31m%.*s\033[0m", mlen, ss);
|
||||
} else {
|
||||
printf("%.*s", mlen, ss);
|
||||
}
|
||||
had_seperation = 0;
|
||||
} break;
|
||||
// Sep
|
||||
case NO_MATCH+N_KEYWORDS+1 ... RETARDATION_OFFSET: {
|
||||
TRACE;
|
||||
printf("\033[35m%c\033[0m", *ss);
|
||||
//putchar(*ss);
|
||||
had_seperation = 1;
|
||||
} break;
|
||||
// string
|
||||
case RETARDATION_OFFSET+1: {
|
||||
TRACE;
|
||||
state = 1;
|
||||
printf("\033[32m\"");
|
||||
} break;
|
||||
case RETARDATION_OFFSET+5: {
|
||||
TRACE;
|
||||
state = 0;
|
||||
printf("\"\033[0m");
|
||||
} break;
|
||||
// comment (multiline)
|
||||
case RETARDATION_OFFSET+2: {
|
||||
TRACE;
|
||||
state = 2;
|
||||
printf("\033[34m/*");
|
||||
} break;
|
||||
case RETARDATION_OFFSET+6: {
|
||||
TRACE;
|
||||
state = 0;
|
||||
printf("*/\033[0m");
|
||||
had_seperation = 1;
|
||||
} break;
|
||||
// comment (single line)
|
||||
case RETARDATION_OFFSET+3: {
|
||||
TRACE;
|
||||
state = 3;
|
||||
printf("\033[34m//");
|
||||
} break;
|
||||
case RETARDATION_OFFSET+7: {
|
||||
TRACE;
|
||||
state = 0;
|
||||
printf("\033[0m\n");
|
||||
had_seperation = 1;
|
||||
} break;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef MAIN_GENERATED_USER_MAIN
|
||||
|
||||
extern const char * source_code;
|
||||
#include "c_source_code_str.inc"
|
||||
|
||||
signed main(void) {
|
||||
//mlex("while (1) { printf(\"Heyo\"); }\n");
|
||||
mlex(source_code);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
38
source/snippets.inc
Normal file
38
source/snippets.inc
Normal file
@ -0,0 +1,38 @@
|
||||
const char * yy_lookup_str = "\n\
|
||||
static inline\n\
|
||||
int mlookup(const char * s, int state) {\n\
|
||||
for (int i = 0; s[i] != '\\0'; i++) {\n\
|
||||
state = table[state][AS_SYMBOL(s[i])];\n\
|
||||
if (state == NO_MATCH) {\n\
|
||||
break;\n\
|
||||
} else\n\
|
||||
if (state > NO_MATCH) {\n\
|
||||
mlen = i+1;\n\
|
||||
return state;\n\
|
||||
}\n\
|
||||
}\n\
|
||||
\n\
|
||||
mlen = 0;\n\
|
||||
return NO_MATCH;\n\
|
||||
}\n"
|
||||
;
|
||||
|
||||
const char * yy_lex_str_start = "\n\
|
||||
int yylex(const char * s) {\n\
|
||||
int state = 0;\n\
|
||||
for (const char * ss = s; *ss != '\\0'; ss += (mlen ? mlen : 1)) {\n\
|
||||
int match = mlookup(ss, state_table[state]);\n\
|
||||
if (match != NO_MATCH) {\n\
|
||||
\n\
|
||||
} else {\n\
|
||||
\n\
|
||||
}\n\
|
||||
switch (match) {\n"
|
||||
;
|
||||
|
||||
const char * yy_lex_str_end = "\n\
|
||||
}\n\
|
||||
}\n\
|
||||
return 0;\n\
|
||||
}\n"
|
||||
;
|
@ -25,6 +25,7 @@
|
||||
}
|
||||
\] {
|
||||
if (!*data_ptr) {
|
||||
REVERSE;
|
||||
BEGIN IN_SKIP_BACKWARD;
|
||||
}
|
||||
}
|
||||
@ -35,7 +36,7 @@
|
||||
}
|
||||
|
||||
<IN_SKIP_BACKWARD>{
|
||||
\[ { BEGIN INITIAL; }
|
||||
\[ { REVERSE; BEGIN INITIAL; }
|
||||
}
|
||||
|
||||
<IN_SKIP_FORWARD,IN_SKIP_BACKWARD>{
|
||||
@ -44,7 +45,6 @@
|
||||
%%
|
||||
|
||||
signed main(int argc, char * argv[]) {
|
||||
// XXX: modify this to use a string
|
||||
if (argc != 2) {
|
||||
printf("%s <file>", argv[0]);
|
||||
return 1;
|
||||
|
Loading…
x
Reference in New Issue
Block a user