225 lines
6.2 KiB
C
225 lines
6.2 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
|
|
#include "util.h"
|
|
#include "jeger.h"
|
|
#include "snippets.inc"
|
|
|
|
// XXX
/* Maps an input character to its column index in the transition table.
 * The argument is parenthesized so that compound expressions are converted
 * as a whole instead of only their first operand.
 * An alternative mapping that was left disabled here: (c - 'a').
 */
#define AS_SYMBOL(c) ((int)(c))
/* Fill value for table cells without a transition; accepting cells store
 * TOKEN_OFFSET + 1 + <rule index>. XXX
 */
#define TOKEN_OFFSET 128
|
|
// ---
|
|
|
|
/* Rule list and the number of entries in it.
 * The table builder walks it until an entry whose pattern is NULL,
 * the function emitter until an entry whose code is NULL.
 */
rule_t *rules;
int n_rules = 0;

/* Names of the scanner states, indexed by state number, and their count. */
char **state_names;
int n_states = 0;

/* Number of columns in each row of the transition table. */
int alphabet_size = 128;

/* User code copied verbatim into the generated file:
 * the first before the generated functions, the second after them.
 */
char *definition_section_code_buffer;
char *code_section_code_buffer;
|
|
|
|
|
|
static inline
|
|
void put_header(FILE * f, const int alphabet_size, const int no_match) {
|
|
#define DEFINE_INT(m, n) fprintf(f, "#define " #m " %d\n", n);
|
|
#define DEFINE_STR(m, s) fprintf(f, "#define " #m " %s\n", s);
|
|
|
|
DEFINE_INT(ALPHABET_SIZE, alphabet_size);
|
|
DEFINE_INT(N_RULES, n_rules);
|
|
DEFINE_INT(NO_MATCH, no_match);
|
|
DEFINE_STR(BEGIN, "state = ");
|
|
DEFINE_STR(REVERSE, "(direction *= -1)");
|
|
fputs("#define AS_SYMBOL(c) c\n", /* (c-'a')\n */ f);
|
|
|
|
// XXX make this conditional
|
|
DEFINE_STR(TRACE, "fprintf(stderr, \"--accepting rule at line %d (\"%.*s\")\\n\", __LINE__, mlen, ss);");
|
|
DEFINE_STR(TRACE_DEFAULT, "fprintf(stderr, \"--accepting default rule (\"%c\")\\n\", *ss);");
|
|
// DEFINE_STR(TRACE, "");
|
|
// DEFINE_STR(TRACE_DEFAULT, "");
|
|
|
|
// XXX we want no globals
|
|
fputs("int mlen;\n", f);
|
|
fputs("int direction = 1;\n", f);
|
|
|
|
fputs("\n", f);
|
|
}
|
|
|
|
// Writes `text` so that it is safe inside a C block comment: a space is
// inserted between a '*' and a directly following '/', which would otherwise
// terminate the comment early.
static
void put_comment_text(FILE * f, const char * text) {
    char previous = '\0';
    for (const char * c = text; *c != '\0'; c++) {
        if (previous == '*' && *c == '/') {
            fputc(' ', f);
        }
        fputc(*c, f);
        previous = *c;
    }
}

// Writes the transition table to `f` as the C definition
// `int table[N_RULES][ALPHABET_SIZE]` using designated initializers.
//
//   f             - output stream
//   table         - row-major matrix with `alphabet_size` cells per row
//   prefixes      - per-row label printed in a trailing comment; an entry may be NULL
//   n_states      - number of rows to emit (the callers in this file pass n_rules)
//   alphabet_size - number of cells per row
//
// Column designators are printed as character constants for printable
// characters (with '\\' and '\'' escaped) and as plain numbers otherwise.
static inline
void put_table(FILE * f, const int * table, char * * prefixes, int n_states, int alphabet_size) {
    fputs("int table[N_RULES][ALPHABET_SIZE] = {\n", f);
    for (int i = 0; i < n_states; i++) {
        const int * row = table + i*alphabet_size;

        fprintf(f, "\t[%d] = {", i);
        for (int h = 0; h < alphabet_size; h++) {
            if (h == '\\') {
                fprintf(f, "['\\\\'] = %d, ", row[h]);
            } else if (h == '\'') {
                fprintf(f, "['\\''] = %d, ", row[h]);
            } else if (isprint(h)) {
                fprintf(f, "['%c'] = %d, ", h, row[h]);
            } else {
                fprintf(f, "[%d] = %d, ", h, row[h]);
            }
        }

        // Rows that never received a prefix are labelled "(null)" explicitly;
        // handing a NULL pointer to a %s conversion is undefined.
        fputs("}, /* \"", f);
        put_comment_text(f, prefixes[i] != NULL ? prefixes[i] : "(null)");
        fputs("\" */\n", f);
    }
    fputs("};\n", f);
}
|
|
|
|
static
|
|
void put_state_table(FILE * f, int * states) {
|
|
// XXX do i even need this table?
|
|
fprintf(f, "int state_table[%d] = {\n", n_states);
|
|
for (int i = 0; i < n_states; i++) {
|
|
if (states[i] == -1) { break; } // XXX
|
|
fprintf(f, "\t[%d] = %d,\n", i, states[i]);
|
|
}
|
|
fputs("};\n\n", f);
|
|
|
|
for (int i = 0; i < n_states; i++) {
|
|
fprintf(
|
|
f,
|
|
"#define %s %d\n",
|
|
state_names[i],
|
|
states[i]
|
|
);
|
|
}
|
|
|
|
fputs("\n", f);
|
|
}
|
|
|
|
/*
 * Finds the table row whose recorded prefix should be extended by `pattern`.
 *
 * Scans `prefixes` from index `current_state_start` up to the first NULL
 * entry and returns the highest index whose string is a leading part of
 * `pattern`. Returns `current_state_start` when nothing matches.
 *
 *   pattern             - NUL-terminated rule pattern
 *   prefixes            - NULL-terminated array of recorded prefixes
 *   current_state_start - first index to consider
 *
 * NOTE(review): the scan is only stopped by a NULL entry, so it can run into
 * rows recorded for other states - confirm this is intended.
 */
static
int get_most_common_prefix(const char * pattern, char * * prefixes, int current_state_start) {
    int best = current_state_start;
    int slot = current_state_start;

    while (prefixes[slot] != NULL) {
        const char * candidate = prefixes[slot];
        const size_t candidate_length = strlen(candidate);

        if (strncmp(pattern, candidate, candidate_length) == 0) {
            best = slot;
        }
        ++slot;
    }

    return best;
}
|
|
|
|
static
|
|
void make_and_put_table(FILE * f) {
|
|
// Init
|
|
int states[n_states];
|
|
INITIALIZE_ARRAY(states, n_states, -1);
|
|
states[0] = 0;
|
|
|
|
char * prefixes[n_rules];
|
|
INITIALIZE_ARRAY(prefixes, n_rules, NULL);
|
|
|
|
int table[n_rules][alphabet_size];
|
|
INITIALIZE_MATRIX(table, n_rules, alphabet_size, TOKEN_OFFSET);
|
|
|
|
// Construct table
|
|
int next_free_slot = 1;
|
|
for (
|
|
int rule_index = 0;
|
|
rules[rule_index].pattern != NULL;
|
|
rule_index++
|
|
) {
|
|
const rule_t * rule = &rules[rule_index];
|
|
|
|
int current_state_start = states[rule->state];
|
|
if (current_state_start == -1) {
|
|
current_state_start = next_free_slot;
|
|
states[rule->state] = next_free_slot;
|
|
++next_free_slot;
|
|
}
|
|
|
|
int most_common_prefix_state = get_most_common_prefix(
|
|
rule->pattern,
|
|
prefixes,
|
|
current_state_start
|
|
);
|
|
|
|
prefixes[current_state_start] = strdup("");
|
|
|
|
int most_common_prefix_index = strlen(prefixes[most_common_prefix_state]);
|
|
const char * last_char = rule->pattern + most_common_prefix_index;
|
|
|
|
table
|
|
[most_common_prefix_state]
|
|
[AS_SYMBOL(rule->pattern[most_common_prefix_index])]
|
|
= next_free_slot
|
|
;
|
|
|
|
for (
|
|
int i = most_common_prefix_index+1;
|
|
rule->pattern[i] != '\0';
|
|
i++, next_free_slot++
|
|
) {
|
|
table
|
|
[next_free_slot]
|
|
[AS_SYMBOL(rule->pattern[i])]
|
|
= next_free_slot + 1
|
|
;
|
|
prefixes[next_free_slot] = strndup(rule->pattern, i);
|
|
last_char = rule->pattern + i;
|
|
}
|
|
|
|
int last_position = (last_char == rule->pattern
|
|
|| most_common_prefix_index == last_char - rule->pattern)
|
|
? most_common_prefix_state
|
|
: next_free_slot-1
|
|
;
|
|
|
|
table
|
|
[last_position]
|
|
[AS_SYMBOL(*last_char)]
|
|
= TOKEN_OFFSET+1 + rule_index
|
|
;
|
|
|
|
put_table(stderr, (int*)table, prefixes, n_rules, alphabet_size);
|
|
fputs("/* ================== */\n", stderr);
|
|
}
|
|
|
|
// Output
|
|
put_table(f, (int*)table, prefixes, n_rules, alphabet_size);
|
|
put_state_table(f, states);
|
|
}
|
|
|
|
static
|
|
void put_functions(FILE * f) {
|
|
fputs(yy_lookup_str, f);
|
|
|
|
fputs(yy_lex_str_start, f);
|
|
for (rule_t * rule = rules; rule->code != NULL; rule++) {
|
|
fprintf(f, "\tcase %ld: {\n" "%s\n" "\t} break;\n", rule - rules, rule->code);
|
|
}
|
|
fputs(yy_lex_str_end, f);
|
|
}
|
|
|
|
void deinit_jeger(void) {
|
|
for (int i = 0; i < n_states; i++) {
|
|
free(state_names[i]);
|
|
}
|
|
for (int i = 0; i < n_rules; i++) {
|
|
free(rules[i].pattern);
|
|
free(rules[i].code);
|
|
}
|
|
|
|
n_rules = 0;
|
|
n_states = 0;
|
|
}
|
|
|
|
void generate(const char * filename) {
|
|
FILE * f = fopen(filename, "w");
|
|
|
|
put_header(f, alphabet_size, TOKEN_OFFSET);
|
|
make_and_put_table(f);
|
|
|
|
fputs(definition_section_code_buffer, f);
|
|
put_functions(f);
|
|
fputs(code_section_code_buffer, f);
|
|
}
|