dumpster fire

This commit is contained in:
anon 2024-07-19 04:10:50 +02:00
parent bec336d276
commit 1b9acda87b
8 changed files with 152 additions and 57 deletions

View File

@ -13,6 +13,16 @@ The C source contains definitions which
store the abstract state and/or are
required to construct it
`eaxhla.l` is strictly limited to raising
errors which sabotage scanning.
(Unterminated/partial tokens.)
`eaxhla.y` is strictly limited to raising
syntax errors.
`eaxhla.c` is strictly limited to raising
logical errors.
## compile.c|h
Responsible for transforming the abstract state
of eaxhla.c to something that can be understood

View File

@ -52,12 +52,43 @@ int eaxhla_deinit(void) {
}
static
int table_compare_unsigned(const void * arg, const void * obj) {
return *(const unsigned *) arg != ((const symbol_t*)obj)->_hash;
}
/* Searches symbol_table for an entry matching `name`.
 * The name is hashed with tommy_strhash_u32(); that hash is used both as
 * the search key and as the value handed to table_compare_unsigned().
 * Returns the result of tommy_hashtable_search() unchanged.
 */
static
void * symbol_lookup(const char * const name) {
    unsigned key = tommy_strhash_u32(0, name);

    return tommy_hashtable_search(&symbol_table, table_compare_unsigned, &key, key);
}
/* Stores `symbol` in symbol_table.
 * The hash of symbol->name is written to symbol->_hash first and then used
 * as the key of the insertion; symbol->_node is the node handed to tommy.
 * The table keeps the pointer itself, no copy of the symbol is made here.
 */
static
void symbol_insert(symbol_t * symbol) {
    symbol->_hash = tommy_strhash_u32(0, symbol->name);

    tommy_hashtable_insert(&symbol_table, &symbol->_node, symbol, symbol->_hash);
}
/* Registers the program block called `name` as the entry point.
 *
 * Only one program block is accepted. If one was already registered the
 * problem is reported through issue_error() and nothing else happens.
 * Otherwise is_program_found is set, append_instructions(ASMDIRMEM, 0) is
 * called and `scope` is set to a strdup()ed copy of `name`, so the caller
 * keeps ownership of `name`.
 */
void add_program(const char * const name) {
    if (is_program_found) {
        issue_error("only 1 entry point is allowed and a program block was already found");
        /* Bail out like add_procedure()/add_fastcall() do after an error;
         * falling through would append a second ASMDIRMEM directive and
         * overwrite the current scope.
         */
        return;
    }

    is_program_found = 1;

    append_instructions(ASMDIRMEM, 0);

    scope = strdup(name);
}
void add_variable(symbol_t variable) {
if (get_variable(variable.name)) {
issue_error("symbol '%s' redeclared as new variable", variable.name);
@ -70,16 +101,11 @@ void add_variable(symbol_t variable) {
symbol_t * heap_variable = malloc(sizeof(variable));
memcpy(heap_variable, &variable, sizeof(variable));
heap_variable->_hash = tommy_strhash_u32(0, heap_variable->name);
tommy_hashtable_insert(&symbol_table,
&heap_variable->_node,
heap_variable,
heap_variable->_hash
);
symbol_insert(heap_variable);
}
void add_procedure(symbol_t procedure) {
if (get_function(procedure.name)) {
if (get_symbol(procedure.name)) {
issue_error("symbol '%s' redeclared as new function", procedure.name);
return;
}
@ -90,16 +116,21 @@ void add_procedure(symbol_t procedure) {
symbol_t * heap_procedure = malloc(sizeof(procedure));
memcpy(heap_procedure, &procedure, sizeof(procedure));
heap_procedure->_hash = tommy_strhash_u32(0, heap_procedure->name);
tommy_hashtable_insert(&symbol_table,
&heap_procedure->_node,
heap_procedure,
heap_procedure->_hash
);
symbol_insert(heap_procedure);
//
append_instructions(ASMDIRMEM, procedure._id);
}
/* Emits a fastcall to the function named `destination`.
 * The name is resolved with get_function(); when that yields a symbol,
 * append_instructions(CALL, REL, <symbol _id>) is called.
 * When it yields NULL an error is issued and nothing is appended.
 */
void add_fastcall(const char * const destination) {
    symbol_t * const callee = get_function(destination);

    if (callee) {
        append_instructions(CALL, REL, callee->_id);
        return;
    }

    issue_error("can't fastcall '%s', no such known symbol", destination);
}
/* Are these literals ugly? yes.
* However it would be much more painful to calculate the values inline.
*/
@ -195,9 +226,9 @@ int validate_array_size(const int size) {
return 0;
}
char * make_scoped_name(const char * const scope, char * name) {
char * make_scoped_name(const char * const scope, const char * const name) {
if (!scope) {
return name;
return (char*)name;
}
char * r;
@ -210,23 +241,29 @@ char * make_scoped_name(const char * const scope, char * name) {
r[2 + scl] = '_';
memcpy(r + 2 + scl + 1, name, nml);
r[2 + scl + 1 + nml] = '\0';
free(name);
return r;
}
static
void * symbol_lookup(const char * const name) {
unsigned lookup_hash = tommy_strhash_u32(0, name);
void * r = tommy_hashtable_search(&symbol_table,
table_compare_unsigned,
&lookup_hash,
lookup_hash
);
/* Looks up a symbol of any type by name.
 *
 * The name is tried verbatim first; if that fails it is tried again
 * prefixed with the current `scope` (see make_scoped_name()).
 * Returns the symbol, or NULL when neither spelling is known.
 * `name` is only read, the caller keeps ownership of it.
 */
symbol_t * get_symbol(const char * const name) {
    symbol_t * r = symbol_lookup(name);
    if (r) {
        return r;
    }

    char * alternative_name = make_scoped_name(scope, name);
    if (alternative_name == name) {
        /* Without a scope make_scoped_name() hands `name` itself back:
         * the lookup above already failed for it and the string belongs
         * to the caller, so it must not be freed here.
         */
        return NULL;
    }

    r = symbol_lookup(alternative_name);
    free(alternative_name);

    return r;
}
symbol_t * get_variable(const char * const name) {
symbol_t *r = symbol_lookup(name);
symbol_t * r;
char * varname = make_scoped_name(scope, name);
r = symbol_lookup(varname);
if (r
&& r->symbol_type != VARIABLE) {
issue_error("the symbol '%s' is not a variable", name);
@ -236,7 +273,8 @@ symbol_t * get_variable(const char * const name) {
}
symbol_t * get_function(const char * const name) {
symbol_t * r = symbol_lookup(name);
symbol_t * r;
r = symbol_lookup(name);
if (r
&& r->symbol_type != FUNCTION) {
issue_error("the symbol '%s' is not a function", name);

View File

@ -54,16 +54,20 @@ extern char * yyfilename;
extern int eaxhla_init(void);
extern int eaxhla_deinit(void);
extern char * make_scoped_name(const char * const scope, char * name);
extern char * make_scoped_name(const char * const scope, const char * const name);
extern int can_fit(const int type, const long long value);
extern int validate_array_size(const int size);
extern void add_variable(symbol_t variable);
extern symbol_t * get_variable(const char * const name);
extern symbol_t * get_symbol(const char * const name);
//extern void add_function(symbol_t function);
extern void add_procedure(symbol_t procedure);
extern symbol_t * get_function(const char * const name);
extern void add_program(const char * const name);
extern void add_fastcall(const char * const destination);
extern int type2size(int type);
extern int size2bytes(const int size);

View File

@ -162,6 +162,7 @@ hlt { return ITHLT; }
idiv { return ITIDIV; }
imul { return ITIMUL; }
inc { return ITINC; }
jmp { return ITJMP; }
leave { return ITLEAVE; }
lock { return ITLOCK; }
mov { return ITMOV; }
@ -201,7 +202,6 @@ library { BEGIN INITIAL; return END_LIBRARY; }
}
<IN_STRING>{
/* XXX: multiline strings will die */
/* XXX: the first WORD_SIZE_IN_BYTES bytes should be 0'd */
/* XXX: i wanted short strings to be literals;
this however clashes with the sanity of machine blocks;
@ -289,7 +289,11 @@ library { BEGIN INITIAL; return END_LIBRARY; }
}
{identifier} { yylval.strval = strdup(yytext); return IDENTIFIER; }
{identifier}: { yylval.strval = strdup(yytext); return LABEL; }
{identifier}: {
/* The pattern ends in ':'; overwrite that last matched character so that
 * only the identifier part is duplicated into the LABEL token's value. */
yytext[yyleng-1] = '\0';
yylval.strval = strdup(yytext);
return LABEL;
}
. { issue_error("unknown symbol \033[1m'%c'\033[0m", yytext[0]); }

View File

@ -48,7 +48,7 @@
%token<strval> IDENTIFIER LABEL
%type<argval> immediate
%type<intval> memory dereference
%type<intval> memory dereference relative
%type<intval> artimetric_block artimetric_expression artimetric_operand
%type<intval> value
%token<intval> LITERAL
@ -86,7 +86,7 @@
// Instructions
%token INOP
// #placeholder<instruction_token_list> BEGIN
%token ITADC ITADD ITAND ITCMP ITDEC ITDIV ITHLT ITIDIV ITIMUL ITINC ITLEAVE ITLOCK ITMOV ITMUL ITNEG ITNOT ITOR ITPAUSE ITRETF ITRETN ITSAR ITSBB ITSUB ITSYSCALL ITSYSENTER ITSYSEXIT ITSYSRET ITXOR
%token ITADC ITADD ITAND ITCMP ITDEC ITDIV ITHLT ITIDIV ITIMUL ITINC ITJMP ITLEAVE ITLOCK ITMOV ITMUL ITNEG ITNOT ITOR ITPAUSE ITRETF ITRETN ITSAR ITSBB ITSUB ITSYSCALL ITSYSENTER ITSYSEXIT ITSYSRET ITXOR
// #placeholder<instruction_token_list> END
// Instruction-likes
@ -107,13 +107,8 @@ program: program_head declaration_section MYBEGIN code END_PROGRAM {
;
program_head: program_specifier PROGRAM IDENTIFIER {
if (is_program_found) {
issue_error("only 1 entry point is allowed and a program block was already found");
YYERROR;
}
is_program_found = 1;
append_instructions(ASMDIRMEM, 0);
scope = $3; // !!! IF WE START USING THE REFERENCE OF $3 THIS WILL DOUBLE FREE
add_program($3);
free($3);
};
program_specifier: %empty
@ -133,6 +128,7 @@ function: function_head declaration_section MYBEGIN code END_PROCEDURE {
function_head: function_specifier PROCEDURE IDENTIFIER {
scope = strdup($3);
symbol_t procedure;
procedure.name = $3;
add_procedure(procedure);
@ -154,6 +150,8 @@ declaration:
$$.name = make_scoped_name(scope, $3);
$$.elements = 1;
add_variable($$);
free($3);
}
| variable_specifier type IDENTIFIER '=' LITERAL {
$$.type = $2;
@ -164,6 +162,8 @@ declaration:
$$.elements = 1;
$$.value = $5;
add_variable($$);
free($3);
}
| variable_specifier type '<' value '>' IDENTIFIER {
$$.type = $2;
@ -173,6 +173,8 @@ declaration:
$$.name = make_scoped_name(scope, $6);
$$.elements = $4;
add_variable($$);
free($6);
}
| variable_specifier type '<' value '>' IDENTIFIER '=' ARRAY_LITERAL {
$$.type = $2;
@ -186,6 +188,8 @@ declaration:
$$.elements = $4;
$$.array_value = $8.data;
add_variable($$);
free($6);
}
| variable_specifier type '<' '>' IDENTIFIER '=' ARRAY_LITERAL {
$$.type = $2;
@ -193,6 +197,8 @@ declaration:
$$.elements = $7.len;
$$.array_value = $7.data;
add_variable($$);
free($5);
}
;
@ -210,13 +216,16 @@ type: S8 { $$ = S8; }
| U64 { $$ = U64; }
;
immediate: LITERAL { $$.type = IMM; $$.value = $1; }
immediate: LITERAL {
$$.type = IMM;
$$.value = $1;
}
| IDENTIFIER {
char * varname = make_scoped_name(scope, $1);
symbol_t * variable = get_variable(varname);
symbol_t * variable = get_variable($1);
$$.type = REL;
$$.value = variable->_id;
free(varname);
free($1);
}
;
@ -229,13 +238,19 @@ dereference: '[' IDENTIFIER ']' { $$ = 0; /* XXX: how the fuck do i dereference?
| '[' IDENTIFIER '-' value ']' { $$ = 0; /* XXX: how the fuck do i dereference? */ }
;
/* An identifier used as a relative operand: its value is the `_id` of the
 * symbol that get_symbol() finds for the identifier. */
relative: IDENTIFIER {
symbol_t * relative = get_symbol($1);
/* NOTE(review): get_symbol() returns NULL for an unknown name and the
 * result is dereferenced below without a check - confirm. */
breakpoint();
/* NOTE(review): $1 is not freed in this action, unlike the other
 * IDENTIFIER actions in this grammar - looks like a leak, confirm. */
$$ = relative->_id;
}
;
value: artimetric_block
| LITERAL
| IDENTIFIER {
char * varname = make_scoped_name(scope, $1);
symbol_t * var = get_variable(varname);
symbol_t * var = get_variable($1);
$$ = var->value;
free(var);
free($1);
}
;
@ -253,13 +268,20 @@ code: %empty
| repeat code
| if code
| call code
| LABEL code { /* XXX */ free($1); }
| label code
| machine code
| BREAK code
| exit code
| instruction code
;
/* A label is registered under its scoped name through add_procedure(). */
label: LABEL {
/* NOTE(review): only `name` is assigned before the struct is passed by
 * value - confirm add_procedure() does not rely on the other fields. */
symbol_t label;
/* NOTE(review): $1 is not freed here; make_scoped_name() appears to return
 * a separate buffer when a scope is set - possible leak, confirm. */
label.name = make_scoped_name(scope, $1);
add_procedure(label);
}
;
repeat: REPEAT code END_REPEAT
| UNTIL logic REPEAT code END_REPEAT
;
@ -305,9 +327,7 @@ machine_code: %empty
;
call: FASTCALL IDENTIFIER arguments {
// XXX
symbol_t * function = get_function($2);
append_instructions(CALL, REL, function->_id);
add_fastcall($2);
free($2);
}
;
@ -443,6 +463,7 @@ instruction: INOP { append_instructions(NOP); }
| ITPAUSE { append_instructions(PAUSE); }
| ITHLT { append_instructions(HLT); }
| ITLOCK { append_instructions(LOCK); }
| ITJMP relative { append_instructions( JMP, D32, REL, 0 ); }
| ITINC register { append_instructions( INC, $2.size, REG, $2.number ); }
| ITDEC register { append_instructions( DEC, $2.size, REG, $2.number ); }
| ITNOT register { append_instructions( NOT, $2.size, REG, $2.number ); }

11
test/label.eax Normal file
View File

@ -0,0 +1,11 @@
program main
u8 <> msg = "heyo\n"
begin
redo:
mov eax 1
mov edi 1
mov esi msg
mov edx 5
syscall
jmp redo
end program

View File

@ -13,7 +13,12 @@ proc make_parser_rules {is} {
dict set r enum "$$n.type"
dict set r value "$$n.value"
# XXX
dict set r size "32"
dict set r size "D32"
}
"relative" {
dict set r enum "REL"
dict set r value "$$n"
dict set r size "D32"
}
"memory" {
dict set r enum "MEM"
@ -25,10 +30,11 @@ proc make_parser_rules {is} {
return $r
}
proc make_parser_rule {i} {
set token_name [string toupper [lindex $i 0]]
if {[llength $i] == 1} {
set rule [format " | IT%s { append_instructions(%s); }" \
[string toupper [lindex $i 0]] \
[string toupper [lindex $i 0]] \
$token_name \
$token_name \
]
} elseif {[llength $i] == 2} {
set arg [init_iarg [lindex $i 1] 2]
@ -38,10 +44,10 @@ proc make_parser_rules {is} {
%s,\
%s\
); \}" \
[string toupper [lindex $i 0]] \
$token_name \
[lindex $i 1] \
\
[string toupper [lindex $i 0]] \
$token_name \
[dict get $arg size] \
[dict get $arg enum] \
[dict get $arg value] \
@ -57,11 +63,11 @@ proc make_parser_rules {is} {
%s,\
%s\
); \}" \
[string toupper [lindex $i 0]] \
$token_name \
[lindex $i 1] \
[lindex $i 2] \
\
[string toupper [lindex $i 0]] \
$token_name \
[dict get $arg1 size] \
[dict get $arg1 enum] \
[dict get $arg1 value] \

View File

@ -9,6 +9,7 @@ set instructions {
{pause}
{hlt}
{lock}
{jmp relative}
{inc register}
{dec register}
{not register}