a bunch of stuff

This commit is contained in:
anon 2024-07-23 23:06:40 +02:00
parent 549937a53a
commit c17348f18d
13 changed files with 210 additions and 154 deletions

@ -10,6 +10,8 @@ endif
"runtime! syntax/c.vim
"unlet b:current_syntax
syn iskeyword @,48-57,_
syn region eaxSingleLineComment start=+//+ end=+\n+
syn region eaxSingleLineComment2 start=+#+ end=+\n+
syn region eaxMultiLineComment start=+\/\*+ end=+\*\/+
@ -18,7 +20,11 @@ syn keyword eaxKeyword program machine procedure begin until repeat break if the
syn keyword eaxType u8 u16 u32 u64 s8 s16 s32 s64
syn keyword eaxInstruction inc xor mov
syn keyword eaxInstructionLike fastcall exit
syn keyword eaxRegister rax rbx rcx rdx rbp rsp rip rdi r7 r8 r9 r10 r11 r12 r13 r14 r15
" General-purpose register names in all four operand widths
" (64-bit, 32-bit, 16-bit and 8-bit, one width per continuation line).
" The extended registers use the r8..r15 / r8d..r15d spelling, which is the
" spelling the scanner recognises; the rg8-style names are not tokens.
syn keyword eaxRegister
\ rax rcx rdx rbx rsp rbp rsi rdi r8 r9 r10 r11 r12 r13 r14 r15
\ eax ecx edx ebx esp ebp esi edi r8d r9d r10d r11d r12d r13d r14d r15d
\ ax cx dx bx sp bp si di r8w r9w r10w r11w r12w r13w r14w r15w
\ al cl dl bl spl bpl sil dil r8b r9b r10b r11b r12b r13b r14b r15b
" Numeric literals: optionally negative decimal, 0x-prefixed hexadecimal and
" 0b-prefixed binary. The prefixed forms are defined after eaxInt because,
" when several match items start at the same position, the one defined last
" takes priority; otherwise the leading "0" would be claimed by eaxInt.
syn match eaxInt "\-\?\d\+"
syn match eaxHex "0x[0-9a-fA-F]\+"
" The brackets must stay unescaped: [01] is a collection of binary digits,
" whereas \[01\] would match the literal text "[01]".
syn match eaxBin "0b[01]\+"

@ -128,8 +128,8 @@ void _append_instructions(const unsigned argc, ...) {
void append_exit(int code) {
if (system_type == UNIX) {
append_instructions(MOV, D32, REG, R0, IMM, 60,
MOV, D32, REG, R7, IMM, code,
append_instructions(MOV, D32, REG, GR0, IMM, 60,
MOV, D32, REG, GR7, IMM, code,
SYSCALL
);
}

@ -17,31 +17,34 @@
#include "assembler.h"
#include "compile.h"
int has_encountered_error = 0;
int is_program_found = 0;
char * yyfilename = "";
static symbol_t * undeclared_symbol;
/* Used for naming variables constructed from literals
*/
size_t anon_variable_counter = 0;
static size_t anon_variable_counter = 0;
/* Used to check whether all labels without
* previous declarations (forward jumps)
* have been declared later in code
*/
size_t unresolved_label_counter = 0;
static size_t unresolved_label_counter = 0;
static unsigned symbol_id = 1;
tommy_hashtable symbol_table;
int has_encountered_error = 0;
int is_program_found = 0;
char * scope = NULL;
static char * scope = NULL;
/* Releases the string held in `scope` and resets the pointer to NULL.
 * Because the pointer is cleared, calling this again (free(NULL) is a
 * no-op) cannot double-free the old scope string.
 */
void empty_out_scope(void) {
free(scope);
scope = NULL;
}
char * yyfilename = "";
/* Prepares the module's global state: the symbol hash table and the
 * zero-filled placeholder symbol stored in `undeclared_symbol`.
 *
 * Returns 0 on success and 1 when the placeholder could not be allocated.
 * The hash table is initialised first, so eaxhla_deinit() remains safe to
 * call even after a failed initialisation.
 */
int eaxhla_init(void) {
    tommy_hashtable_init(&symbol_table, 256);

    /* calloc takes (element count, element size), in that order */
    undeclared_symbol = (symbol_t *)calloc(1, sizeof(symbol_t));
    if (!undeclared_symbol) {
        return 1;
    }

    return 0;
}
@ -74,6 +77,7 @@ void free_symbol(void * data) {
int eaxhla_deinit(void) {
empty_out_scope();
free(undeclared_symbol);
tommy_hashtable_foreach(&symbol_table, free_symbol);
tommy_hashtable_done(&symbol_table);
@ -207,8 +211,8 @@ void add_program(const char * const name) {
static
void _add_variable(unsigned type, const char * const name, size_t size, void * value) {
char * full_name = make_scoped_name(scope, name);
if (get_variable(full_name)) {
issue_error("symbol '%s' redeclared as new variable", full_name);
if (get_symbol(name)) {
issue_error("symbol '%s' redeclared as new variable", name);
return;
}
@ -384,8 +388,12 @@ symbol_t * get_variable(const char * const name) {
if (r
&& r->symbol_type != VARIABLE_SYMBOL) {
issue_error("the symbol '%s' is not a variable", name);
r = NULL;
}
if (!r) {
r = undeclared_symbol;
}
free(varname);
return r;
}

@ -33,8 +33,10 @@ typedef struct {
} symbol_t;
/* private:
symbol_t * new_symbol(const char * const name);
void free_symbol(void * name);
*/
extern tommy_hashtable symbol_table;

@ -6,6 +6,8 @@
sds string_literal_buffer;
long char_literal_buffer = 0;
#define YY_USER_INIT \
string_literal_buffer = sdsnew("");
%}
@ -18,7 +20,7 @@ hex [0123456789abcdef]
uhex [0123456789ABCDEF]
%x IN_COMMENT IN_MULTILINE_COMMENT
%x IN_STRING
%x IN_CHAR IN_STRING
%x IN_END IN_UNKNOWN_END
%x IN_ARTIMETRIC_BLOCK
@ -48,11 +50,7 @@ end { BEGIN IN_END; }
fast { return FAST; }
unix { return UNIX; }
/* #placeholder<register_scanner_instructions> END
*/
in { return TIN; }
\= { return '='; }
s8 { return S8; }
s16 { return S16; }
@ -63,6 +61,7 @@ u16 { return U16; }
u32 { return U32; }
u64 { return U64; }
\' { BEGIN IN_CHAR; }
\" { BEGIN IN_STRING; }
\!\= { return ITNEQ; }
@ -71,7 +70,7 @@ u64 { return U64; }
(\/\/)|\# { BEGIN IN_COMMENT; }
\/\* { BEGIN IN_MULTILINE_COMMENT; }
\[|\]|\{|\}|\+|\-|\*|\/|\%|\^|\:|\<|\> {
\[|\]|\{|\}|\+|\-|\*|\/|\%|\^|\:|\<|\>|\= {
return yytext[0];
}
@ -88,14 +87,14 @@ rsp { return RSP; }
rbp { return RBP; }
rsi { return RSI; }
rdi { return RDI; }
rg8 { return RG8; }
rg9 { return RG9; }
rg10 { return RG10; }
rg11 { return RG11; }
rg12 { return RG12; }
rg13 { return RG13; }
rg14 { return RG14; }
rg15 { return RG15; }
r8 { return R8; }
r9 { return R9; }
r10 { return R10; }
r11 { return R11; }
r12 { return R12; }
r13 { return R13; }
r14 { return R14; }
r15 { return R15; }
eax { return EAX; }
ecx { return ECX; }
edx { return EDX; }
@ -104,14 +103,14 @@ esp { return ESP; }
ebp { return EBP; }
esi { return ESI; }
edi { return EDI; }
rg8d { return RG8D; }
rg9d { return RG9D; }
rg10d { return RG10D; }
rg11d { return RG11D; }
rg12d { return RG12D; }
rg13d { return RG13D; }
rg14d { return RG14D; }
rg15d { return RG15D; }
r8d { return R8D; }
r9d { return R9D; }
r10d { return R10D; }
r11d { return R11D; }
r12d { return R12D; }
r13d { return R13D; }
r14d { return R14D; }
r15d { return R15D; }
ax { return AX; }
cx { return CX; }
dx { return DX; }
@ -163,7 +162,9 @@ hlt { return ITHLT; }
idiv { return ITIDIV; }
imul { return ITIMUL; }
inc { return ITINC; }
je { return ITJE; }
jmp { return ITJMP; }
jne { return ITJNE; }
leave { return ITLEAVE; }
lock { return ITLOCK; }
mov { return ITMOV; }
@ -172,6 +173,7 @@ neg { return ITNEG; }
not { return ITNOT; }
or { return ITOR; }
pause { return ITPAUSE; }
pop { return ITPOP; }
retf { return ITRETF; }
retn { return ITRETN; }
sar { return ITSAR; }
@ -202,12 +204,25 @@ library { BEGIN INITIAL; return END_LIBRARY; }
.* { issue_error("unknown end-sequence \033[1m'%s'\033[0m", yytext); BEGIN INITIAL; return 0; }
}
<IN_STRING>{
/* XXX: the first WORD_SIZE_IN_BYTES bytes should be 0'd */
<IN_CHAR>{
    /* Scans the inside of a '...' character literal. The value is collected
        in char_literal_buffer and handed to the parser as a LITERAL when the
        closing quote is reached.
    */
    /* XXX: i wanted short strings to be literals;
        this however clashes with the sanity of machine blocks;
        those should be moved to '' (exactly like in Holy C)
    */
    /* XXX: THIS CODE IS TOTAL DEATH
    */
    /* The closing quote has to be listed before `.`: both patterns match a
        single character and flex picks whichever rule appears first, so with
        `.` on top the literal could never be terminated.
        The scanner must also return to INITIAL here, otherwise everything
        after the literal would still be scanned as character data.
    */
\'  {
        BEGIN INITIAL;
        yylval.intval = char_literal_buffer;
        char_literal_buffer = 0;
        return LITERAL;
    }
    /* Every other character overwrites the buffer, so when several
        characters are given the last one becomes the value.
    */
.   { char_literal_buffer = yytext[0]; }
    /* `.` does not match a newline, so a raw line break is stored explicitly.
        NOTE(review): a second `\n` rule reporting "unterminated character
        literal sequence" used to follow this one; it was unreachable (same
        pattern, listed later) and has been dropped. Decide whether a raw
        newline should be accepted or rejected.
    */
\n  { char_literal_buffer = '\n'; }
}
<IN_STRING>{
/* XXX: the first WORD_SIZE_IN_BYTES bytes should be 0'd */
\" {
BEGIN INITIAL;
yylval.blobval.data = malloc(sdslen(string_literal_buffer));

@ -83,8 +83,8 @@
%type<regval> register register64s register32s register16s register8s
// #placeholder<register_token_list> BEGIN
%token RAX RCX RDX RBX RSP RBP RSI RDI RG8 RG9 RG10 RG11 RG12 RG13 RG14 RG15
%token EAX ECX EDX EBX ESP EBP ESI EDI RG8D RG9D RG10D RG11D RG12D RG13D RG14D RG15D
%token RAX RCX RDX RBX RSP RBP RSI RDI R8 R9 R10 R11 R12 R13 R14 R15
%token EAX ECX EDX EBX ESP EBP ESI EDI R8D R9D R10D R11D R12D R13D R14D R15D
%token AX CX DX BX SP BP SI DI R8W R9W R10W R11W R12W R13W R14W R15W
%token AL CL DL BL SPL BPL SIL DIL R8B R9B R10B R11B R12B R13B R14B R15B
@ -93,7 +93,7 @@
// Instructions
%token INOP
// #placeholder<instruction_token_list> BEGIN
%token ITADC ITADD ITAND ITCMP ITDEC ITDIV ITHLT ITIDIV ITIMUL ITINC ITJMP ITLEAVE ITLOCK ITMOV ITMUL ITNEG ITNOT ITOR ITPAUSE ITRETF ITRETN ITSAR ITSBB ITSUB ITSYSCALL ITSYSENTER ITSYSEXIT ITSYSRET ITXOR
%token ITADC ITADD ITAND ITCMP ITDEC ITDIV ITHLT ITIDIV ITIMUL ITINC ITJE ITJMP ITJNE ITLEAVE ITLOCK ITMOV ITMUL ITNEG ITNOT ITOR ITPAUSE ITPOP ITRETF ITRETN ITSAR ITSBB ITSUB ITSYSCALL ITSYSENTER ITSYSEXIT ITSYSRET ITXOR
// #placeholder<instruction_token_list> END
// Instruction-likes
@ -204,7 +204,10 @@ memory: artimetric_block
| dereference
;
dereference: '[' IDENTIFIER ']' { $$ = 0; /* XXX: how the fuck do i dereference? */ }
dereference: '[' IDENTIFIER ']' {
$$ = get_variable($2)->_id;
free($2);
}
| '[' IDENTIFIER '+' value ']' { $$ = 0; /* XXX: how the fuck do i dereference? */ }
| '[' IDENTIFIER '-' value ']' { $$ = 0; /* XXX: how the fuck do i dereference? */ }
;
@ -312,76 +315,76 @@ register: register64s { $$ = $1; $$.size = D64; }
;
// #placeholder<register_parser_rules> BEGIN
register64s: RAX { $$.number = R0; }
| RCX { $$.number = R1; }
| RDX { $$.number = R2; }
| RBX { $$.number = R3; }
| RSP { $$.number = R4; }
| RBP { $$.number = R5; }
| RSI { $$.number = R6; }
| RDI { $$.number = R7; }
| RG8 { $$.number = R8; }
| RG9 { $$.number = R9; }
| RG10 { $$.number = R10; }
| RG11 { $$.number = R11; }
| RG12 { $$.number = R12; }
| RG13 { $$.number = R13; }
| RG14 { $$.number = R14; }
| RG15 { $$.number = R15; }
register64s: RAX { $$.number = GR0; }
| RCX { $$.number = GR1; }
| RDX { $$.number = GR2; }
| RBX { $$.number = GR3; }
| RSP { $$.number = GR4; }
| RBP { $$.number = GR5; }
| RSI { $$.number = GR6; }
| RDI { $$.number = GR7; }
| R8 { $$.number = GR8; }
| R9 { $$.number = GR9; }
| R10 { $$.number = GR10; }
| R11 { $$.number = GR11; }
| R12 { $$.number = GR12; }
| R13 { $$.number = GR13; }
| R14 { $$.number = GR14; }
| R15 { $$.number = GR15; }
;
register32s: EAX { $$.number = R0; }
| ECX { $$.number = R1; }
| EDX { $$.number = R2; }
| EBX { $$.number = R3; }
| ESP { $$.number = R4; }
| EBP { $$.number = R5; }
| ESI { $$.number = R6; }
| EDI { $$.number = R7; }
| RG8D { $$.number = R8; }
| RG9D { $$.number = R9; }
| RG10D { $$.number = R10; }
| RG11D { $$.number = R11; }
| RG12D { $$.number = R12; }
| RG13D { $$.number = R13; }
| RG14D { $$.number = R14; }
| RG15D { $$.number = R15; }
register32s: EAX { $$.number = GR0; }
| ECX { $$.number = GR1; }
| EDX { $$.number = GR2; }
| EBX { $$.number = GR3; }
| ESP { $$.number = GR4; }
| EBP { $$.number = GR5; }
| ESI { $$.number = GR6; }
| EDI { $$.number = GR7; }
| R8D { $$.number = GR8; }
| R9D { $$.number = GR9; }
| R10D { $$.number = GR10; }
| R11D { $$.number = GR11; }
| R12D { $$.number = GR12; }
| R13D { $$.number = GR13; }
| R14D { $$.number = GR14; }
| R15D { $$.number = GR15; }
;
register16s: AX { $$.number = R0; }
| CX { $$.number = R1; }
| DX { $$.number = R2; }
| BX { $$.number = R3; }
| SP { $$.number = R4; }
| BP { $$.number = R5; }
| SI { $$.number = R6; }
| DI { $$.number = R7; }
| R8W { $$.number = R8; }
| R9W { $$.number = R9; }
| R10W { $$.number = R10; }
| R11W { $$.number = R11; }
| R12W { $$.number = R12; }
| R13W { $$.number = R13; }
| R14W { $$.number = R14; }
| R15W { $$.number = R15; }
register16s: AX { $$.number = GR0; }
| CX { $$.number = GR1; }
| DX { $$.number = GR2; }
| BX { $$.number = GR3; }
| SP { $$.number = GR4; }
| BP { $$.number = GR5; }
| SI { $$.number = GR6; }
| DI { $$.number = GR7; }
| R8W { $$.number = GR8; }
| R9W { $$.number = GR9; }
| R10W { $$.number = GR10; }
| R11W { $$.number = GR11; }
| R12W { $$.number = GR12; }
| R13W { $$.number = GR13; }
| R14W { $$.number = GR14; }
| R15W { $$.number = GR15; }
;
register8s: AL { $$.number = R0; }
| CL { $$.number = R1; }
| DL { $$.number = R2; }
| BL { $$.number = R3; }
| SPL { $$.number = R4; }
| BPL { $$.number = R5; }
| SIL { $$.number = R6; }
| DIL { $$.number = R7; }
| R8B { $$.number = R8; }
| R9B { $$.number = R9; }
| R10B { $$.number = R10; }
| R11B { $$.number = R11; }
| R12B { $$.number = R12; }
| R13B { $$.number = R13; }
| R14B { $$.number = R14; }
| R15B { $$.number = R15; }
register8s: AL { $$.number = GR0; }
| CL { $$.number = GR1; }
| DL { $$.number = GR2; }
| BL { $$.number = GR3; }
| SPL { $$.number = GR4; }
| BPL { $$.number = GR5; }
| SIL { $$.number = GR6; }
| DIL { $$.number = GR7; }
| R8B { $$.number = GR8; }
| R9B { $$.number = GR9; }
| R10B { $$.number = GR10; }
| R11B { $$.number = GR11; }
| R12B { $$.number = GR12; }
| R13B { $$.number = GR13; }
| R14B { $$.number = GR14; }
| R15B { $$.number = GR15; }
;
@ -431,6 +434,8 @@ instruction: INOP { append_instructions(NOP); }
| ITHLT { append_instructions(HLT); }
| ITLOCK { append_instructions(LOCK); }
| ITJMP relative { append_instructions( JMP, D32, REL, $2 ); }
| ITJE relative { append_instructions( JE, D32, REL, $2 ); }
| ITJNE relative { append_instructions( JNE, D32, REL, $2 ); }
| ITINC register { append_instructions( INC, $2.size, REG, $2.number ); }
| ITDEC register { append_instructions( DEC, $2.size, REG, $2.number ); }
| ITNOT register { append_instructions( NOT, $2.size, REG, $2.number ); }
@ -439,14 +444,15 @@ instruction: INOP { append_instructions(NOP); }
| ITIMUL register { append_instructions( IMUL, $2.size, REG, $2.number ); }
| ITDIV register { append_instructions( DIV, $2.size, REG, $2.number ); }
| ITIDIV register { append_instructions( IDIV, $2.size, REG, $2.number ); }
| ITINC memory { append_instructions( INC, 0 /* ??? */, MEM, 0 /* ??? */ ); }
| ITDEC memory { append_instructions( DEC, 0 /* ??? */, MEM, 0 /* ??? */ ); }
| ITNOT memory { append_instructions( NOT, 0 /* ??? */, MEM, 0 /* ??? */ ); }
| ITNEG memory { append_instructions( NEG, 0 /* ??? */, MEM, 0 /* ??? */ ); }
| ITMUL memory { append_instructions( MUL, 0 /* ??? */, MEM, 0 /* ??? */ ); }
| ITIMUL memory { append_instructions( IMUL, 0 /* ??? */, MEM, 0 /* ??? */ ); }
| ITDIV memory { append_instructions( DIV, 0 /* ??? */, MEM, 0 /* ??? */ ); }
| ITIDIV memory { append_instructions( IDIV, 0 /* ??? */, MEM, 0 /* ??? */ ); }
| ITPOP register { append_instructions( POP, $2.size, REG, $2.number ); }
| ITINC memory { append_instructions( INC, D32, MEM, $2 ); }
| ITDEC memory { append_instructions( DEC, D32, MEM, $2 ); }
| ITNOT memory { append_instructions( NOT, D32, MEM, $2 ); }
| ITNEG memory { append_instructions( NEG, D32, MEM, $2 ); }
| ITMUL memory { append_instructions( MUL, D32, MEM, $2 ); }
| ITIMUL memory { append_instructions( IMUL, D32, MEM, $2 ); }
| ITDIV memory { append_instructions( DIV, D32, MEM, $2 ); }
| ITIDIV memory { append_instructions( IDIV, D32, MEM, $2 ); }
| ITADD register register { append_instructions( ADD, $2.size, REG, $2.number, REG, $3.number ); }
| ITOR register register { append_instructions( OR, $2.size, REG, $2.number, REG, $3.number ); }
| ITADC register register { append_instructions( ADC, $2.size, REG, $2.number, REG, $3.number ); }
@ -455,9 +461,12 @@ instruction: INOP { append_instructions(NOP); }
| ITSUB register register { append_instructions( SUB, $2.size, REG, $2.number, REG, $3.number ); }
| ITXOR register register { append_instructions( XOR, $2.size, REG, $2.number, REG, $3.number ); }
| ITCMP register register { append_instructions( CMP, $2.size, REG, $2.number, REG, $3.number ); }
| ITSAR register immediate { append_instructions( SAR, $2.size, REG, $2.number, $3.type, $3.value ); }
| ITCMP register immediate { append_instructions( CMP, $2.size, REG, $2.number, $3.type, $3.value ); }
| ITMOV register register { append_instructions( MOV, $2.size, REG, $2.number, REG, $3.number ); }
| ITMOV register immediate { append_instructions( MOV, $2.size, REG, $2.number, $3.type, $3.value ); }
| ITMOV register memory { append_instructions( MOV, $2.size, REG, $2.number, MEM, $3 ); }
| ITMOV memory register { append_instructions( MOV, D32, MEM, $2, REG, $3.number ); }
| ITSAR register immediate { append_instructions( SAR, $2.size, REG, $2.number, $3.type, $3.value ); }
// #placeholder<instruction_parser_rules> END
;

18
test/if.eax Normal file

@ -0,0 +1,18 @@
# Exercises the `if <literal> then ... end if` construct with the two
# constant conditions 0 and 1. Each branch loads eax/edi/esi/edx and issues
# a syscall that is handed a short message and its byte length.
# NOTE(review): presumably 0 is false and 1 is true, so only "yup" should be
# printed -- confirm against the compiler's handling of literal conditions.
program basic_conditional
begin
# Branch guarded by the literal 0.
if 0 then
# NOTE(review): looks like write(1, message, length) on Linux x86-64 -- confirm.
mov eax 1
mov edi 1
mov esi "nope\n"
# 5 = length of "nope\n" in bytes
mov edx 5
syscall
end if
# Branch guarded by the literal 1.
if 1 then
mov eax 1
mov edi 1
mov esi "yup\n"
# 4 = length of "yup\n" in bytes
mov edx 4
syscall
end if
end program

@ -26,7 +26,7 @@ begin
end procedure
fast procedure write_space
s8 space = " "
s8 space = ' '
begin
mov eax 1
mov edi 1
@ -36,7 +36,7 @@ begin
end procedure
fast procedure write_line_feed
s8 line_feed = "\n"
s8 line_feed = '\n'
begin
mov eax 1
mov edi 1
@ -74,7 +74,7 @@ begin
main_loop:
mov r12d [file]
mov r13d byte
call read_character
fastcall read_character
mov r10d eax
mov r15b [byte]

@ -10,20 +10,20 @@ proc make_parser_rules {is} {
dict set r size "\$$n.size"
}
"immediate" {
dict set r enum "$$n.type"
dict set r value "$$n.value"
dict set r enum "\$$n.type"
dict set r value "\$$n.value"
# XXX
dict set r size "D32"
}
"relative" {
dict set r enum "REL"
dict set r value "$$n"
dict set r value "\$$n"
dict set r size "D32"
}
"memory" {
dict set r enum "MEM"
dict set r value "0 /* ??? */"
dict set r size "0 /* ??? */"
dict set r value "\$$n"
dict set r size "D32"
}
default { malformed_instruction $n }
}

@ -10,6 +10,8 @@ set instructions {
{hlt}
{lock}
{jmp relative}
{je relative}
{jne relative}
{inc register}
{dec register}
{not register}
@ -18,6 +20,7 @@ set instructions {
{imul register}
{div register}
{idiv register}
{pop register}
{inc memory}
{dec memory}
{not memory}
@ -34,9 +37,12 @@ set instructions {
{sub register register}
{xor register register}
{cmp register register}
{sar register immediate}
{cmp register immediate}
{mov register register}
{mov register immediate}
{mov register memory}
{mov memory register}
{sar register immediate}
}
proc malformed_instruction {i} {

@ -2,13 +2,13 @@ source tool/generators/registers.tcl
proc register_parsing {registers} {
proc register_parser_rule {size batch} {
puts [format "register%ss: %s \{ \$\$.number = R0; \}"\
puts [format "register%ss: %s \{ \$\$.number = GR0; \}"\
$size\
[string toupper [lindex $batch 0]]\
]
set accumulator 1
foreach register [lrange $batch 1 end] {
puts [format " | %-5s \{ \$\$.number = R%s; \}"\
puts [format " | %-5s \{ \$\$.number = GR%s; \}"\
[string toupper $register]\
$accumulator\
]

@ -3,7 +3,10 @@ source tool/generators/registers.tcl
proc scan_registers {registers} {
foreach {key value} $registers {
foreach {register} $value {
puts [format "%-6s \{ return %s; \}" $register [string toupper $register]]
puts [format "%-6s \{ return %s; \}"\
$register\
[string toupper $register]\
]
}
}
}

@ -1,14 +1,3 @@
# XXX
#
# | RGXMM0 { $$.number = 0; } /* XXX */
# | RGXMM1 { $$.number = 0; }
# | RGXMM2 { $$.number = 0; }
# | RGXMM3 { $$.number = 0; }
# | RGXMM4 { $$.number = 0; }
# | RGXMM5 { $$.number = 0; }
# | RGXMM6 { $$.number = 0; }
# | RGXMM7 { $$.number = 0; }
set register64s {
rax
rcx
@ -18,14 +7,14 @@ set register64s {
rbp
rsi
rdi
rg8
rg9
rg10
rg11
rg12
rg13
rg14
rg15
r8
r9
r10
r11
r12
r13
r14
r15
}
set register32s {
@ -37,14 +26,14 @@ set register32s {
ebp
esi
edi
rg8d
rg9d
rg10d
rg11d
rg12d
rg13d
rg14d
rg15d
r8d
r9d
r10d
r11d
r12d
r13d
r14d
r15d
}
set register16s {