%{
    /* Flex scanner for EAXHLA source text.
     *
     * Produces the tokens declared in eaxhla.tab.h. String literals are
     * accumulated in `string_literal_buffer` and character literals in
     * `char_literal_buffer` while the scanner sits in the matching
     * exclusive start condition.
     */
    #include <stdlib.h>
    #include <string.h>

    #include "sds/sds.h"
    #include "eaxhla.h"
    #include "eaxhla.tab.h"

    /* Bytes of the string literal currently being scanned. */
    sds  string_literal_buffer;
    /* Last character seen inside the current '...' literal. */
    long char_literal_buffer = 0;

    /* Executed by flex once, before the first scan. */
    #define YY_USER_INIT \
        string_literal_buffer = sdsnew("");
%}

%option noyywrap

identifier  [A-Za-z_][A-Za-z0-9_]*
wsnl        [ \t\r\v\f\n]

hex         [0123456789abcdef]
uhex        [0123456789ABCDEF]

%x IN_COMMENT IN_MULTILINE_COMMENT
%x IN_CHAR IN_STRING
%x IN_END IN_UNKNOWN_END
%x IN_ARTIMETRIC_BLOCK

%option nodefault
%option yylineno
%option nounput noinput

%%

{wsnl}*     { ; }

begin       { return MYBEGIN; }

program     { return PROGRAM; }
procedure   { return PROCEDURE; }
repeat      { return REPEAT; }
if          { return IF; }
then        { return THEN; }
machine     { return MACHINE; }
library     { return LIBRARY; }

break       { return BREAK; }
continue    { return CONTINUE; }
until       { return UNTIL; }
exit        { return EXIT; }

end         { BEGIN IN_END; }

fast        { return FAST; }
unix        { return UNIX; }

shell       { return SHELL; }

in          { return TIN; }

s8          { return S8; }
s16         { return S16; }
s32         { return S32; }
s64         { return S64; }
u8          { return U8; }
u16         { return U16; }
u32         { return U32; }
u64         { return U64; }

\'          { BEGIN IN_CHAR; }
\"          { BEGIN IN_STRING; }

\!\=        { return ITNEQ; }
\!          { return ITNOT; }

(\/\/)|\#   { BEGIN IN_COMMENT; }
\/\*        { BEGIN IN_MULTILINE_COMMENT; }

\[|\]|\{|\}|\+|\-|\*|\/|\%|\^|\:|\<|\>|\= { return yytext[0]; }

fastcall    { return FASTCALL; }

    /* --- Registers begin here --- */
    /* #placeholder BEGIN */
rax         { return RAX; }
rcx         { return RCX; }
rdx         { return RDX; }
rbx         { return RBX; }
rsp         { return RSP; }
rbp         { return RBP; }
rsi         { return RSI; }
rdi         { return RDI; }
r8          { return R8; }
r9          { return R9; }
r10         { return R10; }
r11         { return R11; }
r12         { return R12; }
r13         { return R13; }
r14         { return R14; }
r15         { return R15; }
eax         { return EAX; }
ecx         { return ECX; }
edx         { return EDX; }
ebx         { return EBX; }
esp         { return ESP; }
ebp         { return EBP; }
esi         { return ESI; }
edi         { return EDI; }
r8d         { return R8D; }
r9d         { return R9D; }
r10d        { return R10D; }
r11d        { return R11D; }
r12d        { return R12D; }
r13d        { return R13D; }
r14d        { return R14D; }
r15d        { return R15D; }
ax          { return AX; }
cx          { return CX; }
dx          { return DX; }
bx          { return BX; }
sp          { return SP; }
bp          { return BP; }
si          { return SI; }
di          { return DI; }
r8w         { return R8W; }
r9w         { return R9W; }
r10w        { return R10W; }
r11w        { return R11W; }
r12w        { return R12W; }
r13w        { return R13W; }
r14w        { return R14W; }
r15w        { return R15W; }
al          { return AL; }
cl          { return CL; }
dl          { return DL; }
bl          { return BL; }
spl         { return SPL; }
bpl         { return BPL; }
sil         { return SIL; }
dil         { return DIL; }
r8b         { return R8B; }
r9b         { return R9B; }
r10b        { return R10B; }
r11b        { return R11B; }
r12b        { return R12B; }
r13b        { return R13B; }
r14b        { return R14B; }
r15b        { return R15B; }
    /* #placeholder END */
    /* --- Registers end here --- */

    /* --- Instructions begin here --- */
    /* NOTE: this rule precedes the `nop` rule in the placeholder region
     * below; flex resolves equal-length matches in favour of the earlier
     * rule, so `nop` always yields INOP and the ITNOP rule never fires.
     */
nop         { return INOP; }
    /* #placeholder BEGIN */
adc         { return ITADC; }
add         { return ITADD; }
and         { return ITAND; }
cmp         { return ITCMP; }
cpuid       { return ITCPUID; }
dec         { return ITDEC; }
div         { return ITDIV; }
f2xm1       { return ITF2XM1; }
fabs        { return ITFABS; }
fchs        { return ITFCHS; }
fcos        { return ITFCOS; }
fdecstp     { return ITFDECSTP; }
fincstp     { return ITFINCSTP; }
fld1        { return ITFLD1; }
fldl2e      { return ITFLDL2E; }
fldl2t      { return ITFLDL2T; }
fldlg2      { return ITFLDLG2; }
fldln2      { return ITFLDLN2; }
fldpi       { return ITFLDPI; }
fldz        { return ITFLDZ; }
fnop        { return ITFNOP; }
fpatan      { return ITFPATAN; }
fprem       { return ITFPREM; }
fprem1      { return ITFPREM1; }
fptan       { return ITFPTAN; }
frndint     { return ITFRNDINT; }
fscale      { return ITFSCALE; }
fsin        { return ITFSIN; }
fsincos     { return ITFSINCOS; }
fsqrt       { return ITFSQRT; }
ftst        { return ITFTST; }
fxam        { return ITFXAM; }
fxtract     { return ITFXTRACT; }
fyl2x       { return ITFYL2X; }
fyl2xp1     { return ITFYL2XP1; }
idiv        { return ITIDIV; }
imul        { return ITIMUL; }
inc         { return ITINC; }
je          { return ITJE; }
jmp         { return ITJMP; }
jne         { return ITJNE; }
leave       { return ITLEAVE; }
loop        { return ITLOOP; }
loope       { return ITLOOPE; }
loopne      { return ITLOOPNE; }
mov         { return ITMOV; }
mul         { return ITMUL; }
neg         { return ITNEG; }
nop         { return ITNOP; }
not         { return ITNOT; }
or          { return ITOR; }
pop         { return ITPOP; }
popf        { return ITPOPF; }
push        { return ITPUSH; }
pushf       { return ITPUSHF; }
rcl         { return ITRCL; }
rcr         { return ITRCR; }
retf        { return ITRETF; }
retn        { return ITRETN; }
rol         { return ITROL; }
ror         { return ITROR; }
sal         { return ITSAL; }
sar         { return ITSAR; }
sbb         { return ITSBB; }
shl         { return ITSHL; }
shr         { return ITSHR; }
sub         { return ITSUB; }
syscall     { return ITSYSCALL; }
xor         { return ITXOR; }
    /* #placeholder END */
    /* --- Instructions end here --- */

<IN_END>{
    /* Entered after the keyword `end`; the next word selects which
     * END_* token is produced. Anything else is handed back with
     * yyless(0) and reported from IN_UNKNOWN_END.
     */
program     { BEGIN INITIAL; return END_PROGRAM; }
procedure   { BEGIN INITIAL; return END_PROCEDURE; }
repeat      { BEGIN INITIAL; return END_REPEAT; }
if          { BEGIN INITIAL; return END_IF; }
machine     { BEGIN INITIAL; return END_MACHINE; }
library     { BEGIN INITIAL; return END_LIBRARY; }
{wsnl}      { ; }
.           { yyless(0); BEGIN IN_UNKNOWN_END; }
}

<IN_UNKNOWN_END>{
.*          {
                issue_error("unknown end-sequence \033[1m'%s'\033[0m", yytext);
                BEGIN INITIAL;
                return 0;
            }
}

<IN_CHAR>{
    /* XXX: i wanted short strings to be literals;
     *      this however clashes with the sanity of machine blocks;
     *      those should be moved to '' (exactly like in Holy C)
     */
    /* XXX: THIS CODE IS TOTAL DEATH */
    /* The escape rule must match the two characters backslash + n.
     * Matching a raw newline here would shadow the "unterminated" rule
     * at the bottom of this scope and make it unreachable.
     */
\\n         { char_literal_buffer = '\n'; }
\'          {
                BEGIN INITIAL;
                yylval.intval = char_literal_buffer;
                char_literal_buffer = 0;
                return LITERAL;
            }
.           { char_literal_buffer = yytext[0]; }
\n          {
                issue_error("unterminated character literal sequence");
                yyterminate();
            }
}

<IN_STRING>{
    /* XXX: the first WORD_SIZE_IN_BYTES bytes should be 0'd */
\"          {
                const size_t len = sdslen(string_literal_buffer);

                BEGIN INITIAL;

                /* malloc(0) may return NULL, so always request >= 1 byte. */
                yylval.blobval.data = malloc(len ? len : 1);
                if (!yylval.blobval.data) {
                    issue_error("out of memory while reading string literal");
                    yyterminate();
                }
                memcpy(yylval.blobval.data, string_literal_buffer, len);
                yylval.blobval.len = len;

                /* Empty the accumulator for the next literal. */
                string_literal_buffer[0] = '\0';
                sdsupdatelen(string_literal_buffer);

                return ARRAY_LITERAL;
            }
\\n         { string_literal_buffer = sdscat(string_literal_buffer, "\n"); }
\\a         { string_literal_buffer = sdscat(string_literal_buffer, "\a"); }
\\b         { string_literal_buffer = sdscat(string_literal_buffer, "\b"); }
\\f         { string_literal_buffer = sdscat(string_literal_buffer, "\f"); }
\\r         { string_literal_buffer = sdscat(string_literal_buffer, "\r"); }
\\t         { string_literal_buffer = sdscat(string_literal_buffer, "\t"); }
\\v         { string_literal_buffer = sdscat(string_literal_buffer, "\v"); }
\\\"        { string_literal_buffer = sdscat(string_literal_buffer, "\""); }
\\\\        { string_literal_buffer = sdscat(string_literal_buffer, "\\"); }
\\0         { string_literal_buffer = sdscatlen(string_literal_buffer, "\0", 1); }
\\x({hex}|{uhex})+ {
                long v = strtol(yytext + 2, NULL, 16);
                unsigned char byte;

                if (v > 255 || v < -255) {
                    issue_warning("hex escapes are char sized. '%ld' will be truncated", v);
                }

                /* Append the low 8 bits explicitly; taking the address of
                 * the long itself would pick a host-endianness-dependent
                 * byte.
                 */
                byte = (unsigned char)(v & 0xFF);
                string_literal_buffer = sdscatlen(string_literal_buffer, &byte, 1);
            }
\n          {
                issue_error("string was never terminated");

                /* Leave the string state so the following lines are
                 * scanned as ordinary source again.
                 */
                BEGIN INITIAL;

                yylval.blobval.data = strdup("");
                yylval.blobval.len  = 0;

                string_literal_buffer[0] = '\0';
                sdsupdatelen(string_literal_buffer);

                return ARRAY_LITERAL;
            }
.           { string_literal_buffer = sdscat(string_literal_buffer, yytext); }
}

<IN_COMMENT>{
\n          { BEGIN INITIAL; }
.*          { ; }
<<EOF>>     { issue_error("unterminated comment"); yyterminate(); }
}

<IN_MULTILINE_COMMENT>{
\*\/        { BEGIN INITIAL; }
.|\n        { ; }
<<EOF>>     { issue_error("unterminated comment"); yyterminate(); }
}

    /* NOTE(review): no rule in this file enters IN_ARTIMETRIC_BLOCK; the
     * numeric rules are assumed to be shared between INITIAL and that
     * state -- confirm against the grammar.
     */
<INITIAL,IN_ARTIMETRIC_BLOCK>{
-?[[:digit:]]+  {
                    yylval.intval = strtol(yytext, NULL, 10);
                    return LITERAL;
                }
0b[01]+         {
                    yylval.intval = strtol(yytext + 2, NULL, 2);
                    return LITERAL;
                }
0x{hex}+        {
                    yylval.intval = strtol(yytext + 2, NULL, 16);
                    return LITERAL;
                }
0x{uhex}+       {
                    yylval.intval = strtol(yytext + 2, NULL, 16);
                    return LITERAL;
                }
}

<IN_ARTIMETRIC_BLOCK>{
<<EOF>>     { issue_error("unterminated artimetric block"); yyterminate(); }
}

{identifier}    { yylval.strval = strdup(yytext); return IDENTIFIER; }
{identifier}:   {
                    /* Drop the trailing ':' before copying the name. */
                    yytext[yyleng-1] = '\0';
                    yylval.strval = strdup(yytext);
                    return LABEL;
                }

.           { issue_error("unknown symbol \033[1m'%c'\033[0m", yytext[0]); }

%%

/* Release everything the scanner owns: the input stream (if one is open),
 * flex's internal buffers, and the string-literal accumulator.
 */
void yyfree_leftovers(void) {
    if (yyin) {
        fclose(yyin);
    }

    yylex_destroy();

    sdsfree(string_literal_buffer);
    /* Avoid a dangling pointer if this is called twice or the scanner is
     * restarted later.
     */
    string_literal_buffer = NULL;
}