From cd86cd6bdf35ff7224015fc359bb82f9d99adf40 Mon Sep 17 00:00:00 2001
From: anon <anon@anon.anon>
Date: Fri, 6 Sep 2024 16:22:52 +0200
Subject: [PATCH] +tslex

---
 note: the blob ids on the "index" lines below are stale (the hunks were
 edited after export); use plain `git apply` / `git am`, not --3way.

 C_C++/bison/tslex/Makefile  |   3 +
 C_C++/bison/tslex/README.md |  37 +++++++++++++
 C_C++/bison/tslex/main.y    |  59 +++++++++++++++++++++
 C_C++/bison/tslex/tslex.c   | 102 +++++++++++++++++++++++++++++++++++++
 C_C++/bison/tslex/tslex.h   |  27 ++++++++++
 5 files changed, 228 insertions(+)
 create mode 100644 C_C++/bison/tslex/Makefile
 create mode 100644 C_C++/bison/tslex/README.md
 create mode 100644 C_C++/bison/tslex/main.y
 create mode 100644 C_C++/bison/tslex/tslex.c
 create mode 100644 C_C++/bison/tslex/tslex.h

diff --git a/C_C++/bison/tslex/Makefile b/C_C++/bison/tslex/Makefile
new file mode 100644
index 0000000..5d5f0a3
--- /dev/null
+++ b/C_C++/bison/tslex/Makefile
@@ -0,0 +1,3 @@
+main:
+	bison --header=main.tab.h main.y
+	gcc main.tab.c tslex.c $$(pkg-config --cflags --libs tree-sitter tree-sitter-c) -ggdb
diff --git a/C_C++/bison/tslex/README.md b/C_C++/bison/tslex/README.md
new file mode 100644
index 0000000..938239c
--- /dev/null
+++ b/C_C++/bison/tslex/README.md
@@ -0,0 +1,37 @@
+a few days ago i saw this pet project on https://git.peppe.rs/languages/tbsp/tree/examples/code-overview/overview.tbsp
+being a DSL fan, i really like the idea.
+except seemingly the guy is making a full general purpose interpreter too,
+which sentences the project to death.
+i have emailed him about this (response pending)
+
+as i was thinking however i started wondering if bison could do it
+given an adequate lex()
+
+well, the answer is kinda.
+if you look at the code, well it works.
+
+problems:
+    + bison doesnt know includes so it would have to be preprocessed
+      (to provide tokens as -perversely- the scanner defines them)
+    + the tree sitter query language is much more...
+      well lets say verboser than i imagined, so our code has to be much
+      more complex than i hoped;
+      either the bison programmer has to type a lot or lex()
+      has to define a bunch of wrappers which would be language specific
+    + we are not really using the stack are we?
+      (unless we do stuff like `BEGIN FUNCTION`/`END FUNCTION`
+      to mimic the "original" grammar (which is not bad))
+      so its a hack to begin with,
+      and because of the above reasons an ugly one at that
+
+so, instead, i do think a DSL is the answer.
+only issue is that if the guy disagrees on making it "supplementary"
+then i will have to make my own.
+
+one more thing, a ts node query language would also be nice.
+something along the lines of
+```
+$$->declarator
+```
+expanding to a node object
+(or erroring out safely if the "declarator" named field does not exist)
diff --git a/C_C++/bison/tslex/main.y b/C_C++/bison/tslex/main.y
new file mode 100644
index 0000000..6163bf2
--- /dev/null
+++ b/C_C++/bison/tslex/main.y
@@ -0,0 +1,59 @@
+%{
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <string.h>
+    #include <tree_sitter/api.h>
+    #include "tslex.h"
+%}
+/* The generated header declares YYSTYPE, so it must see TSNode on its own. */
+%code requires {
+    #include <tree_sitter/api.h>
+}
+%token UNDEF
+%token<nodeval> FUNCTION
+%union {
+    TSNode nodeval;
+}
+%%
+document
+    : any
+    ;
+
+/* Left-recursive so each token is reduced as soon as it is read: the parser
+ * stack stays shallow and the FUNCTION actions run in source order.
+ */
+any
+    : %empty
+    | any UNDEF
+    | any FUNCTION {
+        /* ts_node_string() hands back a malloc()ed string that we own. */
+        char * sexp = ts_node_string($2);
+        printf("-- %s\n", sexp);
+        free(sexp);
+
+        static const char field[] = "declarator";
+        TSNode declarator = ts_node_child_by_field_name($2, field, sizeof field - 1);
+
+        /* Skip the text dump when the node has no such field. */
+        if (!ts_node_is_null(declarator)) {
+            printf("-- %.*s\n",
+                ts_node_text_len(declarator),
+                ts_node_text(declarator)
+            );
+        }
+    }
+    ;
+%%
+
+/* Exit status: 1 if the scanner could not be set up, else the yyparse() result. */
+int main(void) {
+    if (yyinit() != 0) {
+        fputs("error -- could not set up the tree-sitter scanner\n", stderr);
+        return 1;
+    }
+
+    const int status = yyparse();
+    yydeinit();
+
+    return status;
+}
diff --git a/C_C++/bison/tslex/tslex.c b/C_C++/bison/tslex/tslex.c
new file mode 100644
index 0000000..13cf8ac
--- /dev/null
+++ b/C_C++/bison/tslex/tslex.c
@@ -0,0 +1,102 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <tree_sitter/api.h>
+
+#include "main.tab.h"
+#include "tslex.h"
+
+extern const TSLanguage * tree_sitter_c(void);
+
+static TSParser * parser;
+static TSTree * tree;
+static TSTreeCursor cursor;
+
+static TSNode current_node;
+static bool walk_done; /* set once every node has been handed out */
+
+const char * source_code = "int main() { return 0; }";
+
+/* Parse source_code and park the scanner on the root node.
+ * Returns 0 on success, 1 if the grammar is rejected or parsing fails.
+ */
+int yyinit(void) {
+    parser = ts_parser_new();
+    tree = NULL;
+
+    if (ts_parser_set_language(parser, tree_sitter_c())) {
+        tree = ts_parser_parse_string(parser, NULL, source_code, (uint32_t)strlen(source_code));
+    }
+    if (!tree) {
+        ts_parser_delete(parser);
+        parser = NULL;
+        return 1;
+    }
+
+    current_node = ts_tree_root_node(tree);
+    cursor = ts_tree_cursor_new(current_node);
+    walk_done = false;
+
+    return 0;
+}
+
+/* Release what yyinit() acquired. The cursor was created from the tree's
+ * root node, so it is dropped before the tree. Always returns 0.
+ */
+int yydeinit(void) {
+    ts_tree_cursor_delete(&cursor);
+    ts_tree_delete(tree);
+    ts_parser_delete(parser);
+    tree = NULL;
+    parser = NULL;
+
+    return 0;
+}
+
+/* Move the cursor to the next node in pre-order (children first, then
+ * siblings, climbing back up when a subtree is finished).
+ * Returns false once the whole tree has been visited.
+ */
+static bool next_node(void) {
+    if (ts_tree_cursor_goto_first_child(&cursor)) {
+        return true;
+    }
+
+    do {
+        if (ts_tree_cursor_goto_next_sibling(&cursor)) {
+            return true;
+        }
+    } while (ts_tree_cursor_goto_parent(&cursor));
+
+    return false;
+}
+
+/* Hand one tree node per call to the parser: FUNCTION (with the node in
+ * yylval) for a function_definition, UNDEF for anything else, and 0 after
+ * the last node.
+ */
+int yylex(void) {
+    if (walk_done) {
+        return 0;
+    }
+
+    int token = UNDEF;
+    if (strcmp(ts_node_type(current_node), "function_definition") == 0) {
+        yylval.nodeval = current_node;
+        token = FUNCTION;
+    }
+
+    if (next_node()) {
+        current_node = ts_tree_cursor_current_node(&cursor);
+    } else {
+        walk_done = true;
+    }
+
+    return token;
+}
+
+/* Called by the generated parser with a description of the problem. */
+void yyerror(const char * msg) {
+    fprintf(stderr, "error -- this should never happen: %s\n", msg);
+}
diff --git a/C_C++/bison/tslex/tslex.h b/C_C++/bison/tslex/tslex.h
new file mode 100644
index 0000000..4de24bb
--- /dev/null
+++ b/C_C++/bison/tslex/tslex.h
@@ -0,0 +1,27 @@
+#ifndef TSLEX_H
+#define TSLEX_H
+
+#include <tree_sitter/api.h>
+
+/* Scanner entry points, implemented in tslex.c. */
+void yyerror(const char * msg);
+int yyinit(void);   /* 0 on success, non-zero on failure */
+int yydeinit(void); /* always 0 */
+int yylex(void);
+
+/* Text the tree was parsed from; node byte offsets index into it. */
+extern const char * source_code;
+
+/* Pointer to the first byte of the node's text inside source_code. The text
+ * is not NUL-terminated at the node's end; pair with ts_node_text_len().
+ */
+static inline const char * ts_node_text(const TSNode node) {
+    return source_code + ts_node_start_byte(node);
+}
+
+/* Byte length of the node's text, as an int so it can feed printf's "%.*s". */
+static inline int ts_node_text_len(const TSNode node) {
+    return (int)(ts_node_end_byte(node) - ts_node_start_byte(node));
+}
+
+#endif /* TSLEX_H */