From 35a5e94310f754a6298c8f885e7098c9ba23a096 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Mon, 9 Nov 2009 02:06:10 -0500 Subject: [PATCH] Convert lexer and parser to be reentrant ("pure lexer" and "pure parser"). Signed-off-by: Jeff Garzik --- .gitignore | 1 + Makefile | 6 ++-- sql-parser-state.h | 8 +++++ sql.l | 88 ++++++++++++++++++++++++++-------------------- sql.y | 42 ++++++++++++++++------ 5 files changed, 93 insertions(+), 52 deletions(-) create mode 100644 sql-parser-state.h diff --git a/.gitignore b/.gitignore index e295eb2..84e3c54 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ *.output sql.tab.* +sql.lex.h sql.c sql diff --git a/Makefile b/Makefile index f542c8e..5689b42 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ # Copyright (c) 2009, Taughannock Networks. All rights reserved. # See the README file for license conditions and contact info. -CC = cc -g +CC = gcc -Wall -O -g LEX = flex YACC = bison CFLAGS = -DYYDEBUG=1 @@ -15,7 +15,7 @@ all: ${PROGRAMS} # chapter 4 -OBJS = sql.tab.o sql.o exec.o +OBJS = sql.o sql.tab.o exec.o sql: ${OBJS} sql-parser.h ${CC} -o $@ ${OBJS} @@ -29,7 +29,7 @@ sql.c: sql.l sql.o: sql.c sql.tab.h clean: - rm -f sql sql.tab.c sql.tab.h sql.c ${OBJS} \ + rm -f sql sql.tab.c sql.tab.h sql.lex.h sql.c ${OBJS} \ sql.output .SUFFIXES: .l .y .c diff --git a/sql-parser-state.h b/sql-parser-state.h new file mode 100644 index 0000000..d20c1ce --- /dev/null +++ b/sql-parser-state.h @@ -0,0 +1,8 @@ +#ifndef __SQL_PARSER_STATE__ +#define __SQL_PARSER_STATE__ + +struct psql_state { + yyscan_t scaninfo; +}; + +#endif /* __SQL_PARSER_STATE__ */ diff --git a/sql.l b/sql.l index afe2a4f..5b28dd8 100644 --- a/sql.l +++ b/sql.l @@ -9,21 +9,28 @@ * With error reporting and recovery */ -%option noyywrap nodefault yylineno case-insensitive +%option noyywrap nodefault yylineno case-insensitive reentrant bison-bridge +%option bison-locations +%option header-file="sql.lex.h" + %{ -#include "sql.tab.h" #include #include +#include 
"sql.tab.h" +#include "sql-parser-state.h" + +void yyerror(YYLTYPE *, struct psql_state *pstate, char *s, ...); -void yyerror(char *s, ...); int oldstate; /* handle locations */ +#if 0 int yycolumn = 1; +#endif -#define YY_USER_ACTION yylloc.filename = filename; \ - yylloc.first_line = yylloc.last_line = yylineno; \ - yylloc.first_column = yycolumn; yylloc.last_column = yycolumn+yyleng-1; \ +#define YY_USER_ACTION yylloc->filename = filename; \ + yylloc->first_line = yylloc->last_line = yylineno; \ + yylloc->first_column = yycolumn; yylloc->last_column = yycolumn+yyleng-1; \ yycolumn += yyleng; @@ -33,6 +40,9 @@ int yycolumn = 1; %s BTWMODE %% +%{ + struct psql_state *pstate = yyextra; +%} /* keywords */ @@ -106,8 +116,8 @@ ELSEIF { return ELSEIF; } END { return END; } ENUM { return ENUM; } ESCAPED { return ESCAPED; } -EXISTS { yylval.subtok = 0; return EXISTS; } -NOT[ \t]+EXISTS { yylval.subtok = 1; return EXISTS; } +EXISTS { yylval->subtok = 0; return EXISTS; } +NOT[ \t]+EXISTS { yylval->subtok = 1; return EXISTS; } EXIT { return EXIT; } EXPLAIN { return EXPLAIN; } FETCH { return FETCH; } @@ -262,35 +272,35 @@ ZEROFILL { return ZEROFILL; } /* numbers */ --?[0-9]+ { yylval.intval = atoi(yytext); return INTNUM; } +-?[0-9]+ { yylval->intval = atoi(yytext); return INTNUM; } -?[0-9]+"."[0-9]* | -?"."[0-9]+ | -?[0-9]+E[-+]?[0-9]+ | -?[0-9]+"."[0-9]*E[-+]?[0-9]+ | --?"."[0-9]*E[-+]?[0-9]+ { yylval.floatval = atof(yytext) ; +-?"."[0-9]*E[-+]?[0-9]+ { yylval->floatval = atof(yytext) ; return APPROXNUM; } /* booleans */ -TRUE { yylval.intval = 1; return BOOL; } -UNKNOWN { yylval.intval = -1; return BOOL; } -FALSE { yylval.intval = 0; return BOOL; } +TRUE { yylval->intval = 1; return BOOL; } +UNKNOWN { yylval->intval = -1; return BOOL; } +FALSE { yylval->intval = 0; return BOOL; } /* strings */ '(\\.|''|[^'\n])*' | -\"(\\.|\"\"|[^"\n])*\" { yylval.strval = strdup(yytext); return STRING; } +\"(\\.|\"\"|[^"\n])*\" { yylval->strval = strdup(yytext); return STRING; } 
-'(\\.|[^'\n])*$ { yyerror("Unterminated string %s", yytext); } -\"(\\.|[^"\n])*$ { yyerror("Unterminated string %s", yytext); } +'(\\.|[^'\n])*$ { yyerror(yylloc, pstate, "Unterminated string %s", yytext); } +\"(\\.|[^"\n])*$ { yyerror(yylloc, pstate, "Unterminated string %s", yytext); } /* hex strings */ X'[0-9A-F]+' | -0X[0-9A-F]+ { yylval.strval = strdup(yytext); return STRING; } +0X[0-9A-F]+ { yylval->strval = strdup(yytext); return STRING; } /* bit strings */ 0B[01]+ | -B'[01]+' { yylval.strval = strdup(yytext); return STRING; } +B'[01]+' { yylval->strval = strdup(yytext); return STRING; } /* operators */ @@ -299,17 +309,17 @@ B'[01]+' { yylval.strval = strdup(yytext); return STRING; } "&&" { return ANDOP; } "||" { return OR; } -"=" { yylval.subtok = 4; return COMPARISON; } -"<=>" { yylval.subtok = 12; return COMPARISON; } -">=" { yylval.subtok = 6; return COMPARISON; } -">" { yylval.subtok = 2; return COMPARISON; } -"<=" { yylval.subtok = 5; return COMPARISON; } -"<" { yylval.subtok = 1; return COMPARISON; } +"=" { yylval->subtok = 4; return COMPARISON; } +"<=>" { yylval->subtok = 12; return COMPARISON; } +">=" { yylval->subtok = 6; return COMPARISON; } +">" { yylval->subtok = 2; return COMPARISON; } +"<=" { yylval->subtok = 5; return COMPARISON; } +"<" { yylval->subtok = 1; return COMPARISON; } "!=" | -"<>" { yylval.subtok = 3; return COMPARISON; } +"<>" { yylval->subtok = 3; return COMPARISON; } -"<<" { yylval.subtok = 1; return SHIFT; } -">>" { yylval.subtok = 2; return SHIFT; } +"<<" { yylval->subtok = 1; return SHIFT; } +">>" { yylval->subtok = 2; return SHIFT; } /* functions */ @@ -321,28 +331,28 @@ DATE_SUB/"(" { return FDATE_SUB; } /* * peek ahead and return function if name( */ -COUNT { int c = input(); unput(c); +COUNT { int c = input(pstate->scaninfo); unput(c); if(c == '(') return FCOUNT; - yylval.strval = strdup(yytext); + yylval->strval = strdup(yytext); return NAME; } -[A-Za-z][A-Za-z0-9_]* { yylval.strval = strdup(yytext); 
+[A-Za-z][A-Za-z0-9_]* { yylval->strval = strdup(yytext); return NAME; } -`[^`/\\.\n]+` { yylval.strval = strdup(yytext+1); - yylval.strval[yyleng-2] = 0; +`[^`/\\.\n]+` { yylval->strval = strdup(yytext+1); + yylval->strval[yyleng-2] = 0; return NAME; } -`[^`\n]*$ { yyerror("unterminated quoted name %s", yytext); } +`[^`\n]*$ { yyerror(yylloc, pstate, "unterminated quoted name %s", yytext); } /* user variables */ @[0-9a-z_.$]+ | @\"[^"\n]+\" | @`[^`\n]+` | -@'[^'\n]+' { yylval.strval = strdup(yytext+1); return USERVAR; } +@'[^'\n]+' { yylval->strval = strdup(yytext+1); return USERVAR; } -@\"[^"\n]*$ { yyerror("unterminated quoted user variable %s", yytext); } -@`[^`\n]*$ { yyerror("unterminated quoted user variable %s", yytext); } -@'[^'\n]*$ { yyerror("unterminated quoted user variable %s", yytext); } +@\"[^"\n]*$ { yyerror(yylloc, pstate, "unterminated quoted user variable %s", yytext); } +@`[^`\n]*$ { yyerror(yylloc, pstate, "unterminated quoted user variable %s", yytext); } +@'[^'\n]*$ { yyerror(yylloc, pstate, "unterminated quoted user variable %s", yytext); } ":=" { return ASSIGN; } @@ -355,12 +365,12 @@ COUNT { int c = input(); unput(c); "*/" { BEGIN oldstate; } . ; \n { yycolumn = 1; } -<> { yyerror("unclosed comment"); } +<> { yyerror(yylloc, pstate, "unclosed comment"); } /* everything else */ [ \t] /* white space */ \n { yycolumn = 1; } -. { yyerror("mystery character '%c'", *yytext); } +. { yyerror(yylloc, pstate, "mystery character '%c'", *yytext); } %% diff --git a/sql.y b/sql.y index ae59732..76947f4 100644 --- a/sql.y +++ b/sql.y @@ -4,6 +4,10 @@ * See the README file for license conditions and contact info. 
* $Header: /home/johnl/flnb/code/sql/RCS/lpmysql.y,v 2.1 2009/11/08 02:53:39 johnl Exp $ */ + +%define api.pure +%parse-param { struct psql_state *pstate } + /* * Parser for mysql subset */ @@ -55,6 +59,12 @@ typedef struct YYLTYPE { char *strval; int subtok; } + +%{ +#include "sql.lex.h" +#include "sql-parser-state.h" +#define YYLEX_PARAM pstate->scaninfo +%} /* names and literal values */ @@ -330,8 +340,8 @@ typedef struct YYLTYPE { %start stmt_list %{ -void yyerror(char *s, ...); -void lyyerror(YYLTYPE, char *s, ...); +void yyerror(YYLTYPE *, struct psql_state *pstate, char *s, ...); +void lyyerror(YYLTYPE t, char *s, ...); %} /* free discarded tokens */ %destructor { printf ("free at %d %s\n",@$.first_line, $$); free($$); } @@ -971,14 +981,14 @@ expr: BINARY expr %prec UMINUS { sqlp_expr_op(SEO_STRTOBIN); } %% void -yyerror(char *s, ...) +yyerror(YYLTYPE *t, struct psql_state *pstate, char *s, ...) { va_list ap; va_start(ap, s); - if(yylloc.first_line) - fprintf(stderr, "%s:%d.%d-%d.%d: error: ", yylloc.filename, yylloc.first_line, yylloc.first_column, - yylloc.last_line, yylloc.last_column); + if(t->first_line) + fprintf(stderr, "%s:%d.%d-%d.%d: error: ", t->filename, t->first_line, t->first_column, + t->last_line, t->last_column); vfprintf(stderr, s, ap); fprintf(stderr, "\n"); @@ -997,25 +1007,37 @@ lyyerror(YYLTYPE t, char *s, ...) 
fprintf(stderr, "\n"); } +int main(int ac, char **av) { - extern FILE *yyin; + FILE *in_f; /* input stream handed to the scanner below */ + struct psql_state pstate; /* parser state passed to yyparse(); holds the scanner handle */ if(ac > 1 && !strcmp(av[1], "-d")) { yydebug = 1; ac--; av++; } + memset(&pstate, 0, sizeof(pstate)); + if (yylex_init_extra(&pstate, &pstate.scaninfo)) /* out-parameter: takes the ADDRESS of the handle */ + return 1; + if(ac > 1) { - if((yyin = fopen(av[1], "r")) == NULL) { + if((in_f = fopen(av[1], "r")) == NULL) { perror(av[1]); exit(1); } filename = av[1]; - } else + } else { filename = "(stdin)"; + in_f = stdin; + } - if(!yyparse()) + yyset_in(in_f, pstate.scaninfo); /* takes the yyscan_t itself, not its address; passing &pstate.scaninfo made flex write through the wrong pointer and left the real scanner reading stdin */ + + if(!yyparse(&pstate)) printf("SQL parse worked\n"); else printf("SQL parse failed\n"); + + return 0; } /* main */