Convert lexer and parser to be reentrant ("pure lexer" and "pure parser").

Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
This commit is contained in:
Jeff Garzik 2009-11-09 02:06:10 -05:00 committed by Jeff Garzik
parent d5e59cd6db
commit 35a5e94310
5 changed files with 93 additions and 52 deletions

1
.gitignore vendored
View File

@ -3,6 +3,7 @@
*.output *.output
sql.tab.* sql.tab.*
sql.lex.h
sql.c sql.c
sql sql

View File

@ -4,7 +4,7 @@
# Copyright (c) 2009, Taughannock Networks. All rights reserved. # Copyright (c) 2009, Taughannock Networks. All rights reserved.
# See the README file for license conditions and contact info. # See the README file for license conditions and contact info.
CC = cc -g CC = gcc -Wall -O -g
LEX = flex LEX = flex
YACC = bison YACC = bison
CFLAGS = -DYYDEBUG=1 CFLAGS = -DYYDEBUG=1
@ -15,7 +15,7 @@ all: ${PROGRAMS}
# chapter 4 # chapter 4
OBJS = sql.tab.o sql.o exec.o OBJS = sql.o sql.tab.o exec.o
sql: ${OBJS} sql-parser.h sql: ${OBJS} sql-parser.h
${CC} -o $@ ${OBJS} ${CC} -o $@ ${OBJS}
@ -29,7 +29,7 @@ sql.c: sql.l
sql.o: sql.c sql.tab.h sql.o: sql.c sql.tab.h
clean: clean:
rm -f sql sql.tab.c sql.tab.h sql.c ${OBJS} \ rm -f sql sql.tab.c sql.tab.h sql.lex.h sql.c ${OBJS} \
sql.output sql.output
.SUFFIXES: .l .y .c .SUFFIXES: .l .y .c

8
sql-parser-state.h Normal file
View File

@ -0,0 +1,8 @@
#ifndef SQL_PARSER_STATE_H
#define SQL_PARSER_STATE_H

/*
 * yyscan_t is the opaque handle type of a reentrant flex scanner.  flex
 * emits this same guarded typedef in the generated scanner and in its
 * header file, so repeating it here is harmless and lets this header be
 * included on its own, in any order relative to sql.lex.h.
 */
#ifndef YY_TYPEDEF_YY_SCANNER_T
#define YY_TYPEDEF_YY_SCANNER_T
typedef void *yyscan_t;
#endif

/*
 * State shared between the pure parser and the reentrant lexer.
 * A pointer to this struct is the extra argument of yyparse() and is
 * registered with the scanner as its yyextra value.
 */
struct psql_state {
	yyscan_t scaninfo;	/* scanner handle filled in by yylex_init_extra() */
};

#endif /* SQL_PARSER_STATE_H */

88
sql.l
View File

@ -9,21 +9,28 @@
* With error reporting and recovery * With error reporting and recovery
*/ */
%option noyywrap nodefault yylineno case-insensitive %option noyywrap nodefault yylineno case-insensitive reentrant bison-bridge
%option bison-locations
%option header-file="sql.lex.h"
%{ %{
#include "sql.tab.h"
#include <stdarg.h> #include <stdarg.h>
#include <string.h> #include <string.h>
#include "sql.tab.h"
#include "sql-parser-state.h"
void yyerror(YYLTYPE *, struct psql_state *pstate, char *s, ...);
void yyerror(char *s, ...);
int oldstate; int oldstate;
/* handle locations */ /* handle locations */
#if 0
int yycolumn = 1; int yycolumn = 1;
#endif
#define YY_USER_ACTION yylloc.filename = filename; \ #define YY_USER_ACTION yylloc->filename = filename; \
yylloc.first_line = yylloc.last_line = yylineno; \ yylloc->first_line = yylloc->last_line = yylineno; \
yylloc.first_column = yycolumn; yylloc.last_column = yycolumn+yyleng-1; \ yylloc->first_column = yycolumn; yylloc->last_column = yycolumn+yyleng-1; \
yycolumn += yyleng; yycolumn += yyleng;
@ -33,6 +40,9 @@ int yycolumn = 1;
%s BTWMODE %s BTWMODE
%% %%
%{
struct psql_state *pstate = yyextra;
%}
/* keywords */ /* keywords */
@ -106,8 +116,8 @@ ELSEIF { return ELSEIF; }
END { return END; } END { return END; }
ENUM { return ENUM; } ENUM { return ENUM; }
ESCAPED { return ESCAPED; } ESCAPED { return ESCAPED; }
EXISTS { yylval.subtok = 0; return EXISTS; } EXISTS { yylval->subtok = 0; return EXISTS; }
NOT[ \t]+EXISTS { yylval.subtok = 1; return EXISTS; } NOT[ \t]+EXISTS { yylval->subtok = 1; return EXISTS; }
EXIT { return EXIT; } EXIT { return EXIT; }
EXPLAIN { return EXPLAIN; } EXPLAIN { return EXPLAIN; }
FETCH { return FETCH; } FETCH { return FETCH; }
@ -262,35 +272,35 @@ ZEROFILL { return ZEROFILL; }
/* numbers */ /* numbers */
-?[0-9]+ { yylval.intval = atoi(yytext); return INTNUM; } -?[0-9]+ { yylval->intval = atoi(yytext); return INTNUM; }
-?[0-9]+"."[0-9]* | -?[0-9]+"."[0-9]* |
-?"."[0-9]+ | -?"."[0-9]+ |
-?[0-9]+E[-+]?[0-9]+ | -?[0-9]+E[-+]?[0-9]+ |
-?[0-9]+"."[0-9]*E[-+]?[0-9]+ | -?[0-9]+"."[0-9]*E[-+]?[0-9]+ |
-?"."[0-9]*E[-+]?[0-9]+ { yylval.floatval = atof(yytext) ; -?"."[0-9]*E[-+]?[0-9]+ { yylval->floatval = atof(yytext) ;
return APPROXNUM; } return APPROXNUM; }
/* booleans */ /* booleans */
TRUE { yylval.intval = 1; return BOOL; } TRUE { yylval->intval = 1; return BOOL; }
UNKNOWN { yylval.intval = -1; return BOOL; } UNKNOWN { yylval->intval = -1; return BOOL; }
FALSE { yylval.intval = 0; return BOOL; } FALSE { yylval->intval = 0; return BOOL; }
/* strings */ /* strings */
'(\\.|''|[^'\n])*' | '(\\.|''|[^'\n])*' |
\"(\\.|\"\"|[^"\n])*\" { yylval.strval = strdup(yytext); return STRING; } \"(\\.|\"\"|[^"\n])*\" { yylval->strval = strdup(yytext); return STRING; }
'(\\.|[^'\n])*$ { yyerror("Unterminated string %s", yytext); } '(\\.|[^'\n])*$ { yyerror(yylloc, pstate, "Unterminated string %s", yytext); }
\"(\\.|[^"\n])*$ { yyerror("Unterminated string %s", yytext); } \"(\\.|[^"\n])*$ { yyerror(yylloc, pstate, "Unterminated string %s", yytext); }
/* hex strings */ /* hex strings */
X'[0-9A-F]+' | X'[0-9A-F]+' |
0X[0-9A-F]+ { yylval.strval = strdup(yytext); return STRING; } 0X[0-9A-F]+ { yylval->strval = strdup(yytext); return STRING; }
/* bit strings */ /* bit strings */
0B[01]+ | 0B[01]+ |
B'[01]+' { yylval.strval = strdup(yytext); return STRING; } B'[01]+' { yylval->strval = strdup(yytext); return STRING; }
/* operators */ /* operators */
@ -299,17 +309,17 @@ B'[01]+' { yylval.strval = strdup(yytext); return STRING; }
"&&" { return ANDOP; } "&&" { return ANDOP; }
"||" { return OR; } "||" { return OR; }
"=" { yylval.subtok = 4; return COMPARISON; } "=" { yylval->subtok = 4; return COMPARISON; }
"<=>" { yylval.subtok = 12; return COMPARISON; } "<=>" { yylval->subtok = 12; return COMPARISON; }
">=" { yylval.subtok = 6; return COMPARISON; } ">=" { yylval->subtok = 6; return COMPARISON; }
">" { yylval.subtok = 2; return COMPARISON; } ">" { yylval->subtok = 2; return COMPARISON; }
"<=" { yylval.subtok = 5; return COMPARISON; } "<=" { yylval->subtok = 5; return COMPARISON; }
"<" { yylval.subtok = 1; return COMPARISON; } "<" { yylval->subtok = 1; return COMPARISON; }
"!=" | "!=" |
"<>" { yylval.subtok = 3; return COMPARISON; } "<>" { yylval->subtok = 3; return COMPARISON; }
"<<" { yylval.subtok = 1; return SHIFT; } "<<" { yylval->subtok = 1; return SHIFT; }
">>" { yylval.subtok = 2; return SHIFT; } ">>" { yylval->subtok = 2; return SHIFT; }
/* functions */ /* functions */
@ -321,28 +331,28 @@ DATE_SUB/"(" { return FDATE_SUB; }
/* /*
* peek ahead and return function if name( * peek ahead and return function if name(
*/ */
COUNT { int c = input(); unput(c); COUNT { int c = input(pstate->scaninfo); unput(c);
if(c == '(') return FCOUNT; if(c == '(') return FCOUNT;
yylval.strval = strdup(yytext); yylval->strval = strdup(yytext);
return NAME; } return NAME; }
[A-Za-z][A-Za-z0-9_]* { yylval.strval = strdup(yytext); [A-Za-z][A-Za-z0-9_]* { yylval->strval = strdup(yytext);
return NAME; } return NAME; }
`[^`/\\.\n]+` { yylval.strval = strdup(yytext+1); `[^`/\\.\n]+` { yylval->strval = strdup(yytext+1);
yylval.strval[yyleng-2] = 0; yylval->strval[yyleng-2] = 0;
return NAME; } return NAME; }
`[^`\n]*$ { yyerror("unterminated quoted name %s", yytext); } `[^`\n]*$ { yyerror(yylloc, pstate, "unterminated quoted name %s", yytext); }
/* user variables */ /* user variables */
@[0-9a-z_.$]+ | @[0-9a-z_.$]+ |
@\"[^"\n]+\" | @\"[^"\n]+\" |
@`[^`\n]+` | @`[^`\n]+` |
@'[^'\n]+' { yylval.strval = strdup(yytext+1); return USERVAR; } @'[^'\n]+' { yylval->strval = strdup(yytext+1); return USERVAR; }
@\"[^"\n]*$ { yyerror("unterminated quoted user variable %s", yytext); } @\"[^"\n]*$ { yyerror(yylloc, pstate, "unterminated quoted user variable %s", yytext); }
@`[^`\n]*$ { yyerror("unterminated quoted user variable %s", yytext); } @`[^`\n]*$ { yyerror(yylloc, pstate, "unterminated quoted user variable %s", yytext); }
@'[^'\n]*$ { yyerror("unterminated quoted user variable %s", yytext); } @'[^'\n]*$ { yyerror(yylloc, pstate, "unterminated quoted user variable %s", yytext); }
":=" { return ASSIGN; } ":=" { return ASSIGN; }
@ -355,12 +365,12 @@ COUNT { int c = input(); unput(c);
<COMMENT>"*/" { BEGIN oldstate; } <COMMENT>"*/" { BEGIN oldstate; }
<COMMENT>. ; <COMMENT>. ;
<COMMENT>\n { yycolumn = 1; } <COMMENT>\n { yycolumn = 1; }
<COMMENT><<EOF>> { yyerror("unclosed comment"); } <COMMENT><<EOF>> { yyerror(yylloc, pstate, "unclosed comment"); }
/* everything else */ /* everything else */
[ \t] /* white space */ [ \t] /* white space */
\n { yycolumn = 1; } \n { yycolumn = 1; }
. { yyerror("mystery character '%c'", *yytext); } . { yyerror(yylloc, pstate, "mystery character '%c'", *yytext); }
%% %%

42
sql.y
View File

@ -4,6 +4,10 @@
* See the README file for license conditions and contact info. * See the README file for license conditions and contact info.
* $Header: /home/johnl/flnb/code/sql/RCS/lpmysql.y,v 2.1 2009/11/08 02:53:39 johnl Exp $ * $Header: /home/johnl/flnb/code/sql/RCS/lpmysql.y,v 2.1 2009/11/08 02:53:39 johnl Exp $
*/ */
%define api.pure
%parse-param { struct psql_state *pstate }
/* /*
* Parser for mysql subset * Parser for mysql subset
*/ */
@ -56,6 +60,12 @@ typedef struct YYLTYPE {
int subtok; int subtok;
} }
%{
#include "sql.lex.h"
#include "sql-parser-state.h"
#define YYLEX_PARAM pstate->scaninfo
%}
/* names and literal values */ /* names and literal values */
%token <strval> NAME %token <strval> NAME
@ -330,8 +340,8 @@ typedef struct YYLTYPE {
%start stmt_list %start stmt_list
%{ %{
void yyerror(char *s, ...); void yyerror(YYLTYPE *, struct psql_state *pstate, char *s, ...);
void lyyerror(YYLTYPE, char *s, ...); void lyyerror(YYLTYPE t, char *s, ...);
%} %}
/* free discarded tokens */ /* free discarded tokens */
%destructor { printf ("free at %d %s\n",@$.first_line, $$); free($$); } <strval> %destructor { printf ("free at %d %s\n",@$.first_line, $$); free($$); } <strval>
@ -971,14 +981,14 @@ expr: BINARY expr %prec UMINUS { sqlp_expr_op(SEO_STRTOBIN); }
%% %%
void void
yyerror(char *s, ...) yyerror(YYLTYPE *t, struct psql_state *pstate, char *s, ...)
{ {
va_list ap; va_list ap;
va_start(ap, s); va_start(ap, s);
if(yylloc.first_line) if(t->first_line)
fprintf(stderr, "%s:%d.%d-%d.%d: error: ", yylloc.filename, yylloc.first_line, yylloc.first_column, fprintf(stderr, "%s:%d.%d-%d.%d: error: ", t->filename, t->first_line, t->first_column,
yylloc.last_line, yylloc.last_column); t->last_line, t->last_column);
vfprintf(stderr, s, ap); vfprintf(stderr, s, ap);
fprintf(stderr, "\n"); fprintf(stderr, "\n");
@ -997,25 +1007,37 @@ lyyerror(YYLTYPE t, char *s, ...)
fprintf(stderr, "\n"); fprintf(stderr, "\n");
} }
int
main(int ac, char **av) main(int ac, char **av)
{ {
extern FILE *yyin; FILE *in_f;
struct psql_state pstate;
if(ac > 1 && !strcmp(av[1], "-d")) { if(ac > 1 && !strcmp(av[1], "-d")) {
yydebug = 1; ac--; av++; yydebug = 1; ac--; av++;
} }
memset(&pstate, 0, sizeof(pstate));
if (yylex_init_extra(&pstate, &pstate.scaninfo))
return 1;
if(ac > 1) { if(ac > 1) {
if((yyin = fopen(av[1], "r")) == NULL) { if((in_f = fopen(av[1], "r")) == NULL) {
perror(av[1]); perror(av[1]);
exit(1); exit(1);
} }
filename = av[1]; filename = av[1];
} else } else {
filename = "(stdin)"; filename = "(stdin)";
in_f = stdin;
}
if(!yyparse()) yyset_in(in_f, &pstate.scaninfo);
if(!yyparse(&pstate))
printf("SQL parse worked\n"); printf("SQL parse worked\n");
else else
printf("SQL parse failed\n"); printf("SQL parse failed\n");
return 0;
} /* main */ } /* main */