Commit 91e71929 authored by Tom Lane

Convert the core lexer and parser into fully reentrant code, by making use
of features added to flex and bison since this code was originally written.
This change doesn't in itself offer any new capability, but it's needed
infrastructure for planned improvements in plpgsql.

Another feature now available in flex is the ability to make it use palloc
instead of malloc, so do that to avoid possible memory leaks.  (We should
at some point change the other lexers likewise, but this commit doesn't
touch them.)
parent da4b9001
# $PostgreSQL: pgsql/src/backend/nls.mk,v 1.27 2009/06/26 19:33:43 petere Exp $
# $PostgreSQL: pgsql/src/backend/nls.mk,v 1.28 2009/07/13 02:02:19 tgl Exp $
CATALOG_NAME := postgres
AVAIL_LANGUAGES := de es fr ja pt_BR tr
GETTEXT_FILES := + gettext-files
GETTEXT_TRIGGERS:= _ errmsg errmsg_plural:1,2 errdetail errdetail_log errdetail_plural:1,2 errhint errcontext write_stderr yyerror
GETTEXT_TRIGGERS:= _ errmsg errmsg_plural:1,2 errdetail errdetail_log errdetail_plural:1,2 errhint errcontext write_stderr yyerror parser_yyerror
gettext-files: distprep
find $(srcdir)/ $(srcdir)/../port/ -name '*.c' -print >$@
......
This diff is collapsed.
......@@ -14,7 +14,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.79 2009/07/12 17:12:34 tgl Exp $
* $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.80 2009/07/13 02:02:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -25,14 +25,6 @@
#include "parser/parser.h"
List *parsetree; /* result of parsing is left here */
static bool have_lookahead; /* is lookahead info valid? */
static int lookahead_token; /* one-token lookahead */
static YYSTYPE lookahead_yylval; /* yylval for lookahead token */
static YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */
/*
* raw_parser
* Given a query in string form, do lexical and grammatical analysis.
......@@ -42,22 +34,29 @@ static YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */
/*
 * raw_parser: run the scanner and the grammar over the query string "str".
 *
 * Returns NIL when base_yyparse() yields a nonzero result; otherwise returns
 * the parse tree.
 *
 * NOTE(review): this text comes from a diff view that carries no +/- markers,
 * so pre-change and post-change statements are interleaved.  The calls that
 * take no yyscanner/yyextra argument, and the references to the file-level
 * parsetree/have_lookahead variables, appear to be the removed lines --
 * confirm against the committed file before relying on either variant.
 */
List *
raw_parser(const char *str)
{
/* handle returned by scanner_init(); passed on to the parse and cleanup calls */
base_yyscan_t yyscanner;
/* per-call lexer/parser state, declared as a local rather than at file level */
base_yy_extra_type yyextra;
int yyresult;
parsetree = NIL; /* in case grammar forgets to set it */
have_lookahead = false;
/* initialize the flex scanner */
yyscanner = scanner_init(str, &yyextra);
/* filtered_base_yylex() only needs this much initialization */
yyextra.have_lookahead = false;
scanner_init(str);
parser_init();
/* initialize the bison parser */
parser_init(&yyextra);
yyresult = base_yyparse();
/* Parse! */
yyresult = base_yyparse(yyscanner);
scanner_finish();
/* Clean up (release memory) */
scanner_finish(yyscanner);
/* a nonzero result from base_yyparse() is reported to the caller as NIL */
if (yyresult) /* error */
return NIL;
return parsetree;
/* the result is read from the local yyextra after scanner_finish() has run */
return yyextra.parsetree;
}
......@@ -69,25 +68,27 @@ raw_parser(const char *str)
* passed string does represent one single string literal.
*
* We export this function to avoid having plpgsql depend on internal details
* of the core grammar (such as the token code assigned to SCONST). Note
* that since the scanner isn't presently re-entrant, this cannot be used
* during use of the main parser/scanner.
* of the core grammar (such as the token code assigned to SCONST).
*/
/*
 * pg_parse_string_token: lex the first token of "token" and return the
 * string member (.str) of its semantic value.
 *
 * Reports elog(ERROR) if that first token is not SCONST.  Only one token is
 * fetched; nothing in this function inspects any input following it.
 *
 * NOTE(review): this text comes from a diff view without +/- markers, so
 * old and new statements are interleaved.  The argument-less scanner calls
 * and the base_yylval reference appear to be the removed lines -- confirm
 * against the committed file.
 */
char *
pg_parse_string_token(const char *token)
{
base_yyscan_t yyscanner;
base_yy_extra_type yyextra;
int ctoken;
/* local output slots for the lexer's semantic value and location */
YYSTYPE yylval;
YYLTYPE yylloc;
scanner_init(token);
yyscanner = scanner_init(token, &yyextra);
ctoken = base_yylex();
ctoken = base_yylex(&yylval, &yylloc, yyscanner);
if (ctoken != SCONST) /* caller error */
elog(ERROR, "expected string constant, got token code %d", ctoken);
scanner_finish();
scanner_finish(yyscanner);
return base_yylval.str;
/*
 * NOTE(review): yylval.str is read after scanner_finish(); presumably the
 * string is not freed by that cleanup -- confirm in scan.l.
 */
return yylval.str;
}
......@@ -105,23 +106,24 @@ pg_parse_string_token(const char *token)
* layer does.
*/
int
filtered_base_yylex(void)
filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner)
{
base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
int cur_token;
int next_token;
YYSTYPE cur_yylval;
YYLTYPE cur_yylloc;
/* Get next token --- we might already have it */
if (have_lookahead)
if (yyextra->have_lookahead)
{
cur_token = lookahead_token;
base_yylval = lookahead_yylval;
base_yylloc = lookahead_yylloc;
have_lookahead = false;
cur_token = yyextra->lookahead_token;
*lvalp = yyextra->lookahead_yylval;
*llocp = yyextra->lookahead_yylloc;
yyextra->have_lookahead = false;
}
else
cur_token = base_yylex();
cur_token = base_yylex(lvalp, llocp, yyscanner);
/* Do we need to look ahead for a possible multiword token? */
switch (cur_token)
......@@ -131,9 +133,9 @@ filtered_base_yylex(void)
/*
* NULLS FIRST and NULLS LAST must be reduced to one token
*/
cur_yylval = base_yylval;
cur_yylloc = base_yylloc;
next_token = base_yylex();
cur_yylval = *lvalp;
cur_yylloc = *llocp;
next_token = base_yylex(lvalp, llocp, yyscanner);
switch (next_token)
{
case FIRST_P:
......@@ -144,13 +146,13 @@ filtered_base_yylex(void)
break;
default:
/* save the lookahead token for next time */
lookahead_token = next_token;
lookahead_yylval = base_yylval;
lookahead_yylloc = base_yylloc;
have_lookahead = true;
yyextra->lookahead_token = next_token;
yyextra->lookahead_yylval = *lvalp;
yyextra->lookahead_yylloc = *llocp;
yyextra->have_lookahead = true;
/* and back up the output info to cur_token */
base_yylval = cur_yylval;
base_yylloc = cur_yylloc;
*lvalp = cur_yylval;
*llocp = cur_yylloc;
break;
}
break;
......@@ -160,9 +162,9 @@ filtered_base_yylex(void)
/*
* WITH TIME must be reduced to one token
*/
cur_yylval = base_yylval;
cur_yylloc = base_yylloc;
next_token = base_yylex();
cur_yylval = *lvalp;
cur_yylloc = *llocp;
next_token = base_yylex(lvalp, llocp, yyscanner);
switch (next_token)
{
case TIME:
......@@ -170,13 +172,13 @@ filtered_base_yylex(void)
break;
default:
/* save the lookahead token for next time */
lookahead_token = next_token;
lookahead_yylval = base_yylval;
lookahead_yylloc = base_yylloc;
have_lookahead = true;
yyextra->lookahead_token = next_token;
yyextra->lookahead_yylval = *lvalp;
yyextra->lookahead_yylloc = *llocp;
yyextra->have_lookahead = true;
/* and back up the output info to cur_token */
base_yylval = cur_yylval;
base_yylloc = cur_yylloc;
*lvalp = cur_yylval;
*llocp = cur_yylloc;
break;
}
break;
......
This diff is collapsed.
......@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.45 2009/07/12 17:12:34 tgl Exp $
* $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.46 2009/07/13 02:02:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -36,19 +36,85 @@
*/
#include "parser/gram.h"
/*
* The YY_EXTRA data that a flex scanner allows us to pass around. Private
* state needed for raw parsing/lexing goes here.
*/
/*
 * A pointer to one of these is handed to scanner_init() and parser_init(),
 * and is fetched back from a yyscanner handle with pg_yyget_extra().
 */
typedef struct base_yy_extra_type
{
/*
 * The string the lexer is physically scanning. We keep this mainly so
 * that we can cheaply compute the offset of the current token (yytext).
 */
char *scanbuf;
Size scanbuflen; /* presumably the length of scanbuf -- TODO confirm in scan.l */
/*
 * literalbuf is used to accumulate literal values when multiple rules
 * are needed to parse a single literal. Call startlit() to reset buffer
 * to empty, addlit() to add text. NOTE: the string in literalbuf is
 * NOT necessarily null-terminated, but there always IS room to add a
 * trailing null at offset literallen. We store a null only when we
 * need it.
 */
char *literalbuf; /* palloc'd expandable buffer */
int literallen; /* actual current string length */
int literalalloc; /* current allocated buffer size */
int xcdepth; /* depth of nesting in slash-star comments */
char *dolqstart; /* current $foo$ quote start string */
/* state variables for literal-lexing warnings */
bool warn_on_first_escape;
bool saw_non_ascii;
/*
 * State variables for filtered_base_yylex().
 *
 * have_lookahead has to be set false before the first
 * filtered_base_yylex() call (raw_parser() does this).  The three
 * lookahead_* fields are written together when a token is saved, and are
 * read back only while have_lookahead is true.
 */
bool have_lookahead; /* is lookahead info valid? */
int lookahead_token; /* one-token lookahead */
YYSTYPE lookahead_yylval; /* yylval for lookahead token */
YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */
/*
 * State variables that belong to the grammar, not the lexer. It's
 * simpler to keep these here than to invent a separate structure.
 * These fields are unused/undefined if the lexer is invoked on its own.
 */
List *parsetree; /* final parse result is delivered here */
bool QueryIsRule; /* signals we are parsing CREATE RULE */
} base_yy_extra_type;
/*
* The type of yyscanner is opaque outside scan.l.
*/
typedef void *base_yyscan_t;
/*
* In principle we should use yyget_extra() to fetch the yyextra field
* from a yyscanner struct. However, flex always puts that field first,
* and this is sufficiently performance-critical to make it seem worth
* cheating a bit to use an inline macro.
*/
#define pg_yyget_extra(yyscanner) (*((base_yy_extra_type **) (yyscanner)))
/* from parser.c */
extern int filtered_base_yylex(void);
extern int filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
base_yyscan_t yyscanner);
/* from scan.l */
extern void scanner_init(const char *str);
extern void scanner_finish(void);
extern int base_yylex(void);
extern int scanner_errposition(int location);
extern void base_yyerror(const char *message);
extern base_yyscan_t scanner_init(const char *str, base_yy_extra_type *yyext);
extern void scanner_finish(base_yyscan_t yyscanner);
extern int base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
base_yyscan_t yyscanner);
extern int scanner_errposition(int location, base_yyscan_t yyscanner);
extern void scanner_yyerror(const char *message, base_yyscan_t yyscanner);
/* from gram.y */
extern void parser_init(void);
extern int base_yyparse(void);
extern void parser_init(base_yy_extra_type *yyext);
extern int base_yyparse(base_yyscan_t yyscanner);
#endif /* GRAMPARSE_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment