Convert the core lexer and parser into fully reentrant code, by making use
of features added to flex and bison since this code was originally written. This change doesn't in itself offer any new capability, but it's needed infrastructure for planned improvements in plpgsql. Another feature now available in flex is the ability to make it use palloc instead of malloc, so do that to avoid possible memory leaks. (We should at some point change the other lexers likewise, but this commit doesn't touch them.)

Convert the core lexer and parser into fully reentrant code, by making use
of features added to flex and bison since this code was originally written. This change doesn't in itself offer any new capability, but it's needed infrastructure for planned improvements in plpgsql. Another feature now available in flex is the ability to make it use palloc instead of malloc, so do that to avoid possible memory leaks. (We should at some point change the other lexers likewise, but this commit doesn't touch them.)
91e71929 · Tom Lane · da4b9001 · 91e71929 · 91e71929 · 91e71929
Commit 91e71929 authored Jul 13, 2009 by Tom Lane
5 changed files
--- a/src/backend/nls.mk
+++ b/src/backend/nls.mk
-# $PostgreSQL: pgsql/src/backend/nls.mk,v 1.27 2009/06/26 19:33:43 petere Exp $
+# $PostgreSQL: pgsql/src/backend/nls.mk,v 1.28 2009/07/13 02:02:19 tgl Exp $
 CATALOG_NAME	:= postgres
 AVAIL_LANGUAGES	:= de es fr ja pt_BR tr
 GETTEXT_FILES	:= + gettext-files
-GETTEXT_TRIGGERS:= _ errmsg errmsg_plural:1,2 errdetail errdetail_log errdetail_plural:1,2 errhint errcontext write_stderr yyerror
+GETTEXT_TRIGGERS:= _ errmsg errmsg_plural:1,2 errdetail errdetail_log errdetail_plural:1,2 errhint errcontext write_stderr yyerror parser_yyerror

 gettext-files: distprep
 	find $(srcdir)/ $(srcdir)/../port/ -name '*.c' -print >$@

--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
--- a/src/backend/parser/parser.c
+++ b/src/backend/parser/parser.c
@@ -14,7 +14,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.79 2009/07/12 17:12:34 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.80 2009/07/13 02:02:20 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -25,14 +25,6 @@
 #include "parser/parser.h"


-List	   *parsetree;			/* result of parsing is left here */
-
-static bool have_lookahead;		/* is lookahead info valid? */
-static int	lookahead_token;	/* one-token lookahead */
-static YYSTYPE lookahead_yylval;	/* yylval for lookahead token */
-static YYLTYPE lookahead_yylloc;	/* yylloc for lookahead token */
-
-
 /*
 * raw_parser
 *		Given a query in string form, do lexical and grammatical analysis.
@@ -42,22 +34,29 @@ static YYLTYPE lookahead_yylloc;	/* yylloc for lookahead token */
 List *
 raw_parser(const char *str)
 {
+	base_yyscan_t yyscanner;
+	base_yy_extra_type yyextra;
 	int			yyresult;

-	parsetree = NIL;			/* in case grammar forgets to set it */
-	have_lookahead = false;
+	/* initialize the flex scanner */
+	yyscanner = scanner_init(str, &yyextra);
+
+	/* filtered_base_yylex() only needs this much initialization */
+	yyextra.have_lookahead = false;

-	scanner_init(str);
-	parser_init();
+	/* initialize the bison parser */
+	parser_init(&yyextra);

-	yyresult = base_yyparse();
+	/* Parse! */
+	yyresult = base_yyparse(yyscanner);

-	scanner_finish();
+	/* Clean up (release memory) */
+	scanner_finish(yyscanner);

 	if (yyresult)				/* error */
 		return NIL;

-	return parsetree;
+	return yyextra.parsetree;
 }


@@ -69,25 +68,27 @@ raw_parser(const char *str)
 * passed string does represent one single string literal.
 *
 * We export this function to avoid having plpgsql depend on internal details
- * of the core grammar (such as the token code assigned to SCONST).  Note
- * that since the scanner isn't presently re-entrant, this cannot be used
- * during use of the main parser/scanner.
+ * of the core grammar (such as the token code assigned to SCONST).
 */
 char *
 pg_parse_string_token(const char *token)
 {
+	base_yyscan_t yyscanner;
+	base_yy_extra_type yyextra;
 	int			ctoken;
+	YYSTYPE		yylval;
+	YYLTYPE		yylloc;

-	scanner_init(token);
+	yyscanner = scanner_init(token, &yyextra);

-	ctoken = base_yylex();
+	ctoken = base_yylex(&yylval, &yylloc, yyscanner);

 	if (ctoken != SCONST)		/* caller error */
 		elog(ERROR, "expected string constant, got token code %d", ctoken);

-	scanner_finish();
+	scanner_finish(yyscanner);

-	return base_yylval.str;
+	return yylval.str;
 }


@@ -105,23 +106,24 @@ pg_parse_string_token(const char *token)
 * layer does.
 */
 int
-filtered_base_yylex(void)
+filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner)
 {
+	base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner);
 	int			cur_token;
 	int			next_token;
 	YYSTYPE		cur_yylval;
 	YYLTYPE		cur_yylloc;

 	/* Get next token --- we might already have it */
-	if (have_lookahead)
+	if (yyextra->have_lookahead)
 	{
-		cur_token = lookahead_token;
-		base_yylval = lookahead_yylval;
-		base_yylloc = lookahead_yylloc;
-		have_lookahead = false;
+		cur_token = yyextra->lookahead_token;
+		*lvalp = yyextra->lookahead_yylval;
+		*llocp = yyextra->lookahead_yylloc;
+		yyextra->have_lookahead = false;
 	}
 	else
-		cur_token = base_yylex();
+		cur_token = base_yylex(lvalp, llocp, yyscanner);

 	/* Do we need to look ahead for a possible multiword token? */
 	switch (cur_token)
@@ -131,9 +133,9 @@ filtered_base_yylex(void)
 			/*
 			 * NULLS FIRST and NULLS LAST must be reduced to one token
 			 */
-			cur_yylval = base_yylval;
-			cur_yylloc = base_yylloc;
-			next_token = base_yylex();
+			cur_yylval = *lvalp;
+			cur_yylloc = *llocp;
+			next_token = base_yylex(lvalp, llocp, yyscanner);
 			switch (next_token)
 			{
 				case FIRST_P:
@@ -144,13 +146,13 @@ filtered_base_yylex(void)
 					break;
 				default:
 					/* save the lookahead token for next time */
-					lookahead_token = next_token;
-					lookahead_yylval = base_yylval;
-					lookahead_yylloc = base_yylloc;
-					have_lookahead = true;
+					yyextra->lookahead_token = next_token;
+					yyextra->lookahead_yylval = *lvalp;
+					yyextra->lookahead_yylloc = *llocp;
+					yyextra->have_lookahead = true;
 					/* and back up the output info to cur_token */
-					base_yylval = cur_yylval;
-					base_yylloc = cur_yylloc;
+					*lvalp = cur_yylval;
+					*llocp = cur_yylloc;
 					break;
 			}
 			break;
@@ -160,9 +162,9 @@ filtered_base_yylex(void)
 			/*
 			 * WITH TIME must be reduced to one token
 			 */
-			cur_yylval = base_yylval;
-			cur_yylloc = base_yylloc;
-			next_token = base_yylex();
+			cur_yylval = *lvalp;
+			cur_yylloc = *llocp;
+			next_token = base_yylex(lvalp, llocp, yyscanner);
 			switch (next_token)
 			{
 				case TIME:
@@ -170,13 +172,13 @@ filtered_base_yylex(void)
 					break;
 				default:
 					/* save the lookahead token for next time */
-					lookahead_token = next_token;
-					lookahead_yylval = base_yylval;
-					lookahead_yylloc = base_yylloc;
-					have_lookahead = true;
+					yyextra->lookahead_token = next_token;
+					yyextra->lookahead_yylval = *lvalp;
+					yyextra->lookahead_yylloc = *llocp;
+					yyextra->have_lookahead = true;
 					/* and back up the output info to cur_token */
-					base_yylval = cur_yylval;
-					base_yylloc = cur_yylloc;
+					*lvalp = cur_yylval;
+					*llocp = cur_yylloc;
 					break;
 			}
 			break;

--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
--- a/src/include/parser/gramparse.h
+++ b/src/include/parser/gramparse.h
@@ -11,7 +11,7 @@
 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.45 2009/07/12 17:12:34 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.46 2009/07/13 02:02:20 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -36,19 +36,85 @@
 */
 #include "parser/gram.h"

+/*
+ * The YY_EXTRA data that a flex scanner allows us to pass around.  Private
+ * state needed for raw parsing/lexing goes here.
+ */
+typedef struct base_yy_extra_type
+{
+	/*
+	 * The string the lexer is physically scanning.  We keep this mainly so
+	 * that we can cheaply compute the offset of the current token (yytext).
+	 */
+	char	   *scanbuf;
+	Size		scanbuflen;
+
+	/*
+	 * literalbuf is used to accumulate literal values when multiple rules
+	 * are needed to parse a single literal.  Call startlit() to reset buffer
+	 * to empty, addlit() to add text.  NOTE: the string in literalbuf is
+	 * NOT necessarily null-terminated, but there always IS room to add a
+	 * trailing null at offset literallen.  We store a null only when we
+	 * need it.
+	 */
+	char	   *literalbuf;		/* palloc'd expandable buffer */
+	int			literallen;		/* actual current string length */
+	int			literalalloc;	/* current allocated buffer size */
+
+	int			xcdepth;		/* depth of nesting in slash-star comments */
+	char	   *dolqstart;		/* current $foo$ quote start string */
+
+	/* state variables for literal-lexing warnings */
+	bool		warn_on_first_escape;
+	bool		saw_non_ascii;
+
+	/*
+	 * State variables for filtered_base_yylex().
+	 */
+	bool		have_lookahead;		/* is lookahead info valid? */
+	int			lookahead_token;	/* one-token lookahead */
+	YYSTYPE		lookahead_yylval;	/* yylval for lookahead token */
+	YYLTYPE		lookahead_yylloc;	/* yylloc for lookahead token */
+
+	/*
+	 * State variables that belong to the grammar, not the lexer.  It's
+	 * simpler to keep these here than to invent a separate structure.
+	 * These fields are unused/undefined if the lexer is invoked on its own.
+	 */
+
+	List	   *parsetree;		/* final parse result is delivered here */
+
+	bool		QueryIsRule;	/* signals we are parsing CREATE RULE */
+} base_yy_extra_type;
+
+/*
+ * The type of yyscanner is opaque outside scan.l.
+ */
+typedef void *base_yyscan_t;
+
+/*
+ * In principle we should use yyget_extra() to fetch the yyextra field
+ * from a yyscanner struct.  However, flex always puts that field first,
+ * and this is sufficiently performance-critical to make it seem worth
+ * cheating a bit to use an inline macro.
+ */
+#define pg_yyget_extra(yyscanner) (*((base_yy_extra_type **) (yyscanner)))
+

 /* from parser.c */
-extern int	filtered_base_yylex(void);
+extern int	filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
+								base_yyscan_t yyscanner);

 /* from scan.l */
-extern void scanner_init(const char *str);
-extern void scanner_finish(void);
-extern int	base_yylex(void);
-extern int	scanner_errposition(int location);
-extern void base_yyerror(const char *message);
+extern base_yyscan_t scanner_init(const char *str, base_yy_extra_type *yyext);
+extern void scanner_finish(base_yyscan_t yyscanner);
+extern int	base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp,
+					   base_yyscan_t yyscanner);
+extern int	scanner_errposition(int location, base_yyscan_t yyscanner);
+extern void scanner_yyerror(const char *message, base_yyscan_t yyscanner);

 /* from gram.y */
-extern void parser_init(void);
-extern int	base_yyparse(void);
+extern void parser_init(base_yy_extra_type *yyext);
+extern int	base_yyparse(base_yyscan_t yyscanner);

 #endif   /* GRAMPARSE_H */