Remove fixed-size literal buffer from ecpg's lexer (same

fix recently applied to backend's lexer). I see that YY_USES_REJECT still gets defined for this lexer, which means it's going to have trouble parsing really long tokens. Not sure if it's worth doing anything about that or not; I don't have the interest right now to understand why ecpg's additions to the syntax cause this problem...

Remove fixed-size literal buffer from ecpg's lexer (same
fix recently applied to backend's lexer). I see that YY_USES_REJECT still gets defined for this lexer, which means it's going to have trouble parsing really long tokens. Not sure if it's worth doing anything about that or not; I don't have the interest right now to understand why ecpg's additions to the syntax cause this problem...
45c00246 · Tom Lane · abceb20a · 45c00246 · 45c00246
Commit 45c00246 authored Oct 22, 1999 by Tom Lane
Show whitespace changes
Inline Side-by-side

Showing with 107 additions and 84 deletions

src/interfaces/ecpg/preproc/Makefile src/interfaces/ecpg/preproc/Makefile +11 -13

src/interfaces/ecpg/preproc/pgc.l src/interfaces/ecpg/preproc/pgc.l +96 -71

No files found.
--- a/src/interfaces/ecpg/preproc/Makefile
+++ b/src/interfaces/ecpg/preproc/Makefile
@@ -15,11 +15,19 @@ OBJ=preproc.o pgc.o type.o ecpg.o ecpg_keywords.o \
 all:: ecpg
+# Rule that really do something.
+ecpg: $(OBJ)
+	$(CC) -o ecpg $(OBJ) $(LEXLIB) $(LDFLAGS)
 preproc.c preproc.h: preproc.y
 	$(YACC) $(YFLAGS) $<
 	mv y.tab.c preproc.c
 	mv y.tab.h preproc.h
+pgc.c: pgc.l
+	$(LEX) $<
+	mv lex.yy.c pgc.c
 clean:
 	rm -f *.o core a.out ecpg$(X) *~ *.output
 # And the garbage that might have been left behind by partial build:
@@ -33,19 +41,9 @@ install: all
 uninstall:
 	rm -f $(BINDIR)/ecpg
-# Rule that really do something.
+preproc.o: preproc.h ../include/ecpgtype.h keywords.c c_keywords.c ecpg_keywords.c
-ecpg: $(OBJ)
+type.o: ../include/ecpgtype.h
-	$(CC) -o ecpg $(OBJ) $(LEXLIB) $(LDFLAGS)
+pgc.o: ../include/ecpgtype.h keywords.c c_keywords.c ecpg_keywords.c preproc.h
-pgc.c: pgc.l
-	$(LEX) $<
-	sed -e 's/#define YY_BUF_SIZE .*/#define YY_BUF_SIZE 65536/' \
-		<lex.yy.c >pgc.c
-	rm -f lex.yy.c
-preproc.o : preproc.h ../include/ecpgtype.h keywords.c c_keywords.c ecpg_keywords.c
-type.o : ../include/ecpgtype.h
-pgc.o : ../include/ecpgtype.h keywords.c c_keywords.c ecpg_keywords.c preproc.h
 keywords.o: ../include/ecpgtype.h preproc.h
 c_keywords.o: ../include/ecpgtype.h preproc.h 
 ecpg_keywords.o: ../include/ecpgtype.h preproc.h 

--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
-/* This is a modified version of src/backend/parser/scan.l */
 %{
+/*-------------------------------------------------------------------------
+ *
+ * pgc.l
+ *	  lexical scanner for ecpg
+ *
+ * This is a modified version of src/backend/parser/scan.l
+ *
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.45 1999/10/22 23:14:50 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
 #include <ctype.h>
 #include <sys/types.h>
 #include <limits.h>
 #include <errno.h>
 #include "postgres.h"
 #ifndef PATH_MAX
 #include <sys/param.h>
 #define PATH_MAX MAXPATHLEN
 #endif
 #include "miscadmin.h"
-#include "nodes/pg_list.h"
 #include "nodes/parsenodes.h"
+#include "nodes/pg_list.h"
 #include "parser/gramparse.h"
 #include "parser/scansup.h"
 #include "extern.h"
 #include "preproc.h"
 #include "utils/builtins.h"
-#ifdef  YY_READ_BUF_SIZE
-#undef  YY_READ_BUF_SIZE
-#endif
-#define YY_READ_BUF_SIZE	MAX_PARSE_BUFFER
 /* some versions of lex define this as a macro */
 #if defined(yywrap)
 #undef yywrap
 #endif /* yywrap */
 extern YYSTYPE yylval;
-int llen;
-char literal[MAX_PARSE_BUFFER];
+/*
+ * literalbuf is used to accumulate literal values when multiple rules
+ * are needed to parse a single literal.  Call startlit to reset buffer
+ * to empty, addlit to add text.  Note that the buffer is permanently
+ * malloc'd to the largest size needed so far in the current run.
+ *
+ * literalbuf starts out NULL and startlit writes through it, so the
+ * buffer must have been allocated (lex_init does this) before the first
+ * startlit call.
+ */
+static char	   *literalbuf = NULL;		/* expandable buffer */
+static int		literallen;		/* current length, not counting the trailing null */
+static int		literalalloc;	/* current allocated buffer size */
+/* reset the buffer to the empty string; expands to a comma expression */
+#define startlit()  (literalbuf[0] = '\0', literallen = 0)
+/* append yleng bytes of ytext (plus its null terminator) to literalbuf */
+static void addlit(char *ytext, int yleng);
 int before_comment;
 struct _yy_buffer { YY_BUFFER_STATE 	buffer;
@@ -142,16 +165,14 @@ self			[,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
 op_and_self		[\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
 operator		{op_and_self}+
-/* we do not allow unary minus in numbers.
+/* we no longer allow unary minus in numbers. 
- * instead we pass it verbatim to parser. there it gets
+ * instead we pass it separately to parser. there it gets
 * coerced via doNegate() -- Leon aug 20 1999 
 */
 integer			{digit}+
 decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
 real				((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
-/*
-real			(((({digit}*\.{digit}+)|({digit}+\.{digit}*))([Ee][-+]?{digit}+)?)|({digit}+[Ee][-+]?{digit}+))
-*/
 param			\${integer}
@@ -200,25 +221,21 @@ cppline		{space}*#.*(\\{space}*\n)*\n*
 <SQL>{xbstart}		{
 					BEGIN(xb);
-					llen = 0;
+					startlit();
-					*literal = '\0';
 				}
 <xb>{xbstop}	{
 					char* endptr;
 					BEGIN(SQL);
 					errno = 0;
-					yylval.ival = strtol((char *)literal,&endptr,2);
+					yylval.ival = strtol(literalbuf, &endptr, 2);
 					if (*endptr != '\0' || errno == ERANGE)
 						yyerror("ERROR: Bad binary integer input!");
 					return ICONST;
 				}
 <xh>{xhinside}	|
 <xb>{xbinside}	{
-					if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
+					addlit(yytext, yyleng);
-						yyerror("ERROR: quoted string parse buffer exceeded");
-					memcpy(literal+llen, yytext, yyleng+1);
-					llen += yyleng;
 				}
 <xh>{xhcat}		|
 <xb>{xbcat}		{
@@ -226,15 +243,14 @@ cppline		{space}*#.*(\\{space}*\n)*\n*
 <SQL>{xhstart}		{
 					BEGIN(xh);
-					llen = 0;
+					startlit();
-					*literal = '\0';
 				}
 <xh>{xhstop}	{
 					char* endptr;
 					BEGIN(SQL);
 					errno = 0;
-					yylval.ival = strtol((char *)literal,&endptr,16);
+					yylval.ival = strtol(literalbuf, &endptr, 16);
 					if (*endptr != '\0' || errno == ERANGE)
 						yyerror("ERROR: Bad hexadecimal integer input");
 					return ICONST;
@@ -242,21 +258,17 @@ cppline		{space}*#.*(\\{space}*\n)*\n*
 <SQL>{xqstart}		{
 					BEGIN(xq);
-					llen = 0;
+					startlit();
-					*literal = '\0';
 				}
 <xq>{xqstop}	{
 					BEGIN(SQL);
-					yylval.str = mm_strdup(literal);
+					yylval.str = mm_strdup(literalbuf);
 					return SCONST;
 				}
 <xq>{xqdouble}	|
 <xq>{xqinside}	|
 <xq>{xqliteral} {
-					if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
+					addlit(yytext, yyleng);
-						yyerror("ERROR: quoted string parse buffer exceeded");
-					memcpy(literal+llen, yytext, yyleng+1);
-					llen += yyleng;
 				}
 <xq>{xqcat}		{
 				}
@@ -264,35 +276,27 @@ cppline		{space}*#.*(\\{space}*\n)*\n*
 <SQL>{xdstart}		{
 					BEGIN(xd);
-					llen = 0;
+					startlit();
-					*literal = '\0';
 				}
 <xd>{xdstop}	{
 					BEGIN(SQL);
-					yylval.str = mm_strdup(literal);
+					yylval.str = mm_strdup(literalbuf);
 					return CSTRING;
 				}
 <xd>{xdinside}	{
-					if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
+					addlit(yytext, yyleng);
-						yyerror("ERROR: quoted string parse buffer exceeded");
-					memcpy(literal+llen, yytext, yyleng+1);
-					llen += yyleng;
 				}
 {xdstart}		{
 					BEGIN(xdc);
-					llen = 0;
+					startlit();
-					*literal = '\0';
 				}
 <xdc>{xdstop}	{
 					BEGIN(C);
-					yylval.str = mm_strdup(literal);
+					yylval.str = mm_strdup(literalbuf);
 					return CSTRING;
 				}
 <xdc>{xdcinside}	{
-					if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
+					addlit(yytext, yyleng);
-						yyerror("ERROR: quoted string parse buffer exceeded");
-					memcpy(literal+llen, yytext, yyleng+1);
-					llen += yyleng;
 				}
 <SQL>{typecast}			{ 	return TYPECAST; }
 <SQL>{self}			{ /* 
@@ -486,8 +490,7 @@ cppline		{space}*#.*(\\{space}*\n)*\n*
 <def_ident>{identifier}	{
 				old = mm_strdup(yytext);
 				BEGIN(def);
-				llen = 0;
+				startlit();
-				*literal = '\0';
 			}
 <def>{space}		/* eat the whitespace */
 <def>";"		{
@@ -498,8 +501,8 @@ cppline		{space}*#.*(\\{space}*\n)*\n*
                                     if (strcmp(old, ptr->old) == 0)
                                     {
 					free(ptr->new);
-					/* ptr->new = mm_strdup(scanstr(literal));*/
+					/* ptr->new = mm_strdup(scanstr(literalbuf));*/
-					ptr->new = mm_strdup(literal);
+					ptr->new = mm_strdup(literalbuf);
                                     }
                                }
 				if (ptr == NULL)
@@ -508,8 +511,8 @@ cppline		{space}*#.*(\\{space}*\n)*\n*
                                        /* initial definition */
                                        this->old = old;
-                                        /* this->new = mm_strdup(scanstr(literal));*/
+                                        /* this->new = mm_strdup(scanstr(literalbuf));*/
-                                        this->new = mm_strdup(literal);
+                                        this->new = mm_strdup(literalbuf);
 					this->next = defines;
 					defines = this;
 				}
@@ -517,10 +520,7 @@ cppline		{space}*#.*(\\{space}*\n)*\n*
 				BEGIN(C);
 			}
 <def>[^";"]		{
-				if ((llen+yyleng) > (MAX_PARSE_BUFFER - 1))
+				addlit(yytext, yyleng);
-					yyerror("ERROR: define statement parse buffer exceeded");
-				memcpy(literal+llen, yytext, yyleng+1);
-				llen += yyleng;
 			}
 <C>{exec}{space}{sql}{space}{include}	{ BEGIN(incl); }
 <incl>{space}		/* eat the whitespace */
@@ -602,9 +602,34 @@ void
 lex_init(void)
 {
+	/*
+	 * Reset scanner state: clear the open-brace counter, make sure the
+	 * literal buffer exists and is empty, and enter the C start condition.
+	 */
    braces_open = 0;
+	/* initialize literal buffer to a reasonable but expansible size */
+	if (literalbuf == NULL)
+	{
+		literalalloc = 128;
+		literalbuf = (char *) malloc(literalalloc);
+		if (literalbuf == NULL)
+		{
+			/*
+			 * startlit() below writes through literalbuf, so we cannot
+			 * continue without a buffer.  yyerror is not visibly
+			 * no-return here, hence the explicit exit.
+			 */
+			yyerror("ERROR: out of memory");
+			exit(1);
+		}
+	}
+	startlit();
    BEGIN C;
 }
+/*
+ * addlit
+ *	  Append yleng bytes of ytext, plus its terminating null, to literalbuf,
+ *	  growing the buffer by doubling whenever the new text would not fit.
+ *
+ * ytext must be null-terminated at ytext[yleng]; that byte is copied too,
+ * so literalbuf is always a valid C string afterwards.  literallen is
+ * advanced by yleng only (the terminator is not counted).
+ */
+static void
+addlit(char *ytext, int yleng)
+{
+	/* enlarge buffer if needed; ">=" keeps one byte free for the null */
+	if ((literallen+yleng) >= literalalloc)
+	{
+		int			newalloc = literalalloc;
+		char	   *newbuf;
+
+		do {
+			newalloc *= 2;
+		} while ((literallen+yleng) >= newalloc);
+		/*
+		 * Keep the old pointer and size until realloc is known to have
+		 * succeeded, so a failure neither loses the buffer nor leaves
+		 * literalalloc describing memory we do not own.
+		 */
+		newbuf = (char *) realloc(literalbuf, newalloc);
+		if (newbuf == NULL)
+		{
+			/* yyerror is not visibly no-return here, hence the exit */
+			yyerror("ERROR: out of memory");
+			exit(1);
+		}
+		literalbuf = newbuf;
+		literalalloc = newalloc;
+	}
+	/* append data --- note we assume ytext is null-terminated */
+	memcpy(literalbuf+literallen, ytext, yleng+1);
+	literallen += yleng;
+}
 int yywrap(void) 
 { 
    return 1;