Sync backend/parser/scan.l with bin/psql/psqlscan.l.

Make some minor formatting adjustments to make it easier to diff these files and see that they indeed implement the same flex rules (at least to the extent that we want them to be the same). (Someday it'd be nice to make ecpg's pgc.l more easily diff'able too, but today is not that day.) Also run relevant parts of these files and psqlscanslash.l through pgindent. No actual behavioral changes here, just obsessive neatnik-ism.

Sync backend/parser/scan.l with bin/psql/psqlscan.l.
Make some minor formatting adjustments to make it easier to diff these files and see that they indeed implement the same flex rules (at least to the extent that we want them to be the same). (Someday it'd be nice to make ecpg's pgc.l more easily diff'able too, but today is not that day.) Also run relevant parts of these files and psqlscanslash.l through pgindent. No actual behavioral changes here, just obsessive neatnik-ism.
21c8ee79 · Tom Lane · 72b1e3a2 · 21c8ee79 · 21c8ee79 · 21c8ee79
Commit 21c8ee79 authored Mar 19, 2016 by Tom Lane
Showing with 124 additions and 126 deletions

src/backend/parser/scan.l src/backend/parser/scan.l +84 -75

src/bin/psql/psqlscan.l src/bin/psql/psqlscan.l +33 -39

src/bin/psql/psqlscanslash.l src/bin/psql/psqlscanslash.l +7 -12

No files found.
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -360,8 +360,8 @@ operator		{op_chars}+
 * instead we pass it separately to parser. there it gets
 * coerced via doNegate() -- Leon aug 20 1999
 *
-* {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
+ * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
-*
+ *
 * {realfail1} and {realfail2} are added to prevent the need for scanner
 * backup when the {real} rule fails to match completely.
 */
@@ -558,10 +558,12 @@ other			.
 					/* xusend state looks for possible UESCAPE */
 					BEGIN(xusend);
 				}
-<xusend>{whitespace} { /* stay in xusend state over whitespace */ }
+<xusend>{whitespace} {
+					/* stay in xusend state over whitespace */
+				}
+<xusend><<EOF>> |
 <xusend>{other} |
-<xusend>{xustop1} |
+<xusend>{xustop1} {
-<xusend><<EOF>>	{
 					/* no UESCAPE after the quote, throw back everything */
 					yyless(0);
 					BEGIN(INITIAL);
@@ -571,13 +573,14 @@ other			.
 <xusend>{xustop2} {
 					/* found UESCAPE after the end quote */
 					BEGIN(INITIAL);
-					if (!check_uescapechar(yytext[yyleng-2]))
+					if (!check_uescapechar(yytext[yyleng - 2]))
 					{
 						SET_YYLLOC();
-						ADVANCE_YYLLOC(yyleng-2);
+						ADVANCE_YYLLOC(yyleng - 2);
 						yyerror("invalid Unicode escape character");
 					}
-					yylval->str = litbuf_udeescape(yytext[yyleng-2], yyscanner);
+					yylval->str = litbuf_udeescape(yytext[yyleng - 2],
+												   yyscanner);
 					return SCONST;
 				}
 <xq,xe,xus>{xqdouble} {
@@ -590,7 +593,7 @@ other			.
 					addlit(yytext, yyleng, yyscanner);
 				}
 <xe>{xeunicode} {
-					pg_wchar c = strtoul(yytext+2, NULL, 16);
+					pg_wchar	c = strtoul(yytext + 2, NULL, 16);
 					check_escape_warning(yyscanner);
@@ -605,7 +608,7 @@ other			.
 						addunicode(c, yyscanner);
 				}
 <xeu>{xeunicode} {
-					pg_wchar c = strtoul(yytext+2, NULL, 16);
+					pg_wchar	c = strtoul(yytext + 2, NULL, 16);
 					if (!is_utf16_surrogate_second(c))
 						yyerror("invalid Unicode surrogate pair");
@@ -643,7 +646,7 @@ other			.
 							   yyscanner);
 				}
 <xe>{xeoctesc}  {
-					unsigned char c = strtoul(yytext+1, NULL, 8);
+					unsigned char c = strtoul(yytext + 1, NULL, 8);
 					check_escape_warning(yyscanner);
 					addlitchar(c, yyscanner);
@@ -651,7 +654,7 @@ other			.
 						yyextra->saw_non_ascii = true;
 				}
 <xe>{xehexesc}  {
-					unsigned char c = strtoul(yytext+2, NULL, 16);
+					unsigned char c = strtoul(yytext + 2, NULL, 16);
 					check_escape_warning(yyscanner);
 					addlitchar(c, yyscanner);
@@ -696,8 +699,8 @@ other			.
 						 * the $... part to the output, but put back the final
 						 * $ for rescanning.  Consider $delim$...$junk$delim$
 						 */
-						addlit(yytext, yyleng-1, yyscanner);
+						addlit(yytext, yyleng - 1, yyscanner);
-						yyless(yyleng-1);
+						yyless(yyleng - 1);
 					}
 				}
 <xdolq>{dolqinside} {
@@ -739,10 +742,12 @@ other			.
 					/* xuiend state looks for possible UESCAPE */
 					BEGIN(xuiend);
 				}
-<xuiend>{whitespace} { /* stay in xuiend state over whitespace */ }
+<xuiend>{whitespace} {
+					/* stay in xuiend state over whitespace */
+				}
+<xuiend><<EOF>> |
 <xuiend>{other} |
-<xuiend>{xustop1} |
+<xuiend>{xustop1} {
-<xuiend><<EOF>>	{
 					/* no UESCAPE after the quote, throw back everything */
 					char	   *ident;
 					int			identlen;
@@ -767,10 +772,10 @@ other			.
 					BEGIN(INITIAL);
 					if (yyextra->literallen == 0)
 						yyerror("zero-length delimited identifier");
-					if (!check_uescapechar(yytext[yyleng-2]))
+					if (!check_uescapechar(yytext[yyleng - 2]))
 					{
 						SET_YYLLOC();
-						ADVANCE_YYLLOC(yyleng-2);
+						ADVANCE_YYLLOC(yyleng - 2);
 						yyerror("invalid Unicode escape character");
 					}
 					ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner);
@@ -878,12 +883,12 @@ other			.
 					 * sequences of SQL operators.
 					 */
 					while (nchars > 1 &&
-						   (yytext[nchars-1] == '+' ||
+						   (yytext[nchars - 1] == '+' ||
-							yytext[nchars-1] == '-'))
+							yytext[nchars - 1] == '-'))
 					{
 						int			ic;
-						for (ic = nchars-2; ic >= 0; ic--)
+						for (ic = nchars - 2; ic >= 0; ic--)
 						{
 							if (strchr("~!@#^&|`?%", yytext[ic]))
 								break;
@@ -940,7 +945,7 @@ other			.
 				}
 {decimalfail}	{
 					/* throw back the .., and treat as integer */
-					yyless(yyleng-2);
+					yyless(yyleng - 2);
 					SET_YYLLOC();
 					return process_integer_literal(yytext, yylval);
 				}
@@ -956,14 +961,14 @@ other			.
 					 * but since this case will almost certainly lead to a
 					 * syntax error anyway, we don't bother to distinguish.
 					 */
-					yyless(yyleng-1);
+					yyless(yyleng - 1);
 					SET_YYLLOC();
 					yylval->str = pstrdup(yytext);
 					return FCONST;
 				}
 {realfail2}		{
 					/* throw back the [Ee][+-], and proceed as above */
-					yyless(yyleng-2);
+					yyless(yyleng - 2);
 					SET_YYLLOC();
 					yylval->str = pstrdup(yytext);
 					return FCONST;
@@ -1133,11 +1138,11 @@ void
 scanner_finish(core_yyscan_t yyscanner)
 {
 	/*
-	 * We don't bother to call yylex_destroy(), because all it would do
+	 * We don't bother to call yylex_destroy(), because all it would do is
-	 * is pfree a small amount of control storage.  It's cheaper to leak
+	 * pfree a small amount of control storage.  It's cheaper to leak the
-	 * the storage until the parsing context is destroyed.  The amount of
+	 * storage until the parsing context is destroyed.  The amount of space
-	 * space involved is usually negligible compared to the output parse
+	 * involved is usually negligible compared to the output parse tree
-	 * tree anyway.
+	 * anyway.
 	 *
 	 * We do bother to pfree the scanbuf and literal buffer, but only if they
 	 * represent a nontrivial amount of space.  The 8K cutoff is arbitrary.
@@ -1155,7 +1160,8 @@ addlit(char *ytext, int yleng, core_yyscan_t yyscanner)
 	/* enlarge buffer if needed */
 	if ((yyextra->literallen + yleng) >= yyextra->literalalloc)
 	{
-		do {
+		do
+		{
 			yyextra->literalalloc *= 2;
 		} while ((yyextra->literallen + yleng) >= yyextra->literalalloc);
 		yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
@@ -1303,7 +1309,9 @@ static char *
 litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
 {
 	char	   *new;
-	char *litbuf, *in, *out;
+	char	   *litbuf,
+			   *in,
+			   *out;
 	pg_wchar	pair_first = 0;
 	/* Make literalbuf null-terminated to simplify the scanning loop */
@@ -1311,8 +1319,8 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
 	litbuf[yyextra->literallen] = '\0';
 	/*
-	 * This relies on the subtle assumption that a UTF-8 expansion
+	 * This relies on the subtle assumption that a UTF-8 expansion cannot be
-	 * cannot be longer than its escaped representation.
+	 * longer than its escaped representation.
 	 */
 	new = palloc(yyextra->literallen + 1);
@@ -1429,10 +1437,11 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
 	}
 	*out = '\0';
 	/*
 	 * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
-	 * codes; but it's probably not worth the trouble, since this isn't
+	 * codes; but it's probably not worth the trouble, since this isn't likely
-	 * likely to be a performance-critical path.
+	 * to be a performance-critical path.
 	 */
 	pg_verifymbstr(new, out - new, false);
 	return new;

--- a/src/bin/psql/psqlscan.l
+++ b/src/bin/psql/psqlscan.l
@@ -78,8 +78,8 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
 * src/backend/parser/scan.l so far as the flex patterns are concerned.
 * The rule bodies are just ECHO as opposed to what the backend does,
 * however.  (But be sure to duplicate code that affects the lexing process,
- * such as BEGIN().)  Also, psqlscan uses a single <<EOF>> rule whereas
+ * such as BEGIN() and yyless().)  Also, psqlscan uses a single <<EOF>> rule
- * scan.l has a separate one for each exclusive state.
+ * whereas scan.l has a separate one for each exclusive state.
 */
 /*
@@ -351,11 +351,10 @@ other			.
 		/*
 		 * Force flex into the state indicated by start_state.  This has a
-		 * couple of purposes: it lets some of the functions below set a
+		 * couple of purposes: it lets some of the functions below set a new
-		 * new starting state without ugly direct access to flex variables,
+		 * starting state without ugly direct access to flex variables, and it
-		 * and it allows us to transition from one flex lexer to another
+		 * allows us to transition from one flex lexer to another so that we
-		 * so that we can lex different parts of the source string using
+		 * can lex different parts of the source string using separate lexers.
-		 * separate lexers.
 		 */
 		BEGIN(cur_state->start_state);
 %}
@@ -390,9 +389,7 @@ other			.
 <xc>{xcstop}	{
 					if (cur_state->xcdepth <= 0)
-					{
 						BEGIN(INITIAL);
-					}
 					else
 						cur_state->xcdepth--;
 					ECHO;
@@ -474,6 +471,7 @@ other			.
 				}
 <xus>{quotestop} |
 <xus>{quotefail} {
+					/* throw back all but the quote */
 					yyless(1);
 					BEGIN(xusend);
 					ECHO;
@@ -547,7 +545,7 @@ other			.
 						 * the $... part to the output, but put back the final
 						 * $ for rescanning.  Consider $delim$...$junk$delim$
 						 */
-						yyless(yyleng-1);
+						yyless(yyleng - 1);
 					}
 					ECHO;
 				}
@@ -717,8 +715,8 @@ other			.
 					else
 					{
 						/*
-						 * if the variable doesn't exist we'll copy the
+						 * if the variable doesn't exist we'll copy the string
-						 * string as is
+						 * as is
 						 */
 						ECHO;
 					}
@@ -790,12 +788,12 @@ other			.
 					 * sequences of SQL operators.
 					 */
 					while (nchars > 1 &&
-						   (yytext[nchars-1] == '+' ||
+						   (yytext[nchars - 1] == '+' ||
-							yytext[nchars-1] == '-'))
+							yytext[nchars - 1] == '-'))
 					{
 						int			ic;
-						for (ic = nchars-2; ic >= 0; ic--)
+						for (ic = nchars - 2; ic >= 0; ic--)
 						{
 							if (strchr("~!@#^&|`?%", yytext[ic]))
 								break;
@@ -825,7 +823,7 @@ other			.
 				}
 {decimalfail}	{
 					/* throw back the .., and treat as integer */
-					yyless(yyleng-2);
+					yyless(yyleng - 2);
 					ECHO;
 				}
 {real}			{
@@ -838,12 +836,12 @@ other			.
 					 * but since this case will almost certainly lead to a
 					 * syntax error anyway, we don't bother to distinguish.
 					 */
-					yyless(yyleng-1);
+					yyless(yyleng - 1);
 					ECHO;
 				}
 {realfail2}		{
 					/* throw back the [Ee][+-], and proceed as above */
-					yyless(yyleng-2);
+					yyless(yyleng - 2);
 					ECHO;
 				}
@@ -856,10 +854,6 @@ other			.
 					ECHO;
 				}
-	/*
-	 * psql uses a single <<EOF>> rule, unlike the backend.
-	 */
 <<EOF>>			{
 					if (cur_state->buffer_stack == NULL)
 					{
@@ -1192,8 +1186,8 @@ psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
 	stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
 	/*
-	 * In current usage, the passed varname points at the current flex
+	 * In current usage, the passed varname points at the current flex input
-	 * input buffer; we must copy it before calling psqlscan_prepare_buffer()
+	 * buffer; we must copy it before calling psqlscan_prepare_buffer()
 	 * because that will change the buffer state.
 	 */
 	stackelem->varname = varname ? pg_strdup(varname) : NULL;

--- a/src/bin/psql/psqlscanslash.l
+++ b/src/bin/psql/psqlscanslash.l
@@ -113,11 +113,10 @@ other			.
 		/*
 		 * Force flex into the state indicated by start_state.  This has a
-		 * couple of purposes: it lets some of the functions below set a
+		 * couple of purposes: it lets some of the functions below set a new
-		 * new starting state without ugly direct access to flex variables,
+		 * starting state without ugly direct access to flex variables, and it
-		 * and it allows us to transition from one flex lexer to another
+		 * allows us to transition from one flex lexer to another so that we
-		 * so that we can lex different parts of the source string using
+		 * can lex different parts of the source string using separate lexers.
-		 * separate lexers.
 		 */
 		BEGIN(cur_state->start_state);
 %}
@@ -396,10 +395,6 @@ other			.
 }
-	/*
-	 * psql uses a single <<EOF>> rule, unlike the backend.
-	 */
 <<EOF>>			{
 					if (cur_state->buffer_stack == NULL)
 					{