Commit 21c8ee79 authored by Tom Lane

Sync backend/parser/scan.l with bin/psql/psqlscan.l.

Make some minor formatting adjustments to make it easier to diff these
files and see that they indeed implement the same flex rules (at least
to the extent that we want them to be the same).

(Someday it'd be nice to make ecpg's pgc.l more easily diff'able too,
but today is not that day.)

Also run relevant parts of these files and psqlscanslash.l through
pgindent.

No actual behavioral changes here, just obsessive neatnik-ism.
parent 72b1e3a2
...@@ -360,8 +360,8 @@ operator {op_chars}+ ...@@ -360,8 +360,8 @@ operator {op_chars}+
* instead we pass it separately to parser. there it gets * instead we pass it separately to parser. there it gets
* coerced via doNegate() -- Leon aug 20 1999 * coerced via doNegate() -- Leon aug 20 1999
* *
* {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10. * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
* *
* {realfail1} and {realfail2} are added to prevent the need for scanner * {realfail1} and {realfail2} are added to prevent the need for scanner
* backup when the {real} rule fails to match completely. * backup when the {real} rule fails to match completely.
*/ */
...@@ -558,10 +558,12 @@ other . ...@@ -558,10 +558,12 @@ other .
/* xusend state looks for possible UESCAPE */ /* xusend state looks for possible UESCAPE */
BEGIN(xusend); BEGIN(xusend);
} }
<xusend>{whitespace} { /* stay in xusend state over whitespace */ } <xusend>{whitespace} {
/* stay in xusend state over whitespace */
}
<xusend><<EOF>> |
<xusend>{other} | <xusend>{other} |
<xusend>{xustop1} | <xusend>{xustop1} {
<xusend><<EOF>> {
/* no UESCAPE after the quote, throw back everything */ /* no UESCAPE after the quote, throw back everything */
yyless(0); yyless(0);
BEGIN(INITIAL); BEGIN(INITIAL);
...@@ -571,13 +573,14 @@ other . ...@@ -571,13 +573,14 @@ other .
<xusend>{xustop2} { <xusend>{xustop2} {
/* found UESCAPE after the end quote */ /* found UESCAPE after the end quote */
BEGIN(INITIAL); BEGIN(INITIAL);
if (!check_uescapechar(yytext[yyleng-2])) if (!check_uescapechar(yytext[yyleng - 2]))
{ {
SET_YYLLOC(); SET_YYLLOC();
ADVANCE_YYLLOC(yyleng-2); ADVANCE_YYLLOC(yyleng - 2);
yyerror("invalid Unicode escape character"); yyerror("invalid Unicode escape character");
} }
yylval->str = litbuf_udeescape(yytext[yyleng-2], yyscanner); yylval->str = litbuf_udeescape(yytext[yyleng - 2],
yyscanner);
return SCONST; return SCONST;
} }
<xq,xe,xus>{xqdouble} { <xq,xe,xus>{xqdouble} {
...@@ -590,7 +593,7 @@ other . ...@@ -590,7 +593,7 @@ other .
addlit(yytext, yyleng, yyscanner); addlit(yytext, yyleng, yyscanner);
} }
<xe>{xeunicode} { <xe>{xeunicode} {
pg_wchar c = strtoul(yytext+2, NULL, 16); pg_wchar c = strtoul(yytext + 2, NULL, 16);
check_escape_warning(yyscanner); check_escape_warning(yyscanner);
...@@ -605,7 +608,7 @@ other . ...@@ -605,7 +608,7 @@ other .
addunicode(c, yyscanner); addunicode(c, yyscanner);
} }
<xeu>{xeunicode} { <xeu>{xeunicode} {
pg_wchar c = strtoul(yytext+2, NULL, 16); pg_wchar c = strtoul(yytext + 2, NULL, 16);
if (!is_utf16_surrogate_second(c)) if (!is_utf16_surrogate_second(c))
yyerror("invalid Unicode surrogate pair"); yyerror("invalid Unicode surrogate pair");
...@@ -643,7 +646,7 @@ other . ...@@ -643,7 +646,7 @@ other .
yyscanner); yyscanner);
} }
<xe>{xeoctesc} { <xe>{xeoctesc} {
unsigned char c = strtoul(yytext+1, NULL, 8); unsigned char c = strtoul(yytext + 1, NULL, 8);
check_escape_warning(yyscanner); check_escape_warning(yyscanner);
addlitchar(c, yyscanner); addlitchar(c, yyscanner);
...@@ -651,7 +654,7 @@ other . ...@@ -651,7 +654,7 @@ other .
yyextra->saw_non_ascii = true; yyextra->saw_non_ascii = true;
} }
<xe>{xehexesc} { <xe>{xehexesc} {
unsigned char c = strtoul(yytext+2, NULL, 16); unsigned char c = strtoul(yytext + 2, NULL, 16);
check_escape_warning(yyscanner); check_escape_warning(yyscanner);
addlitchar(c, yyscanner); addlitchar(c, yyscanner);
...@@ -696,8 +699,8 @@ other . ...@@ -696,8 +699,8 @@ other .
* the $... part to the output, but put back the final * the $... part to the output, but put back the final
* $ for rescanning. Consider $delim$...$junk$delim$ * $ for rescanning. Consider $delim$...$junk$delim$
*/ */
addlit(yytext, yyleng-1, yyscanner); addlit(yytext, yyleng - 1, yyscanner);
yyless(yyleng-1); yyless(yyleng - 1);
} }
} }
<xdolq>{dolqinside} { <xdolq>{dolqinside} {
...@@ -739,10 +742,12 @@ other . ...@@ -739,10 +742,12 @@ other .
/* xuiend state looks for possible UESCAPE */ /* xuiend state looks for possible UESCAPE */
BEGIN(xuiend); BEGIN(xuiend);
} }
<xuiend>{whitespace} { /* stay in xuiend state over whitespace */ } <xuiend>{whitespace} {
/* stay in xuiend state over whitespace */
}
<xuiend><<EOF>> |
<xuiend>{other} | <xuiend>{other} |
<xuiend>{xustop1} | <xuiend>{xustop1} {
<xuiend><<EOF>> {
/* no UESCAPE after the quote, throw back everything */ /* no UESCAPE after the quote, throw back everything */
char *ident; char *ident;
int identlen; int identlen;
...@@ -767,10 +772,10 @@ other . ...@@ -767,10 +772,10 @@ other .
BEGIN(INITIAL); BEGIN(INITIAL);
if (yyextra->literallen == 0) if (yyextra->literallen == 0)
yyerror("zero-length delimited identifier"); yyerror("zero-length delimited identifier");
if (!check_uescapechar(yytext[yyleng-2])) if (!check_uescapechar(yytext[yyleng - 2]))
{ {
SET_YYLLOC(); SET_YYLLOC();
ADVANCE_YYLLOC(yyleng-2); ADVANCE_YYLLOC(yyleng - 2);
yyerror("invalid Unicode escape character"); yyerror("invalid Unicode escape character");
} }
ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner); ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner);
...@@ -878,12 +883,12 @@ other . ...@@ -878,12 +883,12 @@ other .
* sequences of SQL operators. * sequences of SQL operators.
*/ */
while (nchars > 1 && while (nchars > 1 &&
(yytext[nchars-1] == '+' || (yytext[nchars - 1] == '+' ||
yytext[nchars-1] == '-')) yytext[nchars - 1] == '-'))
{ {
int ic; int ic;
for (ic = nchars-2; ic >= 0; ic--) for (ic = nchars - 2; ic >= 0; ic--)
{ {
if (strchr("~!@#^&|`?%", yytext[ic])) if (strchr("~!@#^&|`?%", yytext[ic]))
break; break;
...@@ -940,7 +945,7 @@ other . ...@@ -940,7 +945,7 @@ other .
} }
{decimalfail} { {decimalfail} {
/* throw back the .., and treat as integer */ /* throw back the .., and treat as integer */
yyless(yyleng-2); yyless(yyleng - 2);
SET_YYLLOC(); SET_YYLLOC();
return process_integer_literal(yytext, yylval); return process_integer_literal(yytext, yylval);
} }
...@@ -956,14 +961,14 @@ other . ...@@ -956,14 +961,14 @@ other .
* but since this case will almost certainly lead to a * but since this case will almost certainly lead to a
* syntax error anyway, we don't bother to distinguish. * syntax error anyway, we don't bother to distinguish.
*/ */
yyless(yyleng-1); yyless(yyleng - 1);
SET_YYLLOC(); SET_YYLLOC();
yylval->str = pstrdup(yytext); yylval->str = pstrdup(yytext);
return FCONST; return FCONST;
} }
{realfail2} { {realfail2} {
/* throw back the [Ee][+-], and proceed as above */ /* throw back the [Ee][+-], and proceed as above */
yyless(yyleng-2); yyless(yyleng - 2);
SET_YYLLOC(); SET_YYLLOC();
yylval->str = pstrdup(yytext); yylval->str = pstrdup(yytext);
return FCONST; return FCONST;
...@@ -1133,11 +1138,11 @@ void ...@@ -1133,11 +1138,11 @@ void
scanner_finish(core_yyscan_t yyscanner) scanner_finish(core_yyscan_t yyscanner)
{ {
/* /*
* We don't bother to call yylex_destroy(), because all it would do * We don't bother to call yylex_destroy(), because all it would do is
* is pfree a small amount of control storage. It's cheaper to leak * pfree a small amount of control storage. It's cheaper to leak the
* the storage until the parsing context is destroyed. The amount of * storage until the parsing context is destroyed. The amount of space
* space involved is usually negligible compared to the output parse * involved is usually negligible compared to the output parse tree
* tree anyway. * anyway.
* *
* We do bother to pfree the scanbuf and literal buffer, but only if they * We do bother to pfree the scanbuf and literal buffer, but only if they
* represent a nontrivial amount of space. The 8K cutoff is arbitrary. * represent a nontrivial amount of space. The 8K cutoff is arbitrary.
...@@ -1155,7 +1160,8 @@ addlit(char *ytext, int yleng, core_yyscan_t yyscanner) ...@@ -1155,7 +1160,8 @@ addlit(char *ytext, int yleng, core_yyscan_t yyscanner)
/* enlarge buffer if needed */ /* enlarge buffer if needed */
if ((yyextra->literallen + yleng) >= yyextra->literalalloc) if ((yyextra->literallen + yleng) >= yyextra->literalalloc)
{ {
do { do
{
yyextra->literalalloc *= 2; yyextra->literalalloc *= 2;
} while ((yyextra->literallen + yleng) >= yyextra->literalalloc); } while ((yyextra->literallen + yleng) >= yyextra->literalalloc);
yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf, yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
...@@ -1303,7 +1309,9 @@ static char * ...@@ -1303,7 +1309,9 @@ static char *
litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
{ {
char *new; char *new;
char *litbuf, *in, *out; char *litbuf,
*in,
*out;
pg_wchar pair_first = 0; pg_wchar pair_first = 0;
/* Make literalbuf null-terminated to simplify the scanning loop */ /* Make literalbuf null-terminated to simplify the scanning loop */
...@@ -1311,8 +1319,8 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) ...@@ -1311,8 +1319,8 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
litbuf[yyextra->literallen] = '\0'; litbuf[yyextra->literallen] = '\0';
/* /*
* This relies on the subtle assumption that a UTF-8 expansion * This relies on the subtle assumption that a UTF-8 expansion cannot be
* cannot be longer than its escaped representation. * longer than its escaped representation.
*/ */
new = palloc(yyextra->literallen + 1); new = palloc(yyextra->literallen + 1);
...@@ -1429,10 +1437,11 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) ...@@ -1429,10 +1437,11 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
} }
*out = '\0'; *out = '\0';
/* /*
* We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
* codes; but it's probably not worth the trouble, since this isn't * codes; but it's probably not worth the trouble, since this isn't likely
* likely to be a performance-critical path. * to be a performance-critical path.
*/ */
pg_verifymbstr(new, out - new, false); pg_verifymbstr(new, out - new, false);
return new; return new;
......
...@@ -78,8 +78,8 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner); ...@@ -78,8 +78,8 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
* src/backend/parser/scan.l so far as the flex patterns are concerned. * src/backend/parser/scan.l so far as the flex patterns are concerned.
* The rule bodies are just ECHO as opposed to what the backend does, * The rule bodies are just ECHO as opposed to what the backend does,
* however. (But be sure to duplicate code that affects the lexing process, * however. (But be sure to duplicate code that affects the lexing process,
* such as BEGIN().) Also, psqlscan uses a single <<EOF>> rule whereas * such as BEGIN() and yyless().) Also, psqlscan uses a single <<EOF>> rule
* scan.l has a separate one for each exclusive state. * whereas scan.l has a separate one for each exclusive state.
*/ */
/* /*
...@@ -351,11 +351,10 @@ other . ...@@ -351,11 +351,10 @@ other .
/* /*
* Force flex into the state indicated by start_state. This has a * Force flex into the state indicated by start_state. This has a
* couple of purposes: it lets some of the functions below set a * couple of purposes: it lets some of the functions below set a new
* new starting state without ugly direct access to flex variables, * starting state without ugly direct access to flex variables, and it
* and it allows us to transition from one flex lexer to another * allows us to transition from one flex lexer to another so that we
* so that we can lex different parts of the source string using * can lex different parts of the source string using separate lexers.
* separate lexers.
*/ */
BEGIN(cur_state->start_state); BEGIN(cur_state->start_state);
%} %}
...@@ -390,9 +389,7 @@ other . ...@@ -390,9 +389,7 @@ other .
<xc>{xcstop} { <xc>{xcstop} {
if (cur_state->xcdepth <= 0) if (cur_state->xcdepth <= 0)
{
BEGIN(INITIAL); BEGIN(INITIAL);
}
else else
cur_state->xcdepth--; cur_state->xcdepth--;
ECHO; ECHO;
...@@ -474,6 +471,7 @@ other . ...@@ -474,6 +471,7 @@ other .
} }
<xus>{quotestop} | <xus>{quotestop} |
<xus>{quotefail} { <xus>{quotefail} {
/* throw back all but the quote */
yyless(1); yyless(1);
BEGIN(xusend); BEGIN(xusend);
ECHO; ECHO;
...@@ -547,7 +545,7 @@ other . ...@@ -547,7 +545,7 @@ other .
* the $... part to the output, but put back the final * the $... part to the output, but put back the final
* $ for rescanning. Consider $delim$...$junk$delim$ * $ for rescanning. Consider $delim$...$junk$delim$
*/ */
yyless(yyleng-1); yyless(yyleng - 1);
} }
ECHO; ECHO;
} }
...@@ -717,8 +715,8 @@ other . ...@@ -717,8 +715,8 @@ other .
else else
{ {
/* /*
* if the variable doesn't exist we'll copy the * if the variable doesn't exist we'll copy the string
* string as is * as is
*/ */
ECHO; ECHO;
} }
...@@ -790,12 +788,12 @@ other . ...@@ -790,12 +788,12 @@ other .
* sequences of SQL operators. * sequences of SQL operators.
*/ */
while (nchars > 1 && while (nchars > 1 &&
(yytext[nchars-1] == '+' || (yytext[nchars - 1] == '+' ||
yytext[nchars-1] == '-')) yytext[nchars - 1] == '-'))
{ {
int ic; int ic;
for (ic = nchars-2; ic >= 0; ic--) for (ic = nchars - 2; ic >= 0; ic--)
{ {
if (strchr("~!@#^&|`?%", yytext[ic])) if (strchr("~!@#^&|`?%", yytext[ic]))
break; break;
...@@ -825,7 +823,7 @@ other . ...@@ -825,7 +823,7 @@ other .
} }
{decimalfail} { {decimalfail} {
/* throw back the .., and treat as integer */ /* throw back the .., and treat as integer */
yyless(yyleng-2); yyless(yyleng - 2);
ECHO; ECHO;
} }
{real} { {real} {
...@@ -838,12 +836,12 @@ other . ...@@ -838,12 +836,12 @@ other .
* but since this case will almost certainly lead to a * but since this case will almost certainly lead to a
* syntax error anyway, we don't bother to distinguish. * syntax error anyway, we don't bother to distinguish.
*/ */
yyless(yyleng-1); yyless(yyleng - 1);
ECHO; ECHO;
} }
{realfail2} { {realfail2} {
/* throw back the [Ee][+-], and proceed as above */ /* throw back the [Ee][+-], and proceed as above */
yyless(yyleng-2); yyless(yyleng - 2);
ECHO; ECHO;
} }
...@@ -856,10 +854,6 @@ other . ...@@ -856,10 +854,6 @@ other .
ECHO; ECHO;
} }
/*
* psql uses a single <<EOF>> rule, unlike the backend.
*/
<<EOF>> { <<EOF>> {
if (cur_state->buffer_stack == NULL) if (cur_state->buffer_stack == NULL)
{ {
...@@ -1192,8 +1186,8 @@ psqlscan_push_new_buffer(PsqlScanState state, const char *newstr, ...@@ -1192,8 +1186,8 @@ psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
stackelem = (StackElem *) pg_malloc(sizeof(StackElem)); stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
/* /*
* In current usage, the passed varname points at the current flex * In current usage, the passed varname points at the current flex input
* input buffer; we must copy it before calling psqlscan_prepare_buffer() * buffer; we must copy it before calling psqlscan_prepare_buffer()
* because that will change the buffer state. * because that will change the buffer state.
*/ */
stackelem->varname = varname ? pg_strdup(varname) : NULL; stackelem->varname = varname ? pg_strdup(varname) : NULL;
......
...@@ -113,11 +113,10 @@ other . ...@@ -113,11 +113,10 @@ other .
/* /*
* Force flex into the state indicated by start_state. This has a * Force flex into the state indicated by start_state. This has a
* couple of purposes: it lets some of the functions below set a * couple of purposes: it lets some of the functions below set a new
* new starting state without ugly direct access to flex variables, * starting state without ugly direct access to flex variables, and it
* and it allows us to transition from one flex lexer to another * allows us to transition from one flex lexer to another so that we
* so that we can lex different parts of the source string using * can lex different parts of the source string using separate lexers.
* separate lexers.
*/ */
BEGIN(cur_state->start_state); BEGIN(cur_state->start_state);
%} %}
...@@ -396,10 +395,6 @@ other . ...@@ -396,10 +395,6 @@ other .
} }
/*
* psql uses a single <<EOF>> rule, unlike the backend.
*/
<<EOF>> { <<EOF>> {
if (cur_state->buffer_stack == NULL) if (cur_state->buffer_stack == NULL)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment