Commit 21c8ee79 authored by Tom Lane

Sync backend/parser/scan.l with bin/psql/psqlscan.l.

Make some minor formatting adjustments to make it easier to diff these
files and see that they indeed implement the same flex rules (at least
to the extent that we want them to be the same).

(Someday it'd be nice to make ecpg's pgc.l more easily diff'able too,
but today is not that day.)

Also run relevant parts of these files and psqlscanslash.l through
pgindent.

No actual behavioral changes here, just obsessive neatnik-ism.
parent 72b1e3a2
......@@ -360,8 +360,8 @@ operator {op_chars}+
* instead we pass it separately to parser. there it gets
* coerced via doNegate() -- Leon aug 20 1999
*
* {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
*
* {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
*
* {realfail1} and {realfail2} are added to prevent the need for scanner
* backup when the {real} rule fails to match completely.
*/
......@@ -558,10 +558,12 @@ other .
/* xusend state looks for possible UESCAPE */
BEGIN(xusend);
}
<xusend>{whitespace} { /* stay in xusend state over whitespace */ }
<xusend>{whitespace} {
/* stay in xusend state over whitespace */
}
<xusend><<EOF>> |
<xusend>{other} |
<xusend>{xustop1} |
<xusend><<EOF>> {
<xusend>{xustop1} {
/* no UESCAPE after the quote, throw back everything */
yyless(0);
BEGIN(INITIAL);
......@@ -571,13 +573,14 @@ other .
<xusend>{xustop2} {
/* found UESCAPE after the end quote */
BEGIN(INITIAL);
if (!check_uescapechar(yytext[yyleng-2]))
if (!check_uescapechar(yytext[yyleng - 2]))
{
SET_YYLLOC();
ADVANCE_YYLLOC(yyleng-2);
ADVANCE_YYLLOC(yyleng - 2);
yyerror("invalid Unicode escape character");
}
yylval->str = litbuf_udeescape(yytext[yyleng-2], yyscanner);
yylval->str = litbuf_udeescape(yytext[yyleng - 2],
yyscanner);
return SCONST;
}
<xq,xe,xus>{xqdouble} {
......@@ -590,7 +593,7 @@ other .
addlit(yytext, yyleng, yyscanner);
}
<xe>{xeunicode} {
pg_wchar c = strtoul(yytext+2, NULL, 16);
pg_wchar c = strtoul(yytext + 2, NULL, 16);
check_escape_warning(yyscanner);
......@@ -605,7 +608,7 @@ other .
addunicode(c, yyscanner);
}
<xeu>{xeunicode} {
pg_wchar c = strtoul(yytext+2, NULL, 16);
pg_wchar c = strtoul(yytext + 2, NULL, 16);
if (!is_utf16_surrogate_second(c))
yyerror("invalid Unicode surrogate pair");
......@@ -643,7 +646,7 @@ other .
yyscanner);
}
<xe>{xeoctesc} {
unsigned char c = strtoul(yytext+1, NULL, 8);
unsigned char c = strtoul(yytext + 1, NULL, 8);
check_escape_warning(yyscanner);
addlitchar(c, yyscanner);
......@@ -651,7 +654,7 @@ other .
yyextra->saw_non_ascii = true;
}
<xe>{xehexesc} {
unsigned char c = strtoul(yytext+2, NULL, 16);
unsigned char c = strtoul(yytext + 2, NULL, 16);
check_escape_warning(yyscanner);
addlitchar(c, yyscanner);
......@@ -696,8 +699,8 @@ other .
* the $... part to the output, but put back the final
* $ for rescanning. Consider $delim$...$junk$delim$
*/
addlit(yytext, yyleng-1, yyscanner);
yyless(yyleng-1);
addlit(yytext, yyleng - 1, yyscanner);
yyless(yyleng - 1);
}
}
<xdolq>{dolqinside} {
......@@ -739,10 +742,12 @@ other .
/* xuiend state looks for possible UESCAPE */
BEGIN(xuiend);
}
<xuiend>{whitespace} { /* stay in xuiend state over whitespace */ }
<xuiend>{whitespace} {
/* stay in xuiend state over whitespace */
}
<xuiend><<EOF>> |
<xuiend>{other} |
<xuiend>{xustop1} |
<xuiend><<EOF>> {
<xuiend>{xustop1} {
/* no UESCAPE after the quote, throw back everything */
char *ident;
int identlen;
......@@ -767,10 +772,10 @@ other .
BEGIN(INITIAL);
if (yyextra->literallen == 0)
yyerror("zero-length delimited identifier");
if (!check_uescapechar(yytext[yyleng-2]))
if (!check_uescapechar(yytext[yyleng - 2]))
{
SET_YYLLOC();
ADVANCE_YYLLOC(yyleng-2);
ADVANCE_YYLLOC(yyleng - 2);
yyerror("invalid Unicode escape character");
}
ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner);
......@@ -878,12 +883,12 @@ other .
* sequences of SQL operators.
*/
while (nchars > 1 &&
(yytext[nchars-1] == '+' ||
yytext[nchars-1] == '-'))
(yytext[nchars - 1] == '+' ||
yytext[nchars - 1] == '-'))
{
int ic;
for (ic = nchars-2; ic >= 0; ic--)
for (ic = nchars - 2; ic >= 0; ic--)
{
if (strchr("~!@#^&|`?%", yytext[ic]))
break;
......@@ -940,7 +945,7 @@ other .
}
{decimalfail} {
/* throw back the .., and treat as integer */
yyless(yyleng-2);
yyless(yyleng - 2);
SET_YYLLOC();
return process_integer_literal(yytext, yylval);
}
......@@ -956,14 +961,14 @@ other .
* but since this case will almost certainly lead to a
* syntax error anyway, we don't bother to distinguish.
*/
yyless(yyleng-1);
yyless(yyleng - 1);
SET_YYLLOC();
yylval->str = pstrdup(yytext);
return FCONST;
}
{realfail2} {
/* throw back the [Ee][+-], and proceed as above */
yyless(yyleng-2);
yyless(yyleng - 2);
SET_YYLLOC();
yylval->str = pstrdup(yytext);
return FCONST;
......@@ -1133,11 +1138,11 @@ void
scanner_finish(core_yyscan_t yyscanner)
{
/*
* We don't bother to call yylex_destroy(), because all it would do
* is pfree a small amount of control storage. It's cheaper to leak
* the storage until the parsing context is destroyed. The amount of
* space involved is usually negligible compared to the output parse
* tree anyway.
* We don't bother to call yylex_destroy(), because all it would do is
* pfree a small amount of control storage. It's cheaper to leak the
* storage until the parsing context is destroyed. The amount of space
* involved is usually negligible compared to the output parse tree
* anyway.
*
* We do bother to pfree the scanbuf and literal buffer, but only if they
* represent a nontrivial amount of space. The 8K cutoff is arbitrary.
......@@ -1155,7 +1160,8 @@ addlit(char *ytext, int yleng, core_yyscan_t yyscanner)
/* enlarge buffer if needed */
if ((yyextra->literallen + yleng) >= yyextra->literalalloc)
{
do {
do
{
yyextra->literalalloc *= 2;
} while ((yyextra->literallen + yleng) >= yyextra->literalalloc);
yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
......@@ -1303,7 +1309,9 @@ static char *
litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
{
char *new;
char *litbuf, *in, *out;
char *litbuf,
*in,
*out;
pg_wchar pair_first = 0;
/* Make literalbuf null-terminated to simplify the scanning loop */
......@@ -1311,8 +1319,8 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
litbuf[yyextra->literallen] = '\0';
/*
* This relies on the subtle assumption that a UTF-8 expansion
* cannot be longer than its escaped representation.
* This relies on the subtle assumption that a UTF-8 expansion cannot be
* longer than its escaped representation.
*/
new = palloc(yyextra->literallen + 1);
......@@ -1429,10 +1437,11 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
}
*out = '\0';
/*
* We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
* codes; but it's probably not worth the trouble, since this isn't
* likely to be a performance-critical path.
* codes; but it's probably not worth the trouble, since this isn't likely
* to be a performance-critical path.
*/
pg_verifymbstr(new, out - new, false);
return new;
......
......@@ -78,8 +78,8 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
* src/backend/parser/scan.l so far as the flex patterns are concerned.
* The rule bodies are just ECHO as opposed to what the backend does,
* however. (But be sure to duplicate code that affects the lexing process,
* such as BEGIN().) Also, psqlscan uses a single <<EOF>> rule whereas
* scan.l has a separate one for each exclusive state.
* such as BEGIN() and yyless().) Also, psqlscan uses a single <<EOF>> rule
* whereas scan.l has a separate one for each exclusive state.
*/
/*
......@@ -351,11 +351,10 @@ other .
/*
* Force flex into the state indicated by start_state. This has a
* couple of purposes: it lets some of the functions below set a
* new starting state without ugly direct access to flex variables,
* and it allows us to transition from one flex lexer to another
* so that we can lex different parts of the source string using
* separate lexers.
* couple of purposes: it lets some of the functions below set a new
* starting state without ugly direct access to flex variables, and it
* allows us to transition from one flex lexer to another so that we
* can lex different parts of the source string using separate lexers.
*/
BEGIN(cur_state->start_state);
%}
......@@ -390,9 +389,7 @@ other .
<xc>{xcstop} {
if (cur_state->xcdepth <= 0)
{
BEGIN(INITIAL);
}
else
cur_state->xcdepth--;
ECHO;
......@@ -474,6 +471,7 @@ other .
}
<xus>{quotestop} |
<xus>{quotefail} {
/* throw back all but the quote */
yyless(1);
BEGIN(xusend);
ECHO;
......@@ -547,7 +545,7 @@ other .
* the $... part to the output, but put back the final
* $ for rescanning. Consider $delim$...$junk$delim$
*/
yyless(yyleng-1);
yyless(yyleng - 1);
}
ECHO;
}
......@@ -717,8 +715,8 @@ other .
else
{
/*
* if the variable doesn't exist we'll copy the
* string as is
* if the variable doesn't exist we'll copy the string
* as is
*/
ECHO;
}
......@@ -790,12 +788,12 @@ other .
* sequences of SQL operators.
*/
while (nchars > 1 &&
(yytext[nchars-1] == '+' ||
yytext[nchars-1] == '-'))
(yytext[nchars - 1] == '+' ||
yytext[nchars - 1] == '-'))
{
int ic;
for (ic = nchars-2; ic >= 0; ic--)
for (ic = nchars - 2; ic >= 0; ic--)
{
if (strchr("~!@#^&|`?%", yytext[ic]))
break;
......@@ -825,7 +823,7 @@ other .
}
{decimalfail} {
/* throw back the .., and treat as integer */
yyless(yyleng-2);
yyless(yyleng - 2);
ECHO;
}
{real} {
......@@ -838,12 +836,12 @@ other .
* but since this case will almost certainly lead to a
* syntax error anyway, we don't bother to distinguish.
*/
yyless(yyleng-1);
yyless(yyleng - 1);
ECHO;
}
{realfail2} {
/* throw back the [Ee][+-], and proceed as above */
yyless(yyleng-2);
yyless(yyleng - 2);
ECHO;
}
......@@ -856,10 +854,6 @@ other .
ECHO;
}
/*
* psql uses a single <<EOF>> rule, unlike the backend.
*/
<<EOF>> {
if (cur_state->buffer_stack == NULL)
{
......@@ -1192,8 +1186,8 @@ psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
/*
* In current usage, the passed varname points at the current flex
* input buffer; we must copy it before calling psqlscan_prepare_buffer()
* In current usage, the passed varname points at the current flex input
* buffer; we must copy it before calling psqlscan_prepare_buffer()
* because that will change the buffer state.
*/
stackelem->varname = varname ? pg_strdup(varname) : NULL;
......
......@@ -113,11 +113,10 @@ other .
/*
* Force flex into the state indicated by start_state. This has a
* couple of purposes: it lets some of the functions below set a
* new starting state without ugly direct access to flex variables,
* and it allows us to transition from one flex lexer to another
* so that we can lex different parts of the source string using
* separate lexers.
* couple of purposes: it lets some of the functions below set a new
* starting state without ugly direct access to flex variables, and it
* allows us to transition from one flex lexer to another so that we
* can lex different parts of the source string using separate lexers.
*/
BEGIN(cur_state->start_state);
%}
......@@ -396,10 +395,6 @@ other .
}
/*
* psql uses a single <<EOF>> rule, unlike the backend.
*/
<<EOF>> {
if (cur_state->buffer_stack == NULL)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment