Commit 21c8ee79 authored by Tom Lane

Sync backend/parser/scan.l with bin/psql/psqlscan.l.

Make some minor formatting adjustments to make it easier to diff these
files and see that they indeed implement the same flex rules (at least
to the extent that we want them to be the same).

(Someday it'd be nice to make ecpg's pgc.l more easily diff'able too,
but today is not that day.)

Also run relevant parts of these files and psqlscanslash.l through
pgindent.

No actual behavioral changes here, just obsessive neatnik-ism.
parent 72b1e3a2
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
#include <unistd.h> #include <unistd.h>
#include "parser/gramparse.h" #include "parser/gramparse.h"
#include "parser/parser.h" /* only needed for GUC variables */ #include "parser/parser.h" /* only needed for GUC variables */
#include "parser/scansup.h" #include "parser/scansup.h"
#include "mb/pg_wchar.h" #include "mb/pg_wchar.h"
} }
...@@ -58,9 +58,9 @@ fprintf_to_ereport(const char *fmt, const char *msg) ...@@ -58,9 +58,9 @@ fprintf_to_ereport(const char *fmt, const char *msg)
* But we shall have to live with it as a short-term thing until the switch * But we shall have to live with it as a short-term thing until the switch
* to SQL-standard string syntax is complete. * to SQL-standard string syntax is complete.
*/ */
int backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING; int backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING;
bool escape_string_warning = true; bool escape_string_warning = true;
bool standard_conforming_strings = true; bool standard_conforming_strings = true;
/* /*
* Set the type of YYSTYPE. * Set the type of YYSTYPE.
...@@ -87,7 +87,7 @@ bool standard_conforming_strings = true; ...@@ -87,7 +87,7 @@ bool standard_conforming_strings = true;
*/ */
#define ADVANCE_YYLLOC(delta) ( *(yylloc) += (delta) ) #define ADVANCE_YYLLOC(delta) ( *(yylloc) += (delta) )
#define startlit() ( yyextra->literallen = 0 ) #define startlit() ( yyextra->literallen = 0 )
static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner); static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner);
static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner); static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
static char *litbufdup(core_yyscan_t yyscanner); static char *litbufdup(core_yyscan_t yyscanner);
...@@ -360,8 +360,8 @@ operator {op_chars}+ ...@@ -360,8 +360,8 @@ operator {op_chars}+
* instead we pass it separately to parser. there it gets * instead we pass it separately to parser. there it gets
* coerced via doNegate() -- Leon aug 20 1999 * coerced via doNegate() -- Leon aug 20 1999
* *
* {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10. * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
* *
* {realfail1} and {realfail2} are added to prevent the need for scanner * {realfail1} and {realfail2} are added to prevent the need for scanner
* backup when the {real} rule fails to match completely. * backup when the {real} rule fails to match completely.
*/ */
...@@ -490,7 +490,7 @@ other . ...@@ -490,7 +490,7 @@ other .
const ScanKeyword *keyword; const ScanKeyword *keyword;
SET_YYLLOC(); SET_YYLLOC();
yyless(1); /* eat only 'n' this time */ yyless(1); /* eat only 'n' this time */
keyword = ScanKeywordLookup("nchar", keyword = ScanKeywordLookup("nchar",
yyextra->keywords, yyextra->keywords,
...@@ -558,10 +558,12 @@ other . ...@@ -558,10 +558,12 @@ other .
/* xusend state looks for possible UESCAPE */ /* xusend state looks for possible UESCAPE */
BEGIN(xusend); BEGIN(xusend);
} }
<xusend>{whitespace} { /* stay in xusend state over whitespace */ } <xusend>{whitespace} {
/* stay in xusend state over whitespace */
}
<xusend><<EOF>> |
<xusend>{other} | <xusend>{other} |
<xusend>{xustop1} | <xusend>{xustop1} {
<xusend><<EOF>> {
/* no UESCAPE after the quote, throw back everything */ /* no UESCAPE after the quote, throw back everything */
yyless(0); yyless(0);
BEGIN(INITIAL); BEGIN(INITIAL);
...@@ -571,13 +573,14 @@ other . ...@@ -571,13 +573,14 @@ other .
<xusend>{xustop2} { <xusend>{xustop2} {
/* found UESCAPE after the end quote */ /* found UESCAPE after the end quote */
BEGIN(INITIAL); BEGIN(INITIAL);
if (!check_uescapechar(yytext[yyleng-2])) if (!check_uescapechar(yytext[yyleng - 2]))
{ {
SET_YYLLOC(); SET_YYLLOC();
ADVANCE_YYLLOC(yyleng-2); ADVANCE_YYLLOC(yyleng - 2);
yyerror("invalid Unicode escape character"); yyerror("invalid Unicode escape character");
} }
yylval->str = litbuf_udeescape(yytext[yyleng-2], yyscanner); yylval->str = litbuf_udeescape(yytext[yyleng - 2],
yyscanner);
return SCONST; return SCONST;
} }
<xq,xe,xus>{xqdouble} { <xq,xe,xus>{xqdouble} {
...@@ -590,7 +593,7 @@ other . ...@@ -590,7 +593,7 @@ other .
addlit(yytext, yyleng, yyscanner); addlit(yytext, yyleng, yyscanner);
} }
<xe>{xeunicode} { <xe>{xeunicode} {
pg_wchar c = strtoul(yytext+2, NULL, 16); pg_wchar c = strtoul(yytext + 2, NULL, 16);
check_escape_warning(yyscanner); check_escape_warning(yyscanner);
...@@ -605,7 +608,7 @@ other . ...@@ -605,7 +608,7 @@ other .
addunicode(c, yyscanner); addunicode(c, yyscanner);
} }
<xeu>{xeunicode} { <xeu>{xeunicode} {
pg_wchar c = strtoul(yytext+2, NULL, 16); pg_wchar c = strtoul(yytext + 2, NULL, 16);
if (!is_utf16_surrogate_second(c)) if (!is_utf16_surrogate_second(c))
yyerror("invalid Unicode surrogate pair"); yyerror("invalid Unicode surrogate pair");
...@@ -620,11 +623,11 @@ other . ...@@ -620,11 +623,11 @@ other .
<xeu>\n { yyerror("invalid Unicode surrogate pair"); } <xeu>\n { yyerror("invalid Unicode surrogate pair"); }
<xeu><<EOF>> { yyerror("invalid Unicode surrogate pair"); } <xeu><<EOF>> { yyerror("invalid Unicode surrogate pair"); }
<xe,xeu>{xeunicodefail} { <xe,xeu>{xeunicodefail} {
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
errmsg("invalid Unicode escape"), errmsg("invalid Unicode escape"),
errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."), errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."),
lexer_errposition())); lexer_errposition()));
} }
<xe>{xeescape} { <xe>{xeescape} {
if (yytext[1] == '\'') if (yytext[1] == '\'')
...@@ -643,7 +646,7 @@ other . ...@@ -643,7 +646,7 @@ other .
yyscanner); yyscanner);
} }
<xe>{xeoctesc} { <xe>{xeoctesc} {
unsigned char c = strtoul(yytext+1, NULL, 8); unsigned char c = strtoul(yytext + 1, NULL, 8);
check_escape_warning(yyscanner); check_escape_warning(yyscanner);
addlitchar(c, yyscanner); addlitchar(c, yyscanner);
...@@ -651,7 +654,7 @@ other . ...@@ -651,7 +654,7 @@ other .
yyextra->saw_non_ascii = true; yyextra->saw_non_ascii = true;
} }
<xe>{xehexesc} { <xe>{xehexesc} {
unsigned char c = strtoul(yytext+2, NULL, 16); unsigned char c = strtoul(yytext + 2, NULL, 16);
check_escape_warning(yyscanner); check_escape_warning(yyscanner);
addlitchar(c, yyscanner); addlitchar(c, yyscanner);
...@@ -696,8 +699,8 @@ other . ...@@ -696,8 +699,8 @@ other .
* the $... part to the output, but put back the final * the $... part to the output, but put back the final
* $ for rescanning. Consider $delim$...$junk$delim$ * $ for rescanning. Consider $delim$...$junk$delim$
*/ */
addlit(yytext, yyleng-1, yyscanner); addlit(yytext, yyleng - 1, yyscanner);
yyless(yyleng-1); yyless(yyleng - 1);
} }
} }
<xdolq>{dolqinside} { <xdolq>{dolqinside} {
...@@ -723,7 +726,7 @@ other . ...@@ -723,7 +726,7 @@ other .
startlit(); startlit();
} }
<xd>{xdstop} { <xd>{xdstop} {
char *ident; char *ident;
BEGIN(INITIAL); BEGIN(INITIAL);
if (yyextra->literallen == 0) if (yyextra->literallen == 0)
...@@ -739,10 +742,12 @@ other . ...@@ -739,10 +742,12 @@ other .
/* xuiend state looks for possible UESCAPE */ /* xuiend state looks for possible UESCAPE */
BEGIN(xuiend); BEGIN(xuiend);
} }
<xuiend>{whitespace} { /* stay in xuiend state over whitespace */ } <xuiend>{whitespace} {
/* stay in xuiend state over whitespace */
}
<xuiend><<EOF>> |
<xuiend>{other} | <xuiend>{other} |
<xuiend>{xustop1} | <xuiend>{xustop1} {
<xuiend><<EOF>> {
/* no UESCAPE after the quote, throw back everything */ /* no UESCAPE after the quote, throw back everything */
char *ident; char *ident;
int identlen; int identlen;
...@@ -767,10 +772,10 @@ other . ...@@ -767,10 +772,10 @@ other .
BEGIN(INITIAL); BEGIN(INITIAL);
if (yyextra->literallen == 0) if (yyextra->literallen == 0)
yyerror("zero-length delimited identifier"); yyerror("zero-length delimited identifier");
if (!check_uescapechar(yytext[yyleng-2])) if (!check_uescapechar(yytext[yyleng - 2]))
{ {
SET_YYLLOC(); SET_YYLLOC();
ADVANCE_YYLLOC(yyleng-2); ADVANCE_YYLLOC(yyleng - 2);
yyerror("invalid Unicode escape character"); yyerror("invalid Unicode escape character");
} }
ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner); ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner);
...@@ -789,7 +794,7 @@ other . ...@@ -789,7 +794,7 @@ other .
<xd,xui><<EOF>> { yyerror("unterminated quoted identifier"); } <xd,xui><<EOF>> { yyerror("unterminated quoted identifier"); }
{xufailed} { {xufailed} {
char *ident; char *ident;
SET_YYLLOC(); SET_YYLLOC();
/* throw back all but the initial u/U */ /* throw back all but the initial u/U */
...@@ -815,7 +820,7 @@ other . ...@@ -815,7 +820,7 @@ other .
return COLON_EQUALS; return COLON_EQUALS;
} }
{equals_greater} { {equals_greater} {
SET_YYLLOC(); SET_YYLLOC();
return EQUALS_GREATER; return EQUALS_GREATER;
} }
...@@ -854,9 +859,9 @@ other . ...@@ -854,9 +859,9 @@ other .
* Note that slash-star or dash-dash at the first * Note that slash-star or dash-dash at the first
* character will match a prior rule, not this one. * character will match a prior rule, not this one.
*/ */
int nchars = yyleng; int nchars = yyleng;
char *slashstar = strstr(yytext, "/*"); char *slashstar = strstr(yytext, "/*");
char *dashdash = strstr(yytext, "--"); char *dashdash = strstr(yytext, "--");
if (slashstar && dashdash) if (slashstar && dashdash)
{ {
...@@ -878,12 +883,12 @@ other . ...@@ -878,12 +883,12 @@ other .
* sequences of SQL operators. * sequences of SQL operators.
*/ */
while (nchars > 1 && while (nchars > 1 &&
(yytext[nchars-1] == '+' || (yytext[nchars - 1] == '+' ||
yytext[nchars-1] == '-')) yytext[nchars - 1] == '-'))
{ {
int ic; int ic;
for (ic = nchars-2; ic >= 0; ic--) for (ic = nchars - 2; ic >= 0; ic--)
{ {
if (strchr("~!@#^&|`?%", yytext[ic])) if (strchr("~!@#^&|`?%", yytext[ic]))
break; break;
...@@ -940,7 +945,7 @@ other . ...@@ -940,7 +945,7 @@ other .
} }
{decimalfail} { {decimalfail} {
/* throw back the .., and treat as integer */ /* throw back the .., and treat as integer */
yyless(yyleng-2); yyless(yyleng - 2);
SET_YYLLOC(); SET_YYLLOC();
return process_integer_literal(yytext, yylval); return process_integer_literal(yytext, yylval);
} }
...@@ -956,14 +961,14 @@ other . ...@@ -956,14 +961,14 @@ other .
* but since this case will almost certainly lead to a * but since this case will almost certainly lead to a
* syntax error anyway, we don't bother to distinguish. * syntax error anyway, we don't bother to distinguish.
*/ */
yyless(yyleng-1); yyless(yyleng - 1);
SET_YYLLOC(); SET_YYLLOC();
yylval->str = pstrdup(yytext); yylval->str = pstrdup(yytext);
return FCONST; return FCONST;
} }
{realfail2} { {realfail2} {
/* throw back the [Ee][+-], and proceed as above */ /* throw back the [Ee][+-], and proceed as above */
yyless(yyleng-2); yyless(yyleng - 2);
SET_YYLLOC(); SET_YYLLOC();
yylval->str = pstrdup(yytext); yylval->str = pstrdup(yytext);
return FCONST; return FCONST;
...@@ -972,7 +977,7 @@ other . ...@@ -972,7 +977,7 @@ other .
{identifier} { {identifier} {
const ScanKeyword *keyword; const ScanKeyword *keyword;
char *ident; char *ident;
SET_YYLLOC(); SET_YYLLOC();
...@@ -1018,9 +1023,9 @@ other . ...@@ -1018,9 +1023,9 @@ other .
/* Likewise for a couple of other things we need. */ /* Likewise for a couple of other things we need. */
#undef yylloc #undef yylloc
#define yylloc (((struct yyguts_t *) yyscanner)->yylloc_r) #define yylloc (((struct yyguts_t *) yyscanner)->yylloc_r)
#undef yyleng #undef yyleng
#define yyleng (((struct yyguts_t *) yyscanner)->yyleng_r) #define yyleng (((struct yyguts_t *) yyscanner)->yyleng_r)
/* /*
...@@ -1037,7 +1042,7 @@ other . ...@@ -1037,7 +1042,7 @@ other .
int int
scanner_errposition(int location, core_yyscan_t yyscanner) scanner_errposition(int location, core_yyscan_t yyscanner)
{ {
int pos; int pos;
if (location < 0) if (location < 0)
return 0; /* no-op if location is unknown */ return 0; /* no-op if location is unknown */
...@@ -1069,7 +1074,7 @@ scanner_yyerror(const char *message, core_yyscan_t yyscanner) ...@@ -1069,7 +1074,7 @@ scanner_yyerror(const char *message, core_yyscan_t yyscanner)
{ {
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
/* translator: %s is typically the translation of "syntax error" */ /* translator: %s is typically the translation of "syntax error" */
errmsg("%s at end of input", _(message)), errmsg("%s at end of input", _(message)),
lexer_errposition())); lexer_errposition()));
} }
...@@ -1077,7 +1082,7 @@ scanner_yyerror(const char *message, core_yyscan_t yyscanner) ...@@ -1077,7 +1082,7 @@ scanner_yyerror(const char *message, core_yyscan_t yyscanner)
{ {
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
/* translator: first %s is typically the translation of "syntax error" */ /* translator: first %s is typically the translation of "syntax error" */
errmsg("%s at or near \"%s\"", _(message), loc), errmsg("%s at or near \"%s\"", _(message), loc),
lexer_errposition())); lexer_errposition()));
} }
...@@ -1133,11 +1138,11 @@ void ...@@ -1133,11 +1138,11 @@ void
scanner_finish(core_yyscan_t yyscanner) scanner_finish(core_yyscan_t yyscanner)
{ {
/* /*
* We don't bother to call yylex_destroy(), because all it would do * We don't bother to call yylex_destroy(), because all it would do is
* is pfree a small amount of control storage. It's cheaper to leak * pfree a small amount of control storage. It's cheaper to leak the
* the storage until the parsing context is destroyed. The amount of * storage until the parsing context is destroyed. The amount of space
* space involved is usually negligible compared to the output parse * involved is usually negligible compared to the output parse tree
* tree anyway. * anyway.
* *
* We do bother to pfree the scanbuf and literal buffer, but only if they * We do bother to pfree the scanbuf and literal buffer, but only if they
* represent a nontrivial amount of space. The 8K cutoff is arbitrary. * represent a nontrivial amount of space. The 8K cutoff is arbitrary.
...@@ -1155,7 +1160,8 @@ addlit(char *ytext, int yleng, core_yyscan_t yyscanner) ...@@ -1155,7 +1160,8 @@ addlit(char *ytext, int yleng, core_yyscan_t yyscanner)
/* enlarge buffer if needed */ /* enlarge buffer if needed */
if ((yyextra->literallen + yleng) >= yyextra->literalalloc) if ((yyextra->literallen + yleng) >= yyextra->literalalloc)
{ {
do { do
{
yyextra->literalalloc *= 2; yyextra->literalalloc *= 2;
} while ((yyextra->literallen + yleng) >= yyextra->literalalloc); } while ((yyextra->literallen + yleng) >= yyextra->literalalloc);
yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf, yyextra->literalbuf = (char *) repalloc(yyextra->literalbuf,
...@@ -1208,7 +1214,7 @@ process_integer_literal(const char *token, YYSTYPE *lval) ...@@ -1208,7 +1214,7 @@ process_integer_literal(const char *token, YYSTYPE *lval)
val = strtol(token, &endptr, 10); val = strtol(token, &endptr, 10);
if (*endptr != '\0' || errno == ERANGE if (*endptr != '\0' || errno == ERANGE
#ifdef HAVE_LONG_INT_64 #ifdef HAVE_LONG_INT_64
/* if long > 32 bits, check for overflow of int4 */ /* if long > 32 bits, check for overflow of int4 */
|| val != (long) ((int32) val) || val != (long) ((int32) val)
#endif #endif
) )
...@@ -1231,7 +1237,7 @@ hexval(unsigned char c) ...@@ -1231,7 +1237,7 @@ hexval(unsigned char c)
if (c >= 'A' && c <= 'F') if (c >= 'A' && c <= 'F')
return c - 'A' + 0xA; return c - 'A' + 0xA;
elog(ERROR, "invalid hexadecimal digit"); elog(ERROR, "invalid hexadecimal digit");
return 0; /* not reached */ return 0; /* not reached */
} }
static void static void
...@@ -1242,7 +1248,7 @@ check_unicode_value(pg_wchar c, char *loc, core_yyscan_t yyscanner) ...@@ -1242,7 +1248,7 @@ check_unicode_value(pg_wchar c, char *loc, core_yyscan_t yyscanner)
if (c > 0x7F) if (c > 0x7F)
{ {
ADVANCE_YYLLOC(loc - yyextra->literalbuf + 3); /* 3 for U&" */ ADVANCE_YYLLOC(loc - yyextra->literalbuf + 3); /* 3 for U&" */
yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8"); yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8");
} }
} }
...@@ -1268,7 +1274,7 @@ surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second) ...@@ -1268,7 +1274,7 @@ surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
static void static void
addunicode(pg_wchar c, core_yyscan_t yyscanner) addunicode(pg_wchar c, core_yyscan_t yyscanner)
{ {
char buf[8]; char buf[8];
if (c == 0 || c > 0x10FFFF) if (c == 0 || c > 0x10FFFF)
yyerror("invalid Unicode escape value"); yyerror("invalid Unicode escape value");
...@@ -1302,17 +1308,19 @@ check_uescapechar(unsigned char escape) ...@@ -1302,17 +1308,19 @@ check_uescapechar(unsigned char escape)
static char * static char *
litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
{ {
char *new; char *new;
char *litbuf, *in, *out; char *litbuf,
pg_wchar pair_first = 0; *in,
*out;
pg_wchar pair_first = 0;
/* Make literalbuf null-terminated to simplify the scanning loop */ /* Make literalbuf null-terminated to simplify the scanning loop */
litbuf = yyextra->literalbuf; litbuf = yyextra->literalbuf;
litbuf[yyextra->literallen] = '\0'; litbuf[yyextra->literallen] = '\0';
/* /*
* This relies on the subtle assumption that a UTF-8 expansion * This relies on the subtle assumption that a UTF-8 expansion cannot be
* cannot be longer than its escaped representation. * longer than its escaped representation.
*/ */
new = palloc(yyextra->literallen + 1); new = palloc(yyextra->literallen + 1);
...@@ -1326,7 +1334,7 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) ...@@ -1326,7 +1334,7 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
{ {
if (pair_first) if (pair_first)
{ {
ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */
yyerror("invalid Unicode surrogate pair"); yyerror("invalid Unicode surrogate pair");
} }
*out++ = escape; *out++ = escape;
...@@ -1337,7 +1345,7 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) ...@@ -1337,7 +1345,7 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
isxdigit((unsigned char) in[3]) && isxdigit((unsigned char) in[3]) &&
isxdigit((unsigned char) in[4])) isxdigit((unsigned char) in[4]))
{ {
pg_wchar unicode; pg_wchar unicode;
unicode = (hexval(in[1]) << 12) + unicode = (hexval(in[1]) << 12) +
(hexval(in[2]) << 8) + (hexval(in[2]) << 8) +
...@@ -1353,7 +1361,7 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) ...@@ -1353,7 +1361,7 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
} }
else else
{ {
ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */
yyerror("invalid Unicode surrogate pair"); yyerror("invalid Unicode surrogate pair");
} }
} }
...@@ -1377,7 +1385,7 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) ...@@ -1377,7 +1385,7 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
isxdigit((unsigned char) in[6]) && isxdigit((unsigned char) in[6]) &&
isxdigit((unsigned char) in[7])) isxdigit((unsigned char) in[7]))
{ {
pg_wchar unicode; pg_wchar unicode;
unicode = (hexval(in[2]) << 20) + unicode = (hexval(in[2]) << 20) +
(hexval(in[3]) << 16) + (hexval(in[3]) << 16) +
...@@ -1395,7 +1403,7 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) ...@@ -1395,7 +1403,7 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
} }
else else
{ {
ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */
yyerror("invalid Unicode surrogate pair"); yyerror("invalid Unicode surrogate pair");
} }
} }
...@@ -1413,7 +1421,7 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) ...@@ -1413,7 +1421,7 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
} }
else else
{ {
ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */
yyerror("invalid Unicode escape value"); yyerror("invalid Unicode escape value");
} }
} }
...@@ -1421,7 +1429,7 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) ...@@ -1421,7 +1429,7 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
{ {
if (pair_first) if (pair_first)
{ {
ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */ ADVANCE_YYLLOC(in - litbuf + 3); /* 3 for U&" */
yyerror("invalid Unicode surrogate pair"); yyerror("invalid Unicode surrogate pair");
} }
*out++ = *in++; *out++ = *in++;
...@@ -1429,10 +1437,11 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) ...@@ -1429,10 +1437,11 @@ litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner)
} }
*out = '\0'; *out = '\0';
/* /*
* We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
* codes; but it's probably not worth the trouble, since this isn't * codes; but it's probably not worth the trouble, since this isn't likely
* likely to be a performance-critical path. * to be a performance-critical path.
*/ */
pg_verifymbstr(new, out - new, false); pg_verifymbstr(new, out - new, false);
return new; return new;
...@@ -1496,9 +1505,9 @@ check_escape_warning(core_yyscan_t yyscanner) ...@@ -1496,9 +1505,9 @@ check_escape_warning(core_yyscan_t yyscanner)
ereport(WARNING, ereport(WARNING,
(errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER),
errmsg("nonstandard use of escape in a string literal"), errmsg("nonstandard use of escape in a string literal"),
errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."), errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."),
lexer_errposition())); lexer_errposition()));
yyextra->warn_on_first_escape = false; /* warn only once per string */ yyextra->warn_on_first_escape = false; /* warn only once per string */
} }
/* /*
......
...@@ -78,8 +78,8 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner); ...@@ -78,8 +78,8 @@ extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
* src/backend/parser/scan.l so far as the flex patterns are concerned. * src/backend/parser/scan.l so far as the flex patterns are concerned.
* The rule bodies are just ECHO as opposed to what the backend does, * The rule bodies are just ECHO as opposed to what the backend does,
* however. (But be sure to duplicate code that affects the lexing process, * however. (But be sure to duplicate code that affects the lexing process,
* such as BEGIN().) Also, psqlscan uses a single <<EOF>> rule whereas * such as BEGIN() and yyless().) Also, psqlscan uses a single <<EOF>> rule
* scan.l has a separate one for each exclusive state. * whereas scan.l has a separate one for each exclusive state.
*/ */
/* /*
...@@ -351,11 +351,10 @@ other . ...@@ -351,11 +351,10 @@ other .
/* /*
* Force flex into the state indicated by start_state. This has a * Force flex into the state indicated by start_state. This has a
* couple of purposes: it lets some of the functions below set a * couple of purposes: it lets some of the functions below set a new
* new starting state without ugly direct access to flex variables, * starting state without ugly direct access to flex variables, and it
* and it allows us to transition from one flex lexer to another * allows us to transition from one flex lexer to another so that we
* so that we can lex different parts of the source string using * can lex different parts of the source string using separate lexers.
* separate lexers.
*/ */
BEGIN(cur_state->start_state); BEGIN(cur_state->start_state);
%} %}
...@@ -390,9 +389,7 @@ other . ...@@ -390,9 +389,7 @@ other .
<xc>{xcstop} { <xc>{xcstop} {
if (cur_state->xcdepth <= 0) if (cur_state->xcdepth <= 0)
{
BEGIN(INITIAL); BEGIN(INITIAL);
}
else else
cur_state->xcdepth--; cur_state->xcdepth--;
ECHO; ECHO;
...@@ -447,7 +444,7 @@ other . ...@@ -447,7 +444,7 @@ other .
} }
{xnstart} { {xnstart} {
yyless(1); /* eat only 'n' this time */ yyless(1); /* eat only 'n' this time */
ECHO; ECHO;
} }
...@@ -474,6 +471,7 @@ other . ...@@ -474,6 +471,7 @@ other .
} }
<xus>{quotestop} | <xus>{quotestop} |
<xus>{quotefail} { <xus>{quotefail} {
/* throw back all but the quote */
yyless(1); yyless(1);
BEGIN(xusend); BEGIN(xusend);
ECHO; ECHO;
...@@ -547,7 +545,7 @@ other . ...@@ -547,7 +545,7 @@ other .
* the $... part to the output, but put back the final * the $... part to the output, but put back the final
* $ for rescanning. Consider $delim$...$junk$delim$ * $ for rescanning. Consider $delim$...$junk$delim$
*/ */
yyless(yyleng-1); yyless(yyleng - 1);
} }
ECHO; ECHO;
} }
...@@ -682,8 +680,8 @@ other . ...@@ -682,8 +680,8 @@ other .
:{variable_char}+ { :{variable_char}+ {
/* Possible psql variable substitution */ /* Possible psql variable substitution */
char *varname; char *varname;
char *value; char *value;
varname = psqlscan_extract_substring(cur_state, varname = psqlscan_extract_substring(cur_state,
yytext + 1, yytext + 1,
...@@ -717,8 +715,8 @@ other . ...@@ -717,8 +715,8 @@ other .
else else
{ {
/* /*
* if the variable doesn't exist we'll copy the * if the variable doesn't exist we'll copy the string
* string as is * as is
*/ */
ECHO; ECHO;
} }
...@@ -766,9 +764,9 @@ other . ...@@ -766,9 +764,9 @@ other .
* Note that slash-star or dash-dash at the first * Note that slash-star or dash-dash at the first
* character will match a prior rule, not this one. * character will match a prior rule, not this one.
*/ */
int nchars = yyleng; int nchars = yyleng;
char *slashstar = strstr(yytext, "/*"); char *slashstar = strstr(yytext, "/*");
char *dashdash = strstr(yytext, "--"); char *dashdash = strstr(yytext, "--");
if (slashstar && dashdash) if (slashstar && dashdash)
{ {
...@@ -790,12 +788,12 @@ other . ...@@ -790,12 +788,12 @@ other .
* sequences of SQL operators. * sequences of SQL operators.
*/ */
while (nchars > 1 && while (nchars > 1 &&
(yytext[nchars-1] == '+' || (yytext[nchars - 1] == '+' ||
yytext[nchars-1] == '-')) yytext[nchars - 1] == '-'))
{ {
int ic; int ic;
for (ic = nchars-2; ic >= 0; ic--) for (ic = nchars - 2; ic >= 0; ic--)
{ {
if (strchr("~!@#^&|`?%", yytext[ic])) if (strchr("~!@#^&|`?%", yytext[ic]))
break; break;
...@@ -825,7 +823,7 @@ other . ...@@ -825,7 +823,7 @@ other .
} }
{decimalfail} { {decimalfail} {
/* throw back the .., and treat as integer */ /* throw back the .., and treat as integer */
yyless(yyleng-2); yyless(yyleng - 2);
ECHO; ECHO;
} }
{real} { {real} {
...@@ -838,12 +836,12 @@ other . ...@@ -838,12 +836,12 @@ other .
* but since this case will almost certainly lead to a * but since this case will almost certainly lead to a
* syntax error anyway, we don't bother to distinguish. * syntax error anyway, we don't bother to distinguish.
*/ */
yyless(yyleng-1); yyless(yyleng - 1);
ECHO; ECHO;
} }
{realfail2} { {realfail2} {
/* throw back the [Ee][+-], and proceed as above */ /* throw back the [Ee][+-], and proceed as above */
yyless(yyleng-2); yyless(yyleng - 2);
ECHO; ECHO;
} }
...@@ -856,15 +854,11 @@ other . ...@@ -856,15 +854,11 @@ other .
ECHO; ECHO;
} }
/*
* psql uses a single <<EOF>> rule, unlike the backend.
*/
<<EOF>> { <<EOF>> {
if (cur_state->buffer_stack == NULL) if (cur_state->buffer_stack == NULL)
{ {
cur_state->start_state = YY_START; cur_state->start_state = YY_START;
return LEXRES_EOL; /* end of input reached */ return LEXRES_EOL; /* end of input reached */
} }
/* /*
...@@ -1192,8 +1186,8 @@ psqlscan_push_new_buffer(PsqlScanState state, const char *newstr, ...@@ -1192,8 +1186,8 @@ psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
stackelem = (StackElem *) pg_malloc(sizeof(StackElem)); stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
/* /*
* In current usage, the passed varname points at the current flex * In current usage, the passed varname points at the current flex input
* input buffer; we must copy it before calling psqlscan_prepare_buffer() * buffer; we must copy it before calling psqlscan_prepare_buffer()
* because that will change the buffer state. * because that will change the buffer state.
*/ */
stackelem->varname = varname ? pg_strdup(varname) : NULL; stackelem->varname = varname ? pg_strdup(varname) : NULL;
...@@ -1301,11 +1295,11 @@ psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len, ...@@ -1301,11 +1295,11 @@ psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
else else
{ {
/* Gotta do it the hard way */ /* Gotta do it the hard way */
int i = 0; int i = 0;
while (i < len) while (i < len)
{ {
int thislen = PQmblen(txt + i, state->encoding); int thislen = PQmblen(txt + i, state->encoding);
/* first byte should always be okay... */ /* first byte should always be okay... */
newtxt[i] = txt[i]; newtxt[i] = txt[i];
...@@ -1337,13 +1331,13 @@ psqlscan_emit(PsqlScanState state, const char *txt, int len) ...@@ -1337,13 +1331,13 @@ psqlscan_emit(PsqlScanState state, const char *txt, int len)
{ {
/* Gotta do it the hard way */ /* Gotta do it the hard way */
const char *reference = state->refline; const char *reference = state->refline;
int i; int i;
reference += (txt - state->curline); reference += (txt - state->curline);
for (i = 0; i < len; i++) for (i = 0; i < len; i++)
{ {
char ch = txt[i]; char ch = txt[i];
if (ch == (char) 0xFF) if (ch == (char) 0xFF)
ch = reference[i]; ch = reference[i];
...@@ -1369,13 +1363,13 @@ psqlscan_extract_substring(PsqlScanState state, const char *txt, int len) ...@@ -1369,13 +1363,13 @@ psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
{ {
/* Gotta do it the hard way */ /* Gotta do it the hard way */
const char *reference = state->refline; const char *reference = state->refline;
int i; int i;
reference += (txt - state->curline); reference += (txt - state->curline);
for (i = 0; i < len; i++) for (i = 0; i < len; i++)
{ {
char ch = txt[i]; char ch = txt[i];
if (ch == (char) 0xFF) if (ch == (char) 0xFF)
ch = reference[i]; ch = reference[i];
...@@ -1391,7 +1385,7 @@ psqlscan_extract_substring(PsqlScanState state, const char *txt, int len) ...@@ -1391,7 +1385,7 @@ psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
* *
* If the variable name is found, escape its value using the appropriate * If the variable name is found, escape its value using the appropriate
* quoting method and emit the value to output_buf. (Since the result is * quoting method and emit the value to output_buf. (Since the result is
* surely quoted, there is never any reason to rescan it.) If we don't * surely quoted, there is never any reason to rescan it.) If we don't
* find the variable or escaping fails, emit the token as-is. * find the variable or escaping fails, emit the token as-is.
*/ */
void void
......
...@@ -113,11 +113,10 @@ other . ...@@ -113,11 +113,10 @@ other .
/* /*
* Force flex into the state indicated by start_state. This has a * Force flex into the state indicated by start_state. This has a
* couple of purposes: it lets some of the functions below set a * couple of purposes: it lets some of the functions below set a new
* new starting state without ugly direct access to flex variables, * starting state without ugly direct access to flex variables, and it
* and it allows us to transition from one flex lexer to another * allows us to transition from one flex lexer to another so that we
* so that we can lex different parts of the source string using * can lex different parts of the source string using separate lexers.
* separate lexers.
*/ */
BEGIN(cur_state->start_state); BEGIN(cur_state->start_state);
%} %}
...@@ -228,8 +227,8 @@ other . ...@@ -228,8 +227,8 @@ other .
ECHO; ECHO;
else else
{ {
char *varname; char *varname;
char *value; char *value;
varname = psqlscan_extract_substring(cur_state, varname = psqlscan_extract_substring(cur_state,
yytext + 1, yytext + 1,
...@@ -396,15 +395,11 @@ other . ...@@ -396,15 +395,11 @@ other .
} }
/*
* psql uses a single <<EOF>> rule, unlike the backend.
*/
<<EOF>> { <<EOF>> {
if (cur_state->buffer_stack == NULL) if (cur_state->buffer_stack == NULL)
{ {
cur_state->start_state = YY_START; cur_state->start_state = YY_START;
return LEXRES_EOL; /* end of input reached */ return LEXRES_EOL; /* end of input reached */
} }
/* /*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment