Commit 799ac992 authored by Tom Lane

Sync psql's scanner with recent changes in backend scanner's flex rules.

Marko Kreen, Tom Lane
parent 3686bcb9
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.161 2009/09/25 21:13:06 petere Exp $ * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.162 2009/09/27 03:27:23 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -571,18 +571,16 @@ other . ...@@ -571,18 +571,16 @@ other .
BEGIN(xe); BEGIN(xe);
} }
<xeu>. | <xeu>. { yyerror("invalid Unicode surrogate pair"); }
<xeu>\n | <xeu>\n { yyerror("invalid Unicode surrogate pair"); }
<xeu><<EOF>> { yyerror("invalid Unicode surrogate pair"); } <xeu><<EOF>> { yyerror("invalid Unicode surrogate pair"); }
<xe,xeu>{xeunicodefail} { <xe,xeu>{xeunicodefail} {
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
errmsg("invalid Unicode escape"), errmsg("invalid Unicode escape"),
errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."), errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."),
lexer_errposition())); lexer_errposition()));
} }
<xe>{xeescape} { <xe>{xeescape} {
if (yytext[1] == '\'') if (yytext[1] == '\'')
{ {
......
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.28 2009/01/01 17:23:55 momjian Exp $ * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.29 2009/09/27 03:27:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -117,6 +117,7 @@ static void push_new_buffer(const char *newstr); ...@@ -117,6 +117,7 @@ static void push_new_buffer(const char *newstr);
static YY_BUFFER_STATE prepare_buffer(const char *txt, int len, static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
char **txtcopy); char **txtcopy);
static void emit(const char *txt, int len); static void emit(const char *txt, int len);
static bool is_utf16_surrogate_first(uint32 c);
#define ECHO emit(yytext, yyleng) #define ECHO emit(yytext, yyleng)
...@@ -158,6 +159,7 @@ static void emit(const char *txt, int len); ...@@ -158,6 +159,7 @@ static void emit(const char *txt, int len);
* <xdolq> $foo$ quoted strings * <xdolq> $foo$ quoted strings
* <xui> quoted identifier with Unicode escapes * <xui> quoted identifier with Unicode escapes
* <xus> quoted string with Unicode escapes * <xus> quoted string with Unicode escapes
* <xeu> Unicode surrogate pair in extended quoted string
*/ */
%x xb %x xb
...@@ -169,6 +171,7 @@ static void emit(const char *txt, int len); ...@@ -169,6 +171,7 @@ static void emit(const char *txt, int len);
%x xdolq %x xdolq
%x xui %x xui
%x xus %x xus
%x xeu
/* Additional exclusive states for psql only: lex backslash commands */ /* Additional exclusive states for psql only: lex backslash commands */
%x xslashcmd %x xslashcmd
%x xslasharg %x xslasharg
...@@ -192,6 +195,9 @@ static void emit(const char *txt, int len); ...@@ -192,6 +195,9 @@ static void emit(const char *txt, int len);
* did not end with a newline. * did not end with a newline.
* *
* XXX perhaps \f (formfeed) should be treated as a newline as well? * XXX perhaps \f (formfeed) should be treated as a newline as well?
*
* XXX if you change the set of whitespace characters, fix scanner_isspace()
* to agree, and see also the plpgsql lexer.
*/ */
space [ \t\n\r\f] space [ \t\n\r\f]
...@@ -253,6 +259,8 @@ xeinside [^\\']+ ...@@ -253,6 +259,8 @@ xeinside [^\\']+
xeescape [\\][^0-7] xeescape [\\][^0-7]
xeoctesc [\\][0-7]{1,3} xeoctesc [\\][0-7]{1,3}
xehexesc [\\]x[0-9A-Fa-f]{1,2} xehexesc [\\]x[0-9A-Fa-f]{1,2}
xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
xeunicodefail [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
/* Extended quote /* Extended quote
* xqdouble implements embedded quote, '''' * xqdouble implements embedded quote, ''''
...@@ -334,6 +342,10 @@ identifier {ident_start}{ident_cont}* ...@@ -334,6 +342,10 @@ identifier {ident_start}{ident_cont}*
typecast "::" typecast "::"
/* these two token types are used by PL/pgsql, though not in core SQL */
dot_dot \.\.
colon_equals ":="
/* /*
* "self" is the set of chars that should be returned as single-character * "self" is the set of chars that should be returned as single-character
* tokens. "op_chars" is the set of chars that can make up "Op" tokens, * tokens. "op_chars" is the set of chars that can make up "Op" tokens,
...@@ -511,6 +523,22 @@ other . ...@@ -511,6 +523,22 @@ other .
<xe>{xeinside} { <xe>{xeinside} {
ECHO; ECHO;
} }
<xe>{xeunicode} {
/*
 * A complete \uXXXX or \UXXXXXXXX escape inside an extended string.
 * yytext+2 skips the backslash and the u/U, leaving the hex digits.
 */
uint32 c = strtoul(yytext+2, NULL, 16);
/* A first surrogate switches to <xeu> for whatever follows it. */
if (is_utf16_surrogate_first(c))
BEGIN(xeu);
ECHO;
}
<xeu>{xeunicode} {
/* A complete Unicode escape seen in <xeu> switches back to <xe>. */
BEGIN(xe);
ECHO;
}
<xeu>. { /* NOTE(review): no BEGIN here, so the scanner stays in <xeu> -- confirm intended */ ECHO; }
<xeu>\n { /* as for the rule above: echo only, state unchanged */ ECHO; }
<xe,xeu>{xeunicodefail} {
/* Incomplete Unicode escape (too few hex digits): echoed as-is, no error raised here. */
ECHO;
}
<xe>{xeescape} { <xe>{xeescape} {
ECHO; ECHO;
} }
...@@ -605,6 +633,14 @@ other . ...@@ -605,6 +633,14 @@ other .
ECHO; ECHO;
} }
{dot_dot} {
/* ".." token: no special handling, just hand the matched text to emit(). */
ECHO;
}
{colon_equals} {
/* ":=" token: no special handling, just hand the matched text to emit(). */
ECHO;
}
/* /*
* These rules are specific to psql --- they implement parenthesis * These rules are specific to psql --- they implement parenthesis
* counting and detection of command-ending semicolon. These must * counting and detection of command-ending semicolon. These must
...@@ -1690,3 +1726,9 @@ emit(const char *txt, int len) ...@@ -1690,3 +1726,9 @@ emit(const char *txt, int len)
} }
} }
} }
/*
 * is_utf16_surrogate_first
 *		Report whether code point c falls in the range 0xD800..0xDBFF
 *		(inclusive), i.e. the values that can open a UTF-16 surrogate pair.
 */
static bool
is_utf16_surrogate_first(uint32 c)
{
	/* Below the start of the first-surrogate range. */
	if (c < 0xD800)
		return false;

	/* Past the end of the first-surrogate range. */
	if (c > 0xDBFF)
		return false;

	return true;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment