Commit f3a9d75e authored by Tom Lane

Finish cleaning up backend's handling of /* ... */ and -- comments,

per pghackers discussion around 20-Feb.  Also add specific error messages
for unterminated comments and unterminated quoted strings.  These things
are nonissues for input coming from psql, but they do matter for input
coming from other front ends.
parent 370186e8
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.65 2000/02/21 18:47:02 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.66 2000/03/11 05:14:06 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -133,22 +133,24 @@ xdstop {dquote} ...@@ -133,22 +133,24 @@ xdstop {dquote}
xdinside [^"]+ xdinside [^"]+
/* C-style comments /* C-style comments
* Ignored by the scanner and parser.
* *
* The "extended comment" syntax closely resembles allowable operator syntax. * The "extended comment" syntax closely resembles allowable operator syntax.
* The tricky part here is to get lex to recognize a string starting with * The tricky part here is to get lex to recognize a string starting with
* slash-star as a comment, when interpreting it as an operator would produce * slash-star as a comment, when interpreting it as an operator would produce
* a longer match --- remember lex will prefer a longer match! So, we have * a longer match --- remember lex will prefer a longer match! Also, if we
* to provide a special rule for xcline (a complete comment that could * have something like plus-slash-star, lex will think this is a 3-character
* otherwise look like an operator), as well as append {op_and_self}* to * operator whereas we want to see it as a + operator and a comment start.
* xcstart so that it matches at least as much as {operator} would. * The solution is two-fold:
* Then the tie-breaker (first matching rule of same length) wins. * 1. append {op_and_self}* to xcstart so that it matches as much text as
* There is still a problem if someone writes, eg, slash-star-star-slash-plus. * {operator} would. Then the tie-breaker (first matching rule of same
* It'll be taken as an xcstart, rather than xcline and an operator as one * length) ensures xcstart wins. We put back the extra stuff with yyless()
* could wish. I don't see any way around that given lex's behavior; * in case it contains a star-slash that should terminate the comment.
* that someone will just have to write a space after the comment. * 2. In the operator rule, check for slash-star within the operator, and
* if found throw it back with yyless(). This handles the plus-slash-star
* problem.
* SQL92-style comments, which start with dash-dash, have similar interactions
* with the operator rule.
*/ */
xcline \/\*{op_and_self}*\*\/
xcstart \/\*{op_and_self}* xcstart \/\*{op_and_self}*
xcstop \*+\/ xcstop \*+\/
xcinside ([^*]+)|(\*+[^/]) xcinside ([^*]+)|(\*+[^/])
...@@ -161,6 +163,7 @@ identifier {letter}{letter_or_digit}* ...@@ -161,6 +163,7 @@ identifier {letter}{letter_or_digit}*
typecast "::" typecast "::"
/* NB: if you change "self", fix the copy in the operator rule too! */
self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|] self [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
op_and_self [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=] op_and_self [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
operator {op_and_self}+ operator {op_and_self}+
...@@ -218,27 +221,30 @@ other . ...@@ -218,27 +221,30 @@ other .
* *
* Quoted strings must allow some special characters such as single-quote * Quoted strings must allow some special characters such as single-quote
* and newline. * and newline.
* Embedded single-quotes are implemented both in the SQL/92-standard * Embedded single-quotes are implemented both in the SQL92-standard
* style of two adjacent single quotes "''" and in the Postgres/Java style * style of two adjacent single quotes "''" and in the Postgres/Java style
* of escaped-quote "\'". * of escaped-quote "\'".
* Other embedded escaped characters are matched explicitly and the leading * Other embedded escaped characters are matched explicitly and the leading
* backslash is dropped from the string. - thomas 1997-09-24 * backslash is dropped from the string. - thomas 1997-09-24
* Note that xcline must appear before xcstart, which must appear before * Note that xcstart must appear before operator, as explained above!
* operator, as explained above! Also whitespace (comment) must appear * Also whitespace (comment) must appear before operator.
* before operator.
*/ */
%% %%
{whitespace} { /* ignore */ } {whitespace} { /* ignore */ }
{xcline} { /* ignore */ } {xcstart} {
BEGIN(xc);
{xcstart} { BEGIN(xc); } /* Put back any characters past slash-star; see above */
yyless(2);
}
<xc>{xcstop} { BEGIN(INITIAL); } <xc>{xcstop} { BEGIN(INITIAL); }
<xc>{xcinside} { /* ignore */ } <xc>{xcinside} { /* ignore */ }
<xc><<EOF>> { elog(ERROR, "Unterminated /* comment"); }
{xbstart} { {xbstart} {
BEGIN(xb); BEGIN(xb);
startlit(); startlit();
...@@ -262,6 +268,7 @@ other . ...@@ -262,6 +268,7 @@ other .
<xb>{xbcat} { <xb>{xbcat} {
/* ignore */ /* ignore */
} }
<xb><<EOF>> { elog(ERROR, "Unterminated binary integer"); }
{xhstart} { {xhstart} {
BEGIN(xh); BEGIN(xh);
...@@ -278,6 +285,7 @@ other . ...@@ -278,6 +285,7 @@ other .
literalbuf); literalbuf);
return ICONST; return ICONST;
} }
<xh><<EOF>> { elog(ERROR, "Unterminated hexadecimal integer"); }
{xqstart} { {xqstart} {
BEGIN(xq); BEGIN(xq);
...@@ -296,6 +304,7 @@ other . ...@@ -296,6 +304,7 @@ other .
<xq>{xqcat} { <xq>{xqcat} {
/* ignore */ /* ignore */
} }
<xq><<EOF>> { elog(ERROR, "Unterminated quoted string"); }
{xdstart} { {xdstart} {
...@@ -310,12 +319,39 @@ other . ...@@ -310,12 +319,39 @@ other .
<xd>{xdinside} { <xd>{xdinside} {
addlit(yytext, yyleng); addlit(yytext, yyleng);
} }
<xd><<EOF>> { elog(ERROR, "Unterminated quoted identifier"); }
{typecast} { return TYPECAST; } {typecast} { return TYPECAST; }
{self} { return yytext[0]; } {self} { return yytext[0]; }
{operator} { {operator} {
/* Check for embedded slash-star or dash-dash */
char *slashstar = strstr((char*)yytext, "/*");
char *dashdash = strstr((char*)yytext, "--");
if (slashstar && dashdash)
{
if (slashstar > dashdash)
slashstar = dashdash;
}
else if (!slashstar)
slashstar = dashdash;
if (slashstar)
{
int nchars = slashstar - ((char*)yytext);
yyless(nchars);
/* If what we have left is only one char, and it's
* one of the characters matching "self", then
* return it as a character token the same way
* that the "self" rule would have.
*/
if (nchars == 1 &&
strchr(",()[].;$:+-*/%^<>=|", yytext[0]))
return yytext[0];
}
if (strcmp((char*)yytext, "!=") == 0) if (strcmp((char*)yytext, "!=") == 0)
yylval.str = pstrdup("<>"); /* compatibility */ yylval.str = pstrdup("<>"); /* compatibility */
else else
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment