Commit 3cfdd8fd authored by Tom Lane

Clean up scan.l's handling of \r vs \n --- they are reliably treated as
equivalent now, which should make Windows and Mac clients happier.
Also fix failure to handle SQL comments between segments of a multiline
quoted literal.
parent 905404a2
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.63 2000/01/26 05:56:43 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.64 2000/02/19 04:17:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -41,15 +41,19 @@ static char *parseCh; ...@@ -41,15 +41,19 @@ static char *parseCh;
/* set up my input handler --- need one flavor for flex, one for lex */ /* set up my input handler --- need one flavor for flex, one for lex */
#if defined(FLEX_SCANNER) #if defined(FLEX_SCANNER)
#define YY_NO_UNPUT #define YY_NO_UNPUT
static int myinput(char* buf, int max); static int myinput(char* buf, int max);
#undef YY_INPUT #undef YY_INPUT
#define YY_INPUT(buf,result,max) {result = myinput(buf,max);} #define YY_INPUT(buf,result,max) {result = myinput(buf,max);}
#else
#else /* !FLEX_SCANNER */
#undef input #undef input
int input(); int input();
#undef unput #undef unput
void unput(char); void unput(char);
#endif /* FLEX_SCANNER */ #endif /* FLEX_SCANNER */
extern YYSTYPE yylval; extern YYSTYPE yylval;
...@@ -68,27 +72,22 @@ static int literalalloc; /* current allocated buffer size */ ...@@ -68,27 +72,22 @@ static int literalalloc; /* current allocated buffer size */
static void addlit(char *ytext, int yleng); static void addlit(char *ytext, int yleng);
%} %}
/* OK, here is a short description of lex/flex rules behavior. /*
* OK, here is a short description of lex/flex rules behavior.
* The longest pattern which matches an input string is always chosen. * The longest pattern which matches an input string is always chosen.
* For equal-length patterns, the first occurring in the rules list is chosen. * For equal-length patterns, the first occurring in the rules list is chosen.
* INITIAL is the starting condition, to which all non-conditional rules apply. * INITIAL is the starting state, to which all non-conditional rules apply.
* When in an exclusive condition, only those rules defined for that condition apply. * Exclusive states change parsing rules while the state is active. When in
* an exclusive state, only those rules defined for that state apply.
* *
* Exclusive states change parsing rules while the state is active. * We use exclusive states for quoted strings, extended comments,
* There are exclusive states for quoted strings, extended comments, * and to eliminate parsing troubles for numeric strings.
* and to eliminate parsing troubles for numeric strings.
* Exclusive states: * Exclusive states:
* <xb> binary numeric string - thomas 1997-11-16 * <xb> binary numeric string - thomas 1997-11-16
* <xc> extended C-style comments - tgl 1997-07-12 * <xc> extended C-style comments - tgl 1997-07-12
* <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27 * <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27
* <xh> hexadecimal numeric string - thomas 1997-11-16 * <xh> hexadecimal numeric string - thomas 1997-11-16
* <xq> quoted strings - tgl 1997-07-30 * <xq> quoted strings - tgl 1997-07-30
*
* The "extended comment" syntax closely resembles allowable operator syntax.
* So, when in condition <xc>, only strings which would terminate the
* "extended comment" trigger any action other than "ignore".
* Be sure to match _any_ candidate comment, including those with appended
* operator-like symbols. - thomas 1997-07-14
*/ */
%x xb %x xb
...@@ -101,29 +100,29 @@ static void addlit(char *ytext, int yleng); ...@@ -101,29 +100,29 @@ static void addlit(char *ytext, int yleng);
*/ */
xbstart [bB]{quote} xbstart [bB]{quote}
xbstop {quote} xbstop {quote}
xbinside [^']* xbinside [^']+
xbcat {quote}{space}*\n{space}*{quote} xbcat {quote}{whitespace_with_newline}{quote}
/* Hexadecimal number /* Hexadecimal number
*/ */
xhstart [xX]{quote} xhstart [xX]{quote}
xhstop {quote} xhstop {quote}
xhinside [^']* xhinside [^']+
xhcat {quote}{space}*\n{space}*{quote} xhcat {quote}{whitespace_with_newline}{quote}
/* Extended quote /* Extended quote
* xqdouble implements SQL92 embedded quote * xqdouble implements SQL92 embedded quote
* xqcat allows strings to cross input lines * xqcat allows strings to cross input lines
* Note: reduction of '' and \ sequences to output text is done in scanstr(), * Note: reduction of '' and \ sequences to output text is done in scanstr(),
* not by rules here. * not by rules here. But we do get rid of xqcat sequences here.
*/ */
quote ' quote '
xqstart {quote} xqstart {quote}
xqstop {quote} xqstop {quote}
xqdouble {quote}{quote} xqdouble {quote}{quote}
xqinside [^\\']* xqinside [^\\']+
xqliteral [\\](.|\n) xqliteral [\\](.|\n)
xqcat {quote}{space}*\n{space}*{quote} xqcat {quote}{whitespace_with_newline}{quote}
/* Delimited quote /* Delimited quote
* Allows embedded spaces and other special characters into identifiers. * Allows embedded spaces and other special characters into identifiers.
...@@ -131,16 +130,28 @@ xqcat {quote}{space}*\n{space}*{quote} ...@@ -131,16 +130,28 @@ xqcat {quote}{space}*\n{space}*{quote}
dquote \" dquote \"
xdstart {dquote} xdstart {dquote}
xdstop {dquote} xdstop {dquote}
xdinside [^"]* xdinside [^"]+
/* Comments /* C-style comments
* Ignored by the scanner and parser. * Ignored by the scanner and parser.
*
* The "extended comment" syntax closely resembles allowable operator syntax.
* The tricky part here is to get lex to recognize a string starting with
* slash-star as a comment, when interpreting it as an operator would produce
* a longer match --- remember lex will prefer a longer match! So, we have
* to provide a special rule for xcline (a complete comment that could
* otherwise look like an operator), as well as append {op_and_self}* to
* xcstart so that it matches at least as much as {operator} would.
* Then the tie-breaker (first matching rule of same length) wins.
* There is still a problem if someone writes, eg, slash-star-star-slash-plus.
* It'll be taken as an xcstart, rather than xcline and an operator as one
* could wish. I don't see any way around that given lex's behavior;
* that someone will just have to write a space after the comment.
*/ */
xcline [\/][\*].*[\*][\/]{space}*\n* xcline \/\*{op_and_self}*\*\/
xcstart [\/][\*]{op_and_self}* xcstart \/\*{op_and_self}*
xcstop {op_and_self}*[\*][\/]({space}*|\n) xcstop \*+\/
xcinside [^*]* xcinside ([^*]+)|(\*+[^/])
xcstar [^/]
digit [0-9] digit [0-9]
letter [\200-\377_A-Za-z] letter [\200-\377_A-Za-z]
...@@ -161,13 +172,44 @@ operator {op_and_self}+ ...@@ -161,13 +172,44 @@ operator {op_and_self}+
integer {digit}+ integer {digit}+
decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*)) decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+)) real ((({digit}*\.{digit}+)|({digit}+\.{digit}*)|({digit}+))([Ee][-+]?{digit}+))
param \${integer} param \${integer}
comment ("--"|"//").* /*
* In order to make the world safe for Windows and Mac clients as well as
* Unix ones, we accept either \n or \r as a newline. A DOS-style \r\n
* sequence will be seen as two successive newlines, but that doesn't cause
* any problems. SQL92-style comments, which start with -- and extend to the
* next newline, are treated as equivalent to a single whitespace character.
*
* NOTE a fine point: if there is no newline following --, we will absorb
* everything to the end of the input as a comment. This is correct. Older
* versions of Postgres failed to recognize -- as a comment if the input
* did not end with a newline.
*
* XXX perhaps \f (formfeed) should be treated as a newline as well?
*/
space [ \t\n\r\f] space [ \t\n\r\f]
horiz_space [ \t\f]
newline [\n\r]
non_newline [^\n\r]
comment (("--"|"//"){non_newline}*)
whitespace ({space}|{comment})
/*
* SQL92 requires at least one newline in the whitespace separating
* string literals that are to be concatenated. Silly, but who are we
* to argue? Note that {whitespace_with_newline} should not have * after
* it, whereas {whitespace} should generally have a * after it...
*/
horiz_whitespace ({horiz_space}|{comment})
whitespace_with_newline ({horiz_whitespace}*{newline}{whitespace}*)
other . other .
/* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION. /* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
...@@ -181,14 +223,16 @@ other . ...@@ -181,14 +223,16 @@ other .
* of escaped-quote "\'". * of escaped-quote "\'".
* Other embedded escaped characters are matched explicitly and the leading * Other embedded escaped characters are matched explicitly and the leading
* backslash is dropped from the string. - thomas 1997-09-24 * backslash is dropped from the string. - thomas 1997-09-24
* Note that xcline must appear before xcstart, which must appear before
* operator, as explained above! Also whitespace (comment) must appear
* before operator.
*/ */
%% %%
{comment} { /* ignore */ } {whitespace} { /* ignore */ }
{xcline} { /* ignore */ } {xcline} { /* ignore */ }
<xc>{xcstar} |
{xcstart} { BEGIN(xc); } {xcstart} { BEGIN(xc); }
<xc>{xcstop} { BEGIN(INITIAL); } <xc>{xcstop} { BEGIN(INITIAL); }
...@@ -216,6 +260,7 @@ other . ...@@ -216,6 +260,7 @@ other .
} }
<xh>{xhcat} | <xh>{xhcat} |
<xb>{xbcat} { <xb>{xbcat} {
/* ignore */
} }
{xhstart} { {xhstart} {
...@@ -249,6 +294,7 @@ other . ...@@ -249,6 +294,7 @@ other .
addlit(yytext, yyleng); addlit(yytext, yyleng);
} }
<xq>{xqcat} { <xq>{xqcat} {
/* ignore */
} }
...@@ -270,18 +316,18 @@ other . ...@@ -270,18 +316,18 @@ other .
{self} { return yytext[0]; } {self} { return yytext[0]; }
{operator} { {operator} {
if (strcmp((char*)yytext,"!=") == 0) if (strcmp((char*)yytext, "!=") == 0)
yylval.str = pstrdup("<>"); /* compatability */ yylval.str = pstrdup("<>"); /* compatibility */
else else
yylval.str = pstrdup((char*)yytext); yylval.str = pstrdup((char*)yytext);
return Op; return Op;
} }
{param} { {param} {
yylval.ival = atoi((char*)&yytext[1]); yylval.ival = atoi((char*)&yytext[1]);
return PARAM; return PARAM;
} }
{integer} { {integer} {
char* endptr; char* endptr;
...@@ -354,7 +400,6 @@ other . ...@@ -354,7 +400,6 @@ other .
return IDENT; return IDENT;
} }
} }
{space} { /* ignore */ }
{other} { return yytext[0]; } {other} { return yytext[0]; }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment