Commit 61446e09 authored by Tom Lane

Improve lexer's error reporting. You get the whole token mentioned now
in parse error messages, not just the part scanned by the last flex rule.
For example,
	select "foo" "bar";
used to draw
	ERROR:  parser: parse error at or near """
which was rather unhelpful.  Now it gives
	ERROR:  parser: parse error at or near ""bar""
Also, error messages concerning bitstring literals and suchlike will
quote the source text at you, not the processed internal form of the literal.
parent 241978b9
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.92 2002/04/20 21:56:14 petere Exp $ * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.93 2002/05/01 17:12:07 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -56,6 +56,17 @@ static void addlit(char *ytext, int yleng); ...@@ -56,6 +56,17 @@ static void addlit(char *ytext, int yleng);
static void addlitchar(unsigned char ychar); static void addlitchar(unsigned char ychar);
static char *litbufdup(void); static char *litbufdup(void);
/*
* When we parse a token that requires multiple lexer rules to process,
* we set token_start to point at the true start of the token, for use
* by yyerror(). yytext will point at just the text consumed by the last
* rule, so it's not very helpful (eg, it might contain just the last
* quote mark of a quoted identifier). But to avoid cluttering every rule
* with setting token_start, we allow token_start = NULL to denote that
* it's okay to use yytext.
*/
static char *token_start;
/* Handles to the buffer that the lexer uses internally */ /* Handles to the buffer that the lexer uses internally */
static YY_BUFFER_STATE scanbufhandle; static YY_BUFFER_STATE scanbufhandle;
static char *scanbuf; static char *scanbuf;
...@@ -208,7 +219,7 @@ non_newline [^\n\r] ...@@ -208,7 +219,7 @@ non_newline [^\n\r]
comment ("--"{non_newline}*) comment ("--"{non_newline}*)
whitespace ({space}|{comment}) whitespace ({space}+|{comment})
/* /*
* SQL92 requires at least one newline in the whitespace separating * SQL92 requires at least one newline in the whitespace separating
...@@ -235,9 +246,16 @@ other . ...@@ -235,9 +246,16 @@ other .
*/ */
%% %%
%{
/* code to execute during start of each call of yylex() */
token_start = NULL;
%}
{whitespace} { /* ignore */ } {whitespace} { /* ignore */ }
{xcstart} { {xcstart} {
token_start = yytext;
xcdepth = 0; xcdepth = 0;
BEGIN(xc); BEGIN(xc);
/* Put back any characters past slash-star; see above */ /* Put back any characters past slash-star; see above */
...@@ -252,7 +270,11 @@ other . ...@@ -252,7 +270,11 @@ other .
<xc>{xcstop} { <xc>{xcstop} {
if (xcdepth <= 0) if (xcdepth <= 0)
{
BEGIN(INITIAL); BEGIN(INITIAL);
/* reset token_start for next token */
token_start = NULL;
}
else else
xcdepth--; xcdepth--;
} }
...@@ -261,9 +283,10 @@ other . ...@@ -261,9 +283,10 @@ other .
<xc>{op_chars} { /* ignore */ } <xc>{op_chars} { /* ignore */ }
<xc><<EOF>> { elog(ERROR, "Unterminated /* comment"); } <xc><<EOF>> { yyerror("unterminated /* comment"); }
{xbitstart} { {xbitstart} {
token_start = yytext;
BEGIN(xbit); BEGIN(xbit);
startlit(); startlit();
addlitchar('b'); addlitchar('b');
...@@ -271,8 +294,7 @@ other . ...@@ -271,8 +294,7 @@ other .
<xbit>{xbitstop} { <xbit>{xbitstop} {
BEGIN(INITIAL); BEGIN(INITIAL);
if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0') if (literalbuf[strspn(literalbuf + 1, "01") + 1] != '\0')
elog(ERROR, "invalid bit string input: '%s'", yyerror("invalid bit string input");
literalbuf);
yylval.str = litbufdup(); yylval.str = litbufdup();
return BITCONST; return BITCONST;
} }
...@@ -284,9 +306,10 @@ other . ...@@ -284,9 +306,10 @@ other .
<xbit>{xbitcat} { <xbit>{xbitcat} {
/* ignore */ /* ignore */
} }
<xbit><<EOF>> { elog(ERROR, "unterminated bit string literal"); } <xbit><<EOF>> { yyerror("unterminated bit string literal"); }
{xhstart} { {xhstart} {
token_start = yytext;
BEGIN(xh); BEGIN(xh);
startlit(); startlit();
} }
...@@ -303,14 +326,14 @@ other . ...@@ -303,14 +326,14 @@ other .
|| val != (long) ((int32) val) || val != (long) ((int32) val)
#endif #endif
) )
elog(ERROR, "Bad hexadecimal integer input '%s'", yyerror("bad hexadecimal integer input");
literalbuf);
yylval.ival = val; yylval.ival = val;
return ICONST; return ICONST;
} }
<xh><<EOF>> { elog(ERROR, "Unterminated hexadecimal integer"); } <xh><<EOF>> { yyerror("unterminated hexadecimal integer"); }
{xqstart} { {xqstart} {
token_start = yytext;
BEGIN(xq); BEGIN(xq);
startlit(); startlit();
} }
...@@ -335,30 +358,31 @@ other . ...@@ -335,30 +358,31 @@ other .
<xq>{xqcat} { <xq>{xqcat} {
/* ignore */ /* ignore */
} }
<xq><<EOF>> { elog(ERROR, "Unterminated quoted string"); } <xq><<EOF>> { yyerror("unterminated quoted string"); }
{xdstart} { {xdstart} {
token_start = yytext;
BEGIN(xd); BEGIN(xd);
startlit(); startlit();
} }
<xd>{xdstop} { <xd>{xdstop} {
BEGIN(INITIAL); BEGIN(INITIAL);
if (strlen(literalbuf) == 0) if (literallen == 0)
elog(ERROR, "zero-length delimited identifier"); yyerror("zero-length delimited identifier");
if (strlen(literalbuf) >= NAMEDATALEN) if (literallen >= NAMEDATALEN)
{ {
#ifdef MULTIBYTE
int len; int len;
len = pg_mbcliplen(literalbuf,strlen(literalbuf),NAMEDATALEN-1); #ifdef MULTIBYTE
elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"", len = pg_mbcliplen(literalbuf, literallen,
literalbuf, len, literalbuf); NAMEDATALEN-1);
literalbuf[len] = '\0';
#else #else
elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"", len = NAMEDATALEN-1;
literalbuf, NAMEDATALEN-1, literalbuf);
literalbuf[NAMEDATALEN-1] = '\0';
#endif #endif
elog(NOTICE, "identifier \"%s\" will be truncated to \"%.*s\"",
literalbuf, len, literalbuf);
literalbuf[len] = '\0';
literallen = len;
} }
yylval.str = litbufdup(); yylval.str = litbufdup();
return IDENT; return IDENT;
...@@ -369,7 +393,7 @@ other . ...@@ -369,7 +393,7 @@ other .
<xd>{xdinside} { <xd>{xdinside} {
addlit(yytext, yyleng); addlit(yytext, yyleng);
} }
<xd><<EOF>> { elog(ERROR, "Unterminated quoted identifier"); } <xd><<EOF>> { yyerror("unterminated quoted identifier"); }
{typecast} { return TYPECAST; } {typecast} { return TYPECAST; }
...@@ -383,8 +407,8 @@ other . ...@@ -383,8 +407,8 @@ other .
* character will match a prior rule, not this one. * character will match a prior rule, not this one.
*/ */
int nchars = yyleng; int nchars = yyleng;
char *slashstar = strstr((char*)yytext, "/*"); char *slashstar = strstr(yytext, "/*");
char *dashdash = strstr((char*)yytext, "--"); char *dashdash = strstr(yytext, "--");
if (slashstar && dashdash) if (slashstar && dashdash)
{ {
...@@ -395,7 +419,7 @@ other . ...@@ -395,7 +419,7 @@ other .
else if (!slashstar) else if (!slashstar)
slashstar = dashdash; slashstar = dashdash;
if (slashstar) if (slashstar)
nchars = slashstar - ((char*)yytext); nchars = slashstar - yytext;
/* /*
* For SQL92 compatibility, '+' and '-' cannot be the * For SQL92 compatibility, '+' and '-' cannot be the
...@@ -437,15 +461,15 @@ other . ...@@ -437,15 +461,15 @@ other .
} }
/* Convert "!=" operator to "<>" for compatibility */ /* Convert "!=" operator to "<>" for compatibility */
if (strcmp((char*)yytext, "!=") == 0) if (strcmp(yytext, "!=") == 0)
yylval.str = pstrdup("<>"); yylval.str = pstrdup("<>");
else else
yylval.str = pstrdup((char*)yytext); yylval.str = pstrdup(yytext);
return Op; return Op;
} }
{param} { {param} {
yylval.ival = atol((char*)&yytext[1]); yylval.ival = atol(yytext + 1);
return PARAM; return PARAM;
} }
...@@ -454,7 +478,7 @@ other . ...@@ -454,7 +478,7 @@ other .
char* endptr; char* endptr;
errno = 0; errno = 0;
val = strtol((char *)yytext, &endptr, 10); val = strtol(yytext, &endptr, 10);
if (*endptr != '\0' || errno == ERANGE if (*endptr != '\0' || errno == ERANGE
#ifdef HAVE_LONG_INT_64 #ifdef HAVE_LONG_INT_64
/* if long > 32 bits, check for overflow of int4 */ /* if long > 32 bits, check for overflow of int4 */
...@@ -463,28 +487,29 @@ other . ...@@ -463,28 +487,29 @@ other .
) )
{ {
/* integer too large, treat it as a float */ /* integer too large, treat it as a float */
yylval.str = pstrdup((char*)yytext); yylval.str = pstrdup(yytext);
return FCONST; return FCONST;
} }
yylval.ival = val; yylval.ival = val;
return ICONST; return ICONST;
} }
{decimal} { {decimal} {
yylval.str = pstrdup((char*)yytext); yylval.str = pstrdup(yytext);
return FCONST; return FCONST;
} }
{real} { {real} {
yylval.str = pstrdup((char*)yytext); yylval.str = pstrdup(yytext);
return FCONST; return FCONST;
} }
{identifier} { {identifier} {
ScanKeyword *keyword; ScanKeyword *keyword;
char *ident;
int i; int i;
/* Is it a keyword? */ /* Is it a keyword? */
keyword = ScanKeywordLookup((char*) yytext); keyword = ScanKeywordLookup(yytext);
if (keyword != NULL) if (keyword != NULL)
return keyword->value; return keyword->value;
...@@ -496,26 +521,25 @@ other . ...@@ -496,26 +521,25 @@ other .
* which seems appropriate under SQL99 rules, whereas * which seems appropriate under SQL99 rules, whereas
* the keyword comparison was NOT locale-dependent. * the keyword comparison was NOT locale-dependent.
*/ */
for (i = 0; yytext[i]; i++) ident = pstrdup(yytext);
for (i = 0; ident[i]; i++)
{ {
if (isupper((unsigned char) yytext[i])) if (isupper((unsigned char) ident[i]))
yytext[i] = tolower((unsigned char) yytext[i]); ident[i] = tolower((unsigned char) ident[i]);
} }
if (i >= NAMEDATALEN) if (i >= NAMEDATALEN)
{ {
#ifdef MULTIBYTE
int len; int len;
len = pg_mbcliplen(yytext,i,NAMEDATALEN-1); #ifdef MULTIBYTE
elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"", len = pg_mbcliplen(ident, i, NAMEDATALEN-1);
yytext, len, yytext);
yytext[len] = '\0';
#else #else
elog(WARNING, "identifier \"%s\" will be truncated to \"%.*s\"", len = NAMEDATALEN-1;
yytext, NAMEDATALEN-1, yytext);
yytext[NAMEDATALEN-1] = '\0';
#endif #endif
elog(NOTICE, "identifier \"%s\" will be truncated to \"%.*s\"",
ident, len, ident);
ident[len] = '\0';
} }
yylval.str = pstrdup((char*) yytext); yylval.str = ident;
return IDENT; return IDENT;
} }
...@@ -526,7 +550,8 @@ other . ...@@ -526,7 +550,8 @@ other .
void void
yyerror(const char *message) yyerror(const char *message)
{ {
elog(ERROR, "parser: %s at or near \"%s\"", message, yytext); elog(ERROR, "parser: %s at or near \"%s\"", message,
token_start ? token_start : yytext);
} }
......
CATALOG_NAME := postgres CATALOG_NAME := postgres
AVAIL_LANGUAGES := cs de hu ru zh_CN zh_TW AVAIL_LANGUAGES := cs de hu ru zh_CN zh_TW
GETTEXT_FILES := + gettext-files GETTEXT_FILES := + gettext-files
GETTEXT_TRIGGERS:= elog:2 postmaster_error GETTEXT_TRIGGERS:= elog:2 postmaster_error yyerror
...@@ -17,7 +17,7 @@ SELECT 'first line' ...@@ -17,7 +17,7 @@ SELECT 'first line'
' - next line' /* this comment is not allowed here */ ' - next line' /* this comment is not allowed here */
' - third line' ' - third line'
AS "Illegal comment within continuation"; AS "Illegal comment within continuation";
ERROR: parser: parse error at or near "'" ERROR: parser: parse error at or near "' - third line'"
-- --
-- test conversions between various string types -- test conversions between various string types
-- --
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment