Commit 3a624e92 authored by Tom Lane

Revise plpgsql's scanner to process comments and string literals in a way
more nearly matching the core SQL scanner.  The user-visible effects are:

* Block comments (slash-star comments) now nest, as per SQL spec.

* In standard_conforming_strings mode, backslash as the last character of a
  non-E string literal is now correctly taken as an ordinary character;
  formerly it was misinterpreted as escaping the ending quote.  (Since the
  string also had to pass through the core scanner, this invariably led
  to syntax errors.)

* Formerly, backslashes in the format string of RAISE were always treated as
  quoting the next character, regardless of mode.  Now, they are ordinary
  characters with standard_conforming_strings on, while with it off, they
  introduce the same set of escapes as in the core SQL scanner.  Also,
  escape_string_warning is now effective for RAISE format strings.  These
  changes make RAISE format strings work just like any other string literal.

This is implemented by copying and pasting a lot of logic from the core
scanner.  It would be a good idea to look into getting rid of plpgsql's
scanner entirely in favor of using the core scanner.  However, that involves
more change than I can justify making during beta --- in particular, the core
scanner would have to become re-entrant.

In passing, remove the kluge that made the plpgsql scanner emit T_FUNCTION or
T_TRIGGER as a made-up first token.  That presumably had some value once upon
a time, but now it's just useless complication for both the scanner and the
grammar.
parent 7f2f798b
<!-- $PostgreSQL: pgsql/doc/src/sgml/plpgsql.sgml,v 1.139 2009/04/02 19:20:45 momjian Exp $ --> <!-- $PostgreSQL: pgsql/doc/src/sgml/plpgsql.sgml,v 1.140 2009/04/19 18:52:56 tgl Exp $ -->
<chapter id="plpgsql"> <chapter id="plpgsql">
<title><application>PL/pgSQL</application> - <acronym>SQL</acronym> Procedural Language</title> <title><application>PL/pgSQL</application> - <acronym>SQL</acronym> Procedural Language</title>
...@@ -220,10 +220,8 @@ END <optional> <replaceable>label</replaceable> </optional>; ...@@ -220,10 +220,8 @@ END <optional> <replaceable>label</replaceable> </optional>;
There are two types of comments in <application>PL/pgSQL</>. A double There are two types of comments in <application>PL/pgSQL</>. A double
dash (<literal>--</literal>) starts a comment that extends to the end of dash (<literal>--</literal>) starts a comment that extends to the end of
the line. A <literal>/*</literal> starts a block comment that extends to the line. A <literal>/*</literal> starts a block comment that extends to
the next occurrence of <literal>*/</literal>. Block comments cannot be the next occurrence of <literal>*/</literal>. Block comments nest,
nested, but double dash comments can be enclosed into a block comment and just as in ordinary SQL.
a double dash can hide the block comment delimiters <literal>/*</literal>
and <literal>*/</literal>.
</para> </para>
<para> <para>
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.121 2009/02/18 11:33:04 petere Exp $ * $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.122 2009/04/19 18:52:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -62,6 +62,8 @@ static PLpgSQL_row *make_scalar_list1(const char *initial_name, ...@@ -62,6 +62,8 @@ static PLpgSQL_row *make_scalar_list1(const char *initial_name,
int lineno); int lineno);
static void check_sql_expr(const char *stmt); static void check_sql_expr(const char *stmt);
static void plpgsql_sql_error_callback(void *arg); static void plpgsql_sql_error_callback(void *arg);
static char *parse_string_token(const char *token);
static void plpgsql_string_error_callback(void *arg);
static char *check_label(const char *yytxt); static char *check_label(const char *yytxt);
static void check_labels(const char *start_label, static void check_labels(const char *start_label,
const char *end_label); const char *end_label);
...@@ -228,8 +230,6 @@ static List *read_raise_options(void); ...@@ -228,8 +230,6 @@ static List *read_raise_options(void);
/* /*
* Other tokens * Other tokens
*/ */
%token T_FUNCTION
%token T_TRIGGER
%token T_STRING %token T_STRING
%token T_NUMBER %token T_NUMBER
%token T_SCALAR /* a VAR, RECFIELD, or TRIGARG */ %token T_SCALAR /* a VAR, RECFIELD, or TRIGARG */
...@@ -244,13 +244,9 @@ static List *read_raise_options(void); ...@@ -244,13 +244,9 @@ static List *read_raise_options(void);
%% %%
pl_function : T_FUNCTION comp_optsect pl_block opt_semi pl_function : comp_optsect pl_block opt_semi
{ {
yylval.program = (PLpgSQL_stmt_block *)$3; yylval.program = (PLpgSQL_stmt_block *) $2;
}
| T_TRIGGER comp_optsect pl_block opt_semi
{
yylval.program = (PLpgSQL_stmt_block *)$3;
} }
; ;
...@@ -1403,7 +1399,7 @@ stmt_raise : K_RAISE lno ...@@ -1403,7 +1399,7 @@ stmt_raise : K_RAISE lno
if (tok == T_STRING) if (tok == T_STRING)
{ {
/* old style message and parameters */ /* old style message and parameters */
new->message = plpgsql_get_string_value(); new->message = parse_string_token(yytext);
/* /*
* We expect either a semi-colon, which * We expect either a semi-colon, which
* indicates no parameters, or a comma that * indicates no parameters, or a comma that
...@@ -1435,7 +1431,7 @@ stmt_raise : K_RAISE lno ...@@ -1435,7 +1431,7 @@ stmt_raise : K_RAISE lno
if (yylex() != T_STRING) if (yylex() != T_STRING)
yyerror("syntax error"); yyerror("syntax error");
sqlstatestr = plpgsql_get_string_value(); sqlstatestr = parse_string_token(yytext);
if (strlen(sqlstatestr) != 5) if (strlen(sqlstatestr) != 5)
yyerror("invalid SQLSTATE code"); yyerror("invalid SQLSTATE code");
...@@ -1778,7 +1774,7 @@ proc_condition : opt_lblname ...@@ -1778,7 +1774,7 @@ proc_condition : opt_lblname
/* next token should be a string literal */ /* next token should be a string literal */
if (yylex() != T_STRING) if (yylex() != T_STRING)
yyerror("syntax error"); yyerror("syntax error");
sqlstatestr = plpgsql_get_string_value(); sqlstatestr = parse_string_token(yytext);
if (strlen(sqlstatestr) != 5) if (strlen(sqlstatestr) != 5)
yyerror("invalid SQLSTATE code"); yyerror("invalid SQLSTATE code");
...@@ -2738,6 +2734,49 @@ plpgsql_sql_error_callback(void *arg) ...@@ -2738,6 +2734,49 @@ plpgsql_sql_error_callback(void *arg)
errposition(0); errposition(0);
} }
/*
 * parse_string_token
 *		Produce the string value that a string-literal token stands for.
 *
 * The conversion itself is delegated to plpgsql_parse_string_token() in
 * pl_funcs.c, which invokes the core lexer; keeping that call out of this
 * file avoids mixing the core bison/flex definitions with our own.  All we
 * do here is arrange the error context stack around the call, following
 * the same scheme as check_sql_expr().
 *
 * token is the literal's source text; the converted value is returned.
 */
static char *
parse_string_token(const char *token)
{
	ErrorContextCallback string_errcontext;
	ErrorContextCallback *saved_stack;
	char	   *value;

	/* See comments in check_sql_expr() */
	Assert(error_context_stack->callback == plpgsql_compile_error_callback);

	/* Remember the current stack top so it can be put back afterwards */
	saved_stack = error_context_stack;

	/*
	 * Build our own entry.  It links to the compile callback's predecessor,
	 * so the compile callback is not on the stack while the literal is
	 * being converted.
	 */
	string_errcontext.previous = saved_stack->previous;
	string_errcontext.arg = (char *) token;
	string_errcontext.callback = plpgsql_string_error_callback;
	error_context_stack = &string_errcontext;

	value = plpgsql_parse_string_token(token);

	/* Restore former ereport callback */
	error_context_stack = saved_stack;

	return value;
}
/*
 * Error context callback used while a string literal is being converted
 * (parse_string_token() installs it on the error context stack).
 *
 * Adds a context line that names the PL/pgSQL function and line number
 * taken from plpgsql_error_funcname and plpgsql_error_lineno.  The arg
 * pointer is not examined here.
 */
static void
plpgsql_string_error_callback(void *arg)
{
/* the function name must have been set before any literal is parsed */
Assert(plpgsql_error_funcname);
errcontext("string literal in PL/PgSQL function \"%s\" near line %d",
plpgsql_error_funcname, plpgsql_error_lineno);
/* representing the string literal as internalquery seems overkill */
/*
 * NOTE(review): errposition(0) presumably discards any cursor position
 * already attached to the error, since no query text is reported to go
 * with it -- confirm against errposition()'s definition.
 */
errposition(0);
}
static char * static char *
check_label(const char *yytxt) check_label(const char *yytxt)
{ {
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.134 2009/02/18 11:33:04 petere Exp $ * $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.135 2009/04/19 18:52:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -261,7 +261,7 @@ do_compile(FunctionCallInfo fcinfo, ...@@ -261,7 +261,7 @@ do_compile(FunctionCallInfo fcinfo,
bool forValidator) bool forValidator)
{ {
Form_pg_proc procStruct = (Form_pg_proc) GETSTRUCT(procTup); Form_pg_proc procStruct = (Form_pg_proc) GETSTRUCT(procTup);
int functype = CALLED_AS_TRIGGER(fcinfo) ? T_TRIGGER : T_FUNCTION; bool is_trigger = CALLED_AS_TRIGGER(fcinfo);
Datum prosrcdatum; Datum prosrcdatum;
bool isnull; bool isnull;
char *proc_source; char *proc_source;
...@@ -293,7 +293,7 @@ do_compile(FunctionCallInfo fcinfo, ...@@ -293,7 +293,7 @@ do_compile(FunctionCallInfo fcinfo,
if (isnull) if (isnull)
elog(ERROR, "null prosrc"); elog(ERROR, "null prosrc");
proc_source = TextDatumGetCString(prosrcdatum); proc_source = TextDatumGetCString(prosrcdatum);
plpgsql_scanner_init(proc_source, functype); plpgsql_scanner_init(proc_source);
plpgsql_error_funcname = pstrdup(NameStr(procStruct->proname)); plpgsql_error_funcname = pstrdup(NameStr(procStruct->proname));
plpgsql_error_lineno = 0; plpgsql_error_lineno = 0;
...@@ -359,13 +359,13 @@ do_compile(FunctionCallInfo fcinfo, ...@@ -359,13 +359,13 @@ do_compile(FunctionCallInfo fcinfo,
function->fn_oid = fcinfo->flinfo->fn_oid; function->fn_oid = fcinfo->flinfo->fn_oid;
function->fn_xmin = HeapTupleHeaderGetXmin(procTup->t_data); function->fn_xmin = HeapTupleHeaderGetXmin(procTup->t_data);
function->fn_tid = procTup->t_self; function->fn_tid = procTup->t_self;
function->fn_functype = functype; function->fn_is_trigger = is_trigger;
function->fn_cxt = func_cxt; function->fn_cxt = func_cxt;
function->out_param_varno = -1; /* set up for no OUT param */ function->out_param_varno = -1; /* set up for no OUT param */
switch (functype) switch (is_trigger)
{ {
case T_FUNCTION: case false:
/* /*
* Fetch info about the procedure's parameters. Allocations aren't * Fetch info about the procedure's parameters. Allocations aren't
...@@ -564,7 +564,7 @@ do_compile(FunctionCallInfo fcinfo, ...@@ -564,7 +564,7 @@ do_compile(FunctionCallInfo fcinfo,
ReleaseSysCache(typeTup); ReleaseSysCache(typeTup);
break; break;
case T_TRIGGER: case true:
/* Trigger procedure's return type is unknown yet */ /* Trigger procedure's return type is unknown yet */
function->fn_rettype = InvalidOid; function->fn_rettype = InvalidOid;
function->fn_retbyval = false; function->fn_retbyval = false;
...@@ -645,7 +645,7 @@ do_compile(FunctionCallInfo fcinfo, ...@@ -645,7 +645,7 @@ do_compile(FunctionCallInfo fcinfo,
break; break;
default: default:
elog(ERROR, "unrecognized function typecode: %u", functype); elog(ERROR, "unrecognized function typecode: %d", (int) is_trigger);
break; break;
} }
...@@ -790,7 +790,7 @@ plpgsql_parse_word(const char *word) ...@@ -790,7 +790,7 @@ plpgsql_parse_word(const char *word)
* Recognize tg_argv when compiling triggers * Recognize tg_argv when compiling triggers
* (XXX this sucks, it should be a regular variable in the namestack) * (XXX this sucks, it should be a regular variable in the namestack)
*/ */
if (plpgsql_curr_compile->fn_functype == T_TRIGGER) if (plpgsql_curr_compile->fn_is_trigger)
{ {
if (strcmp(cp[0], "tg_argv") == 0) if (strcmp(cp[0], "tg_argv") == 0)
{ {
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.76 2009/02/18 11:33:04 petere Exp $ * $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.77 2009/04/19 18:52:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -17,6 +17,8 @@ ...@@ -17,6 +17,8 @@
#include <ctype.h> #include <ctype.h>
#include "parser/gramparse.h"
#include "parser/gram.h"
#include "parser/scansup.h" #include "parser/scansup.h"
...@@ -459,6 +461,41 @@ plpgsql_convert_ident(const char *s, char **output, int numidents) ...@@ -459,6 +461,41 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
} }
/*
* plpgsql_parse_string_token - get the value represented by a string literal
*
* We do not make plpgsql's lexer produce the represented value, because
* in many cases we don't need it. Instead this function is invoked when
* we do need it. The input is the T_STRING token as identified by the lexer.
*
* The result is a palloc'd string.
*
* Note: this is called only from plpgsql's gram.y, but we can't just put it
* there because including parser/gram.h there would cause confusion.
*/
char *
plpgsql_parse_string_token(const char *token)
{
int ctoken;
/*
* We use the core lexer to do the dirty work. Aside from getting the
* right results for escape sequences and so on, this helps us produce
* appropriate warnings for escape_string_warning etc.
*/
scanner_init(token);
ctoken = base_yylex();
if (ctoken != SCONST)
elog(ERROR, "unexpected result from base lexer: %d", ctoken);
scanner_finish();
return base_yylval.str;
}
/* /*
* Statement type as a string, for use in error messages etc. * Statement type as a string, for use in error messages etc.
*/ */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.110 2009/04/09 02:57:53 tgl Exp $ * $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.111 2009/04/19 18:52:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -650,7 +650,7 @@ typedef struct PLpgSQL_function ...@@ -650,7 +650,7 @@ typedef struct PLpgSQL_function
Oid fn_oid; Oid fn_oid;
TransactionId fn_xmin; TransactionId fn_xmin;
ItemPointerData fn_tid; ItemPointerData fn_tid;
int fn_functype; bool fn_is_trigger;
PLpgSQL_func_hashkey *fn_hashkey; /* back-link to hashtable key */ PLpgSQL_func_hashkey *fn_hashkey; /* back-link to hashtable key */
MemoryContext fn_cxt; MemoryContext fn_cxt;
...@@ -880,6 +880,7 @@ extern void plpgsql_ns_rename(char *oldname, char *newname); ...@@ -880,6 +880,7 @@ extern void plpgsql_ns_rename(char *oldname, char *newname);
* ---------- * ----------
*/ */
extern void plpgsql_convert_ident(const char *s, char **output, int numidents); extern void plpgsql_convert_ident(const char *s, char **output, int numidents);
extern char *plpgsql_parse_string_token(const char *token);
extern const char *plpgsql_stmt_typename(PLpgSQL_stmt *stmt); extern const char *plpgsql_stmt_typename(PLpgSQL_stmt *stmt);
extern void plpgsql_dumptree(PLpgSQL_function *func); extern void plpgsql_dumptree(PLpgSQL_function *func);
...@@ -894,8 +895,7 @@ extern int plpgsql_yylex(void); ...@@ -894,8 +895,7 @@ extern int plpgsql_yylex(void);
extern void plpgsql_push_back_token(int token); extern void plpgsql_push_back_token(int token);
extern void plpgsql_yyerror(const char *message); extern void plpgsql_yyerror(const char *message);
extern int plpgsql_scanner_lineno(void); extern int plpgsql_scanner_lineno(void);
extern void plpgsql_scanner_init(const char *str, int functype); extern void plpgsql_scanner_init(const char *str);
extern void plpgsql_scanner_finish(void); extern void plpgsql_scanner_finish(void);
extern char *plpgsql_get_string_value(void);
#endif /* PLPGSQL_H */ #endif /* PLPGSQL_H */
This diff is collapsed.
...@@ -3737,3 +3737,74 @@ SELECT * FROM leaker_1(true); ...@@ -3737,3 +3737,74 @@ SELECT * FROM leaker_1(true);
DROP FUNCTION leaker_1(bool); DROP FUNCTION leaker_1(bool);
DROP FUNCTION leaker_2(bool); DROP FUNCTION leaker_2(bool);
-- Test handling of string literals.
set standard_conforming_strings = off;
create or replace function strtest() returns text as $$
begin
raise notice 'foo\\bar\041baz';
return 'foo\\bar\041baz';
end
$$ language plpgsql;
WARNING: nonstandard use of \\ in a string literal
HINT: Use the escape string syntax for backslashes, e.g., E'\\'.
CONTEXT: string literal in PL/PgSQL function "strtest" near line 2
WARNING: nonstandard use of \\ in a string literal
LINE 1: SELECT 'foo\\bar\041baz'
^
HINT: Use the escape string syntax for backslashes, e.g., E'\\'.
QUERY: SELECT 'foo\\bar\041baz'
CONTEXT: SQL statement in PL/PgSQL function "strtest" near line 3
select strtest();
NOTICE: foo\bar!baz
WARNING: nonstandard use of \\ in a string literal
LINE 1: SELECT 'foo\\bar\041baz'
^
HINT: Use the escape string syntax for backslashes, e.g., E'\\'.
QUERY: SELECT 'foo\\bar\041baz'
CONTEXT: PL/pgSQL function "strtest" line 3 at RETURN
strtest
-------------
foo\bar!baz
(1 row)
create or replace function strtest() returns text as $$
begin
raise notice E'foo\\bar\041baz';
return E'foo\\bar\041baz';
end
$$ language plpgsql;
select strtest();
NOTICE: foo\bar!baz
strtest
-------------
foo\bar!baz
(1 row)
set standard_conforming_strings = on;
create or replace function strtest() returns text as $$
begin
raise notice 'foo\\bar\041baz\';
return 'foo\\bar\041baz\';
end
$$ language plpgsql;
select strtest();
NOTICE: foo\\bar\041baz\
strtest
------------------
foo\\bar\041baz\
(1 row)
create or replace function strtest() returns text as $$
begin
raise notice E'foo\\bar\041baz';
return E'foo\\bar\041baz';
end
$$ language plpgsql;
select strtest();
NOTICE: foo\bar!baz
strtest
-------------
foo\bar!baz
(1 row)
drop function strtest();
...@@ -3005,3 +3005,47 @@ SELECT * FROM leaker_1(true); ...@@ -3005,3 +3005,47 @@ SELECT * FROM leaker_1(true);
DROP FUNCTION leaker_1(bool); DROP FUNCTION leaker_1(bool);
DROP FUNCTION leaker_2(bool); DROP FUNCTION leaker_2(bool);
-- Test handling of string literals.
set standard_conforming_strings = off;
create or replace function strtest() returns text as $$
begin
raise notice 'foo\\bar\041baz';
return 'foo\\bar\041baz';
end
$$ language plpgsql;
select strtest();
create or replace function strtest() returns text as $$
begin
raise notice E'foo\\bar\041baz';
return E'foo\\bar\041baz';
end
$$ language plpgsql;
select strtest();
set standard_conforming_strings = on;
create or replace function strtest() returns text as $$
begin
raise notice 'foo\\bar\041baz\';
return 'foo\\bar\041baz\';
end
$$ language plpgsql;
select strtest();
create or replace function strtest() returns text as $$
begin
raise notice E'foo\\bar\041baz';
return E'foo\\bar\041baz';
end
$$ language plpgsql;
select strtest();
drop function strtest();
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment