Commit 1efcc594 authored by Tom Lane

Fix limitations on what SQL commands can be issued to a walsender.

In logical replication mode, a WalSender is supposed to be able
to execute any regular SQL command, as well as the special
replication commands.  Poor design of the replication-command
parser caused it to fail in various cases, notably:

* semicolons embedded in a command, or multiple SQL commands
sent in a single message;

* dollar-quoted literals containing odd numbers of single
or double quote marks;

* commands starting with a comment.

The basic problem here is that we're trying to run repl_scanner.l
across the entire input string even when it's not a replication
command.  Since repl_scanner.l does not understand all of the
token types known to the core lexer, this is doomed to have
failure modes.

We certainly don't want to make repl_scanner.l as big as scan.l,
so instead rejigger stuff so that we only lex the first token of
a non-replication command.  That will usually look like an IDENT
to repl_scanner.l, though a comment would end up getting reported
as a '-' or '/' single-character token.  If the token is a replication
command keyword, we push it back and proceed normally with repl_gram.y
parsing.  Otherwise, we can drop out of exec_replication_command()
without examining the rest of the string.

(It's still theoretically possible for repl_scanner.l to fail on
the first token; but that could only happen if it's an unterminated
single- or double-quoted string, in which case you'd have gotten
largely the same error from the core lexer too.)

In this way, repl_gram.y isn't involved at all in handling general
SQL commands, so we can get rid of the SQLCmd node type.  (In
the back branches, we can't remove it because renumbering enum
NodeTag would be an ABI break; so just leave it sit there unused.)

I failed to resist the temptation to clean up some other sloppy
coding in repl_scanner.l while at it.  The only externally-visible
behavior change from that is it now accepts \r and \f as whitespace,
same as the core lexer.

Per bug #17379 from Greg Rychlewski.  Back-patch to all supported
branches.

Discussion: https://postgr.es/m/17379-6a5c6cfb3f1f5e77@postgresql.org
parent ef9706bb
...@@ -25,8 +25,6 @@ ...@@ -25,8 +25,6 @@
/* Result of the parsing is returned here */ /* Result of the parsing is returned here */
Node *replication_parse_result; Node *replication_parse_result;
static SQLCmd *make_sqlcmd(void);
/* /*
* Bison doesn't allocate anything that needs to live across parser calls, * Bison doesn't allocate anything that needs to live across parser calls,
...@@ -59,7 +57,6 @@ static SQLCmd *make_sqlcmd(void); ...@@ -59,7 +57,6 @@ static SQLCmd *make_sqlcmd(void);
%token <str> SCONST IDENT %token <str> SCONST IDENT
%token <uintval> UCONST %token <uintval> UCONST
%token <recptr> RECPTR %token <recptr> RECPTR
%token T_WORD
/* Keyword tokens. */ /* Keyword tokens. */
%token K_BASE_BACKUP %token K_BASE_BACKUP
...@@ -93,7 +90,7 @@ static SQLCmd *make_sqlcmd(void); ...@@ -93,7 +90,7 @@ static SQLCmd *make_sqlcmd(void);
%type <node> command %type <node> command
%type <node> base_backup start_replication start_logical_replication %type <node> base_backup start_replication start_logical_replication
create_replication_slot drop_replication_slot identify_system create_replication_slot drop_replication_slot identify_system
timeline_history show sql_cmd timeline_history show
%type <list> base_backup_opt_list %type <list> base_backup_opt_list
%type <defelt> base_backup_opt %type <defelt> base_backup_opt
%type <uintval> opt_timeline %type <uintval> opt_timeline
...@@ -126,7 +123,6 @@ command: ...@@ -126,7 +123,6 @@ command:
| drop_replication_slot | drop_replication_slot
| timeline_history | timeline_history
| show | show
| sql_cmd
; ;
/* /*
...@@ -413,25 +409,6 @@ plugin_opt_arg: ...@@ -413,25 +409,6 @@ plugin_opt_arg:
| /* EMPTY */ { $$ = NULL; } | /* EMPTY */ { $$ = NULL; }
; ;
sql_cmd:
IDENT { $$ = (Node *) make_sqlcmd(); }
;
%% %%
/*
 * Build the SQLCmd node that stands for a plain (non-replication) SQL
 * command.
 *
 * The remainder of the command is not interpreted: tokens are simply read
 * and thrown away until either a ';' token or end of input (token 0) has
 * been consumed.
 */
static SQLCmd *
make_sqlcmd(void)
{
	SQLCmd	   *result = makeNode(SQLCmd);
	int			token;

	/* Swallow tokens up to and including the terminating ';' or EOF. */
	do
	{
		token = yylex();
	} while (token != ';' && token != 0);

	return result;
}
#include "repl_scanner.c" #include "repl_scanner.c"
...@@ -31,6 +31,10 @@ fprintf_to_ereport(const char *fmt, const char *msg) ...@@ -31,6 +31,10 @@ fprintf_to_ereport(const char *fmt, const char *msg)
/* Handle to the buffer that the lexer uses internally */ /* Handle to the buffer that the lexer uses internally */
static YY_BUFFER_STATE scanbufhandle; static YY_BUFFER_STATE scanbufhandle;
/* Pushed-back token (we only handle one) */
static int repl_pushed_back_token;
/* Work area for collecting literals */
static StringInfoData litbuf; static StringInfoData litbuf;
static void startlit(void); static void startlit(void);
...@@ -51,7 +55,18 @@ static void addlitchar(unsigned char ychar); ...@@ -51,7 +55,18 @@ static void addlitchar(unsigned char ychar);
%option warn %option warn
%option prefix="replication_yy" %option prefix="replication_yy"
%x xq xd /*
* Exclusive states:
* <xd> delimited identifiers (double-quoted identifiers)
* <xq> standard single-quoted strings
*/
%x xd
%x xq
space [ \t\n\r\f]
quote '
quotestop {quote}
/* Extended quote /* Extended quote
* xqdouble implements embedded quote, '''' * xqdouble implements embedded quote, ''''
...@@ -69,11 +84,8 @@ xdstop {dquote} ...@@ -69,11 +84,8 @@ xdstop {dquote}
xddouble {dquote}{dquote} xddouble {dquote}{dquote}
xdinside [^"]+ xdinside [^"]+
digit [0-9]+ digit [0-9]
hexdigit [0-9A-Za-z]+ hexdigit [0-9A-Fa-f]
quote '
quotestop {quote}
ident_start [A-Za-z\200-\377_] ident_start [A-Za-z\200-\377_]
ident_cont [A-Za-z\200-\377_0-9\$] ident_cont [A-Za-z\200-\377_0-9\$]
...@@ -82,6 +94,19 @@ identifier {ident_start}{ident_cont}* ...@@ -82,6 +94,19 @@ identifier {ident_start}{ident_cont}*
%% %%
%{
	/* This code is inserted at the start of replication_yylex() */

	/*
	 * A token stashed in repl_pushed_back_token takes priority over
	 * scanning: clear the slot and hand that token back to the caller.
	 */
	if (repl_pushed_back_token != 0)
	{
		int			pending = repl_pushed_back_token;

		repl_pushed_back_token = 0;
		return pending;
	}
%}
BASE_BACKUP { return K_BASE_BACKUP; } BASE_BACKUP { return K_BASE_BACKUP; }
FAST { return K_FAST; } FAST { return K_FAST; }
IDENTIFY_SYSTEM { return K_IDENTIFY_SYSTEM; } IDENTIFY_SYSTEM { return K_IDENTIFY_SYSTEM; }
...@@ -110,14 +135,7 @@ WAIT { return K_WAIT; } ...@@ -110,14 +135,7 @@ WAIT { return K_WAIT; }
MANIFEST { return K_MANIFEST; } MANIFEST { return K_MANIFEST; }
MANIFEST_CHECKSUMS { return K_MANIFEST_CHECKSUMS; } MANIFEST_CHECKSUMS { return K_MANIFEST_CHECKSUMS; }
"," { return ','; } {space}+ { /* do nothing */ }
";" { return ';'; }
"(" { return '('; }
")" { return ')'; }
[\n] ;
[\t] ;
" " ;
{digit}+ { {digit}+ {
yylval.uintval = strtoul(yytext, NULL, 10); yylval.uintval = strtoul(yytext, NULL, 10);
...@@ -179,6 +197,11 @@ MANIFEST_CHECKSUMS { return K_MANIFEST_CHECKSUMS; } ...@@ -179,6 +197,11 @@ MANIFEST_CHECKSUMS { return K_MANIFEST_CHECKSUMS; }
return IDENT; return IDENT;
} }
. {
/* Any char not recognized above is returned as itself */
return yytext[0];
}
<xq,xd><<EOF>> { yyerror("unterminated quoted string"); } <xq,xd><<EOF>> { yyerror("unterminated quoted string"); }
...@@ -186,9 +209,6 @@ MANIFEST_CHECKSUMS { return K_MANIFEST_CHECKSUMS; } ...@@ -186,9 +209,6 @@ MANIFEST_CHECKSUMS { return K_MANIFEST_CHECKSUMS; }
yyterminate(); yyterminate();
} }
. {
return T_WORD;
}
%% %%
/* LCOV_EXCL_STOP */ /* LCOV_EXCL_STOP */
...@@ -248,6 +268,7 @@ replication_scanner_init(const char *str) ...@@ -248,6 +268,7 @@ replication_scanner_init(const char *str)
/* Make sure we start in proper state */ /* Make sure we start in proper state */
BEGIN(INITIAL); BEGIN(INITIAL);
repl_pushed_back_token = 0;
} }
void void
...@@ -256,3 +277,34 @@ replication_scanner_finish(void) ...@@ -256,3 +277,34 @@ replication_scanner_finish(void)
yy_delete_buffer(scanbufhandle); yy_delete_buffer(scanbufhandle);
scanbufhandle = NULL; scanbufhandle = NULL;
} }
/*
 * Lex the first token of the command and report whether it is one of the
 * WalSender command keywords.
 *
 * repl_scanner.l is kept minimal and does not know every construct that
 * the core lexer knows, so we must not lex beyond the first token of what
 * may turn out to be a general SQL command.  Such a command will usually
 * start with something that looks like an IDENT token here, although other
 * cases are possible.
 *
 * Returns true if the first token is a WalSender keyword; in that case the
 * token is saved in repl_pushed_back_token so that it can be parsed later.
 * Returns false otherwise, without pushing the token back.
 */
bool
replication_scanner_is_replication_command(void)
{
	int			tok = replication_yylex();
	bool		is_walsender_keyword;

	is_walsender_keyword = (tok == K_IDENTIFY_SYSTEM ||
							tok == K_BASE_BACKUP ||
							tok == K_START_REPLICATION ||
							tok == K_CREATE_REPLICATION_SLOT ||
							tok == K_DROP_REPLICATION_SLOT ||
							tok == K_TIMELINE_HISTORY ||
							tok == K_SHOW);

	/* Not ours: no need to push the token back. */
	if (!is_walsender_keyword)
		return false;

	/* Remember the keyword so that the grammar sees it again. */
	repl_pushed_back_token = tok;
	return true;
}
...@@ -1520,7 +1520,8 @@ exec_replication_command(const char *cmd_string) ...@@ -1520,7 +1520,8 @@ exec_replication_command(const char *cmd_string)
*/ */
if (MyWalSnd->state == WALSNDSTATE_STOPPING) if (MyWalSnd->state == WALSNDSTATE_STOPPING)
ereport(ERROR, ereport(ERROR,
(errmsg("cannot execute new commands while WAL sender is in stopping mode"))); (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("cannot execute new commands while WAL sender is in stopping mode")));
/* /*
* CREATE_REPLICATION_SLOT ... LOGICAL exports a snapshot until the next * CREATE_REPLICATION_SLOT ... LOGICAL exports a snapshot until the next
...@@ -1531,7 +1532,7 @@ exec_replication_command(const char *cmd_string) ...@@ -1531,7 +1532,7 @@ exec_replication_command(const char *cmd_string)
CHECK_FOR_INTERRUPTS(); CHECK_FOR_INTERRUPTS();
/* /*
* Parse the command. * Prepare to parse and execute the command.
*/ */
cmd_context = AllocSetContextCreate(CurrentMemoryContext, cmd_context = AllocSetContextCreate(CurrentMemoryContext,
"Replication command context", "Replication command context",
...@@ -1539,33 +1540,41 @@ exec_replication_command(const char *cmd_string) ...@@ -1539,33 +1540,41 @@ exec_replication_command(const char *cmd_string)
old_context = MemoryContextSwitchTo(cmd_context); old_context = MemoryContextSwitchTo(cmd_context);
replication_scanner_init(cmd_string); replication_scanner_init(cmd_string);
parse_rc = replication_yyparse();
if (parse_rc != 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg_internal("replication command parser returned %d",
parse_rc)));
replication_scanner_finish();
cmd_node = replication_parse_result;
/* /*
* If it's a SQL command, just clean up our mess and return false; the * Is it a WalSender command?
* caller will take care of executing it.
*/ */
if (IsA(cmd_node, SQLCmd)) if (!replication_scanner_is_replication_command())
{ {
if (MyDatabaseId == InvalidOid) /* Nope; clean up and get out. */
ereport(ERROR, replication_scanner_finish();
(errmsg("cannot execute SQL commands in WAL sender for physical replication")));
MemoryContextSwitchTo(old_context); MemoryContextSwitchTo(old_context);
MemoryContextDelete(cmd_context); MemoryContextDelete(cmd_context);
/* XXX this is a pretty random place to make this check */
if (MyDatabaseId == InvalidOid)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot execute SQL commands in WAL sender for physical replication")));
/* Tell the caller that this wasn't a WalSender command. */ /* Tell the caller that this wasn't a WalSender command. */
return false; return false;
} }
/*
* Looks like a WalSender command, so parse it.
*/
parse_rc = replication_yyparse();
if (parse_rc != 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg_internal("replication command parser returned %d",
parse_rc)));
replication_scanner_finish();
cmd_node = replication_parse_result;
/* /*
* Report query to various monitoring facilities. For this purpose, we * Report query to various monitoring facilities. For this purpose, we
* report replication commands just like SQL commands. * report replication commands just like SQL commands.
......
...@@ -121,6 +121,7 @@ extern int replication_yylex(void); ...@@ -121,6 +121,7 @@ extern int replication_yylex(void);
extern void replication_yyerror(const char *str) pg_attribute_noreturn(); extern void replication_yyerror(const char *str) pg_attribute_noreturn();
extern void replication_scanner_init(const char *query_string); extern void replication_scanner_init(const char *query_string);
extern void replication_scanner_finish(void); extern void replication_scanner_finish(void);
extern bool replication_scanner_is_replication_command(void);
extern Node *replication_parse_result; extern Node *replication_parse_result;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment