Commit 9a56dc33 authored by Robert Haas's avatar Robert Haas

Fix various possible problems with synchronous replication.

1. Don't ignore query cancel interrupts.  Instead, if the user asks to
cancel the query after we've already committed it, but before it's on
the standby, just emit a warning and let the COMMIT finish.

2. Don't ignore die interrupts (pg_terminate_backend or fast shutdown).
Instead, emit a warning message and close the connection without
acknowledging the commit.  Other backends will still see the effect of
the commit, but there's no getting around that; it's too late to abort
at this point, and ignoring die interrupts altogether doesn't seem like
a good idea.

3. If synchronous_standby_names becomes empty, wake up all backends
waiting for synchronous replication to complete.  Without this, someone
attempting to shut synchronous replication off could easily wedge the
entire system instead.

4. Avoid depending on the assumption that if a walsender updates
MyProc->syncRepState, we'll see the change even if we read it without
holding the lock.  The window for this appears to be quite narrow (and
probably doesn't exist at all on machines with strong memory ordering)
but protecting against it is practically free, so do that.

5. Remove useless state SYNC_REP_MUST_DISCONNECT, which isn't needed and
doesn't actually do anything.

There's still some further work needed here to make the behavior of fast
shutdown plausible, but that looks complex, so I'm leaving it for a
separate commit.  Review by Fujii Masao.
parent e148443d
...@@ -2100,8 +2100,7 @@ SET ENABLE_SEQSCAN TO OFF; ...@@ -2100,8 +2100,7 @@ SET ENABLE_SEQSCAN TO OFF;
If a standby is removed from the list of servers then it will stop If a standby is removed from the list of servers then it will stop
being the synchronous standby, allowing another to take its place. being the synchronous standby, allowing another to take its place.
If the list is empty, synchronous replication will not be If the list is empty, synchronous replication will not be
possible, whatever the setting of <varname>synchronous_replication</>, possible, whatever the setting of <varname>synchronous_replication</>.
however, already waiting commits will continue to wait.
Standbys may also be added to the list without restarting the server. Standbys may also be added to the list without restarting the server.
</para> </para>
</listitem> </listitem>
......
...@@ -49,6 +49,7 @@ ...@@ -49,6 +49,7 @@
#include "libpq/pqsignal.h" #include "libpq/pqsignal.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "postmaster/walwriter.h" #include "postmaster/walwriter.h"
#include "replication/syncrep.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/ipc.h" #include "storage/ipc.h"
...@@ -216,6 +217,9 @@ WalWriterMain(void) ...@@ -216,6 +217,9 @@ WalWriterMain(void)
*/ */
PG_SETMASK(&UnBlockSig); PG_SETMASK(&UnBlockSig);
/* Do this once before starting the loop, then just at SIGHUP time. */
SyncRepUpdateSyncStandbysDefined();
/* /*
* Loop forever * Loop forever
*/ */
...@@ -237,6 +241,8 @@ WalWriterMain(void) ...@@ -237,6 +241,8 @@ WalWriterMain(void)
{ {
got_SIGHUP = false; got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP); ProcessConfigFile(PGC_SIGHUP);
/* update global shmem state for sync rep */
SyncRepUpdateSyncStandbysDefined();
} }
if (shutdown_requested) if (shutdown_requested)
{ {
......
This diff is collapsed.
...@@ -2643,6 +2643,9 @@ die(SIGNAL_ARGS) ...@@ -2643,6 +2643,9 @@ die(SIGNAL_ARGS)
InterruptHoldoffCount--; InterruptHoldoffCount--;
ProcessInterrupts(); ProcessInterrupts();
} }
/* Interrupt any sync rep wait which is currently in progress. */
SetLatch(&(MyProc->waitLatch));
} }
errno = save_errno; errno = save_errno;
...@@ -2681,6 +2684,9 @@ StatementCancelHandler(SIGNAL_ARGS) ...@@ -2681,6 +2684,9 @@ StatementCancelHandler(SIGNAL_ARGS)
InterruptHoldoffCount--; InterruptHoldoffCount--;
ProcessInterrupts(); ProcessInterrupts();
} }
/* Interrupt any sync rep wait which is currently in progress. */
SetLatch(&(MyProc->waitLatch));
} }
errno = save_errno; errno = save_errno;
......
...@@ -26,7 +26,6 @@ ...@@ -26,7 +26,6 @@
#define SYNC_REP_NOT_WAITING 0 #define SYNC_REP_NOT_WAITING 0
#define SYNC_REP_WAITING 1 #define SYNC_REP_WAITING 1
#define SYNC_REP_WAIT_COMPLETE 2 #define SYNC_REP_WAIT_COMPLETE 2
#define SYNC_REP_MUST_DISCONNECT 3
/* user-settable parameters for synchronous replication */ /* user-settable parameters for synchronous replication */
extern bool synchronous_replication; extern bool synchronous_replication;
...@@ -42,6 +41,9 @@ extern void SyncRepCleanupAtProcExit(int code, Datum arg); ...@@ -42,6 +41,9 @@ extern void SyncRepCleanupAtProcExit(int code, Datum arg);
extern void SyncRepInitConfig(void); extern void SyncRepInitConfig(void);
extern void SyncRepReleaseWaiters(void); extern void SyncRepReleaseWaiters(void);
/* called by wal writer */
extern void SyncRepUpdateSyncStandbysDefined(void);
/* called by various procs */ /* called by various procs */
extern int SyncRepWakeQueue(bool all); extern int SyncRepWakeQueue(bool all);
extern const char *assign_synchronous_standby_names(const char *newval, bool doit, GucSource source); extern const char *assign_synchronous_standby_names(const char *newval, bool doit, GucSource source);
......
...@@ -78,6 +78,13 @@ typedef struct ...@@ -78,6 +78,13 @@ typedef struct
*/ */
XLogRecPtr lsn; XLogRecPtr lsn;
/*
* Are any sync standbys defined? Waiting backends can't reload the
* config file safely, so WAL writer updates this value as needed.
* Protected by SyncRepLock.
*/
bool sync_standbys_defined;
WalSnd walsnds[1]; /* VARIABLE LENGTH ARRAY */ WalSnd walsnds[1]; /* VARIABLE LENGTH ARRAY */
} WalSndCtlData; } WalSndCtlData;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment