Commit 1bb25580 authored by Heikki Linnakangas

Make standby server continuously retry restoring the next WAL segment with
restore_command, if the connection to the primary server is lost. This
ensures that the standby can recover automatically, if the connection is
lost for a long time and the standby falls behind so much that the required
WAL segments have been archived and deleted on the master.

This also makes standby_mode useful without streaming replication; the
server will keep retrying restore_command every few seconds until the
trigger file is found. That's the same basic functionality pg_standby
offers, but without the bells and whistles.

To implement that, refactor the ReadRecord/FetchRecord functions. The
FetchRecord() function introduced in the original streaming replication
patch is removed, and all the retry logic is now in a new function called
XLogReadPage(). XLogReadPage() is now responsible for executing
restore_command, launching walreceiver, and waiting for new WAL to arrive
from primary, as required.

This also changes the life cycle of walreceiver. When launched, it now only
tries to connect to the master once, and exits if the connection fails, or
is lost during streaming for any reason. The startup process detects the
death, and re-launches walreceiver if necessary.
parent ab13d1e9
This diff is collapsed.
......@@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.601 2010/01/15 09:19:02 heikki Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.602 2010/01/27 15:27:50 heikki Exp $
*
* NOTES
*
......@@ -224,9 +224,6 @@ static int Shutdown = NoShutdown;
static bool FatalError = false; /* T if recovering from backend crash */
static bool RecoveryError = false; /* T if WAL recovery failed */
/* If WalReceiverActive is true, restart walreceiver if it dies */
static bool WalReceiverActive = false;
/*
* We use a simple state machine to control startup, shutdown, and
* crash recovery (which is rather like shutdown followed by startup).
......@@ -1469,11 +1466,6 @@ ServerLoop(void)
if (PgStatPID == 0 && pmState == PM_RUN)
PgStatPID = pgstat_start();
/* If we have lost walreceiver, try to start a new one */
if (WalReceiverPID == 0 && WalReceiverActive &&
(pmState == PM_RECOVERY || pmState == PM_RECOVERY_CONSISTENT))
WalReceiverPID = StartWalReceiver();
/* If we need to signal the autovacuum launcher, do so now */
if (avlauncher_needs_signal)
{
......@@ -4167,16 +4159,9 @@ sigusr1_handler(SIGNAL_ARGS)
WalReceiverPID == 0)
{
/* Startup Process wants us to start the walreceiver process. */
WalReceiverActive = true;
WalReceiverPID = StartWalReceiver();
}
if (CheckPostmasterSignal(PMSIGNAL_SHUTDOWN_WALRECEIVER))
{
/* The walreceiver process doesn't want to be restarted anymore */
WalReceiverActive = false;
}
PG_SETMASK(&UnBlockSig);
errno = save_errno;
......
......@@ -29,7 +29,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/replication/walreceiver.c,v 1.1 2010/01/20 09:16:24 heikki Exp $
* $PostgreSQL: pgsql/src/backend/replication/walreceiver.c,v 1.2 2010/01/27 15:27:51 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -134,8 +134,7 @@ static void WalRcvShutdownHandler(SIGNAL_ARGS);
static void WalRcvQuickDieHandler(SIGNAL_ARGS);
/* Prototypes for private functions */
static void InitWalRcv(void);
static void WalRcvKill(int code, Datum arg);
static void WalRcvDie(int code, Datum arg);
static void XLogWalRcvWrite(char *buf, Size nbytes, XLogRecPtr recptr);
static void XLogWalRcvFlush(void);
......@@ -153,21 +152,57 @@ static struct
void
WalReceiverMain(void)
{
sigjmp_buf local_sigjmp_buf;
MemoryContext walrcv_context;
char conninfo[MAXCONNINFO];
XLogRecPtr startpoint;
/* use volatile pointer to prevent code rearrangement */
volatile WalRcvData *walrcv = WalRcv;
/* Load the libpq-specific functions */
load_file("libpqwalreceiver", false);
if (walrcv_connect == NULL || walrcv_receive == NULL ||
walrcv_disconnect == NULL)
elog(ERROR, "libpqwalreceiver didn't initialize correctly");
/*
* WalRcv should be set up already (if we are a backend, we inherit
* this by fork() or EXEC_BACKEND mechanism from the postmaster).
*/
Assert(walrcv != NULL);
/*
* Mark walreceiver as running in shared memory.
*
* Do this as early as possible, so that if we fail later on, we'll
* set state to STOPPED. If we die before this, the startup process
* will keep waiting for us to start up, until it times out.
*/
SpinLockAcquire(&walrcv->mutex);
Assert(walrcv->pid == 0);
switch(walrcv->walRcvState)
{
case WALRCV_STOPPING:
/* If we've already been requested to stop, don't start up. */
walrcv->walRcvState = WALRCV_STOPPED;
/* fall through */
case WALRCV_STOPPED:
SpinLockRelease(&walrcv->mutex);
proc_exit(1);
break;
case WALRCV_STARTING:
/* The usual case */
break;
case WALRCV_RUNNING:
/* Shouldn't happen */
elog(PANIC, "walreceiver still running according to shared memory state");
}
/* Advertise our PID so that the startup process can kill us */
walrcv->pid = MyProcPid;
walrcv->walRcvState = WALRCV_RUNNING;
/* Fetch information required to start streaming */
strlcpy(conninfo, (char *) walrcv->conninfo, MAXCONNINFO);
startpoint = walrcv->receivedUpto;
SpinLockRelease(&walrcv->mutex);
/* Mark walreceiver in progress */
InitWalRcv();
/* Arrange to clean up at walreceiver exit */
on_shmem_exit(WalRcvDie, 0);
/*
* If possible, make this process a group leader, so that the postmaster
......@@ -200,81 +235,21 @@ WalReceiverMain(void)
/* We allow SIGQUIT (quickdie) at all times */
sigdelset(&BlockSig, SIGQUIT);
/* Load the libpq-specific functions */
load_file("libpqwalreceiver", false);
if (walrcv_connect == NULL || walrcv_receive == NULL ||
walrcv_disconnect == NULL)
elog(ERROR, "libpqwalreceiver didn't initialize correctly");
/*
* Create a resource owner to keep track of our resources (not clear that
* we need this, but may as well have one).
*/
CurrentResourceOwner = ResourceOwnerCreate(NULL, "Wal Receiver");
/*
* Create a memory context that we will do all our work in. We do this so
* that we can reset the context during error recovery and thereby avoid
* possible memory leaks.
*/
walrcv_context = AllocSetContextCreate(TopMemoryContext,
"Wal Receiver",
ALLOCSET_DEFAULT_MINSIZE,
ALLOCSET_DEFAULT_INITSIZE,
ALLOCSET_DEFAULT_MAXSIZE);
MemoryContextSwitchTo(walrcv_context);
/*
* If an exception is encountered, processing resumes here.
*
* This code is heavily based on bgwriter.c, q.v.
*/
if (sigsetjmp(local_sigjmp_buf, 1) != 0)
{
/* Since not using PG_TRY, must reset error stack by hand */
error_context_stack = NULL;
/* Reset WalRcvImmediateInterruptOK */
DisableWalRcvImmediateExit();
/* Prevent interrupts while cleaning up */
HOLD_INTERRUPTS();
/* Report the error to the server log */
EmitErrorReport();
/* Disconnect any previous connection. */
EnableWalRcvImmediateExit();
walrcv_disconnect();
DisableWalRcvImmediateExit();
/*
* Now return to normal top-level context and clear ErrorContext for
* next time.
*/
MemoryContextSwitchTo(walrcv_context);
FlushErrorState();
/* Flush any leaked data in the top-level context */
MemoryContextResetAndDeleteChildren(walrcv_context);
/* Now we can allow interrupts again */
RESUME_INTERRUPTS();
/*
* Sleep at least 1 second after any error. A write error is likely
* to be repeated, and we don't want to be filling the error logs as
* fast as we can.
*/
pg_usleep(1000000L);
}
/* We can now handle ereport(ERROR) */
PG_exception_stack = &local_sigjmp_buf;
/* Unblock signals (they were blocked when the postmaster forked us) */
PG_SETMASK(&UnBlockSig);
/* Fetch connection information from shared memory */
SpinLockAcquire(&walrcv->mutex);
strlcpy(conninfo, (char *) walrcv->conninfo, MAXCONNINFO);
startpoint = walrcv->receivedUpto;
SpinLockRelease(&walrcv->mutex);
/* Establish the connection to the primary for XLOG streaming */
EnableWalRcvImmediateExit();
walrcv_connect(conninfo, startpoint);
......@@ -330,63 +305,24 @@ WalReceiverMain(void)
}
}
/* Advertise our pid in shared memory, so that startup process can kill us. */
static void
InitWalRcv(void)
{
/* use volatile pointer to prevent code rearrangement */
volatile WalRcvData *walrcv = WalRcv;
/*
* WalRcv should be set up already (if we are a backend, we inherit
* this by fork() or EXEC_BACKEND mechanism from the postmaster).
*/
if (walrcv == NULL)
elog(PANIC, "walreceiver control data uninitialized");
/* If we've already been requested to stop, don't start up */
SpinLockAcquire(&walrcv->mutex);
Assert(walrcv->pid == 0);
if (walrcv->walRcvState == WALRCV_STOPPED ||
walrcv->walRcvState == WALRCV_STOPPING)
{
walrcv->walRcvState = WALRCV_STOPPED;
SpinLockRelease(&walrcv->mutex);
proc_exit(1);
}
walrcv->pid = MyProcPid;
SpinLockRelease(&walrcv->mutex);
/* Arrange to clean up at walreceiver exit */
on_shmem_exit(WalRcvKill, 0);
}
/*
* Clear our pid from shared memory at exit.
* Mark us as STOPPED in shared memory at exit.
*/
static void
WalRcvKill(int code, Datum arg)
WalRcvDie(int code, Datum arg)
{
/* use volatile pointer to prevent code rearrangement */
volatile WalRcvData *walrcv = WalRcv;
bool stopped = false;
SpinLockAcquire(&walrcv->mutex);
if (walrcv->walRcvState == WALRCV_STOPPING ||
walrcv->walRcvState == WALRCV_STOPPED)
{
walrcv->walRcvState = WALRCV_STOPPED;
stopped = true;
elog(LOG, "walreceiver stopped");
}
Assert(walrcv->walRcvState == WALRCV_RUNNING ||
walrcv->walRcvState == WALRCV_STOPPING);
walrcv->walRcvState = WALRCV_STOPPED;
walrcv->pid = 0;
SpinLockRelease(&walrcv->mutex);
/* Terminate the connection gracefully. */
walrcv_disconnect();
/* If requested to stop, tell postmaster to not restart us. */
if (stopped)
SendPostmasterSignal(PMSIGNAL_SHUTDOWN_WALRECEIVER);
}
/* SIGHUP: set flag to re-read config file at next convenient time */
......
......@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/replication/walreceiverfuncs.c,v 1.2 2010/01/20 09:16:24 heikki Exp $
* $PostgreSQL: pgsql/src/backend/replication/walreceiverfuncs.c,v 1.3 2010/01/27 15:27:51 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -18,6 +18,8 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>
......@@ -30,8 +32,11 @@
WalRcvData *WalRcv = NULL;
static bool CheckForStandbyTrigger(void);
static void ShutdownWalRcv(void);
/*
* How long to wait for walreceiver to start up after requesting
* postmaster to launch it. In seconds.
*/
#define WALRCV_STARTUP_TIMEOUT 10
/* Report shared memory space needed by WalRcvShmemInit */
Size
......@@ -62,7 +67,7 @@ WalRcvShmemInit(void)
/* Initialize the data structures */
MemSet(WalRcv, 0, WalRcvShmemSize());
WalRcv->walRcvState = WALRCV_NOT_STARTED;
WalRcv->walRcvState = WALRCV_STOPPED;
SpinLockInit(&WalRcv->mutex);
}
......@@ -73,90 +78,51 @@ WalRcvInProgress(void)
/* use volatile pointer to prevent code rearrangement */
volatile WalRcvData *walrcv = WalRcv;
WalRcvState state;
pg_time_t startTime;
SpinLockAcquire(&walrcv->mutex);
state = walrcv->walRcvState;
SpinLockRelease(&walrcv->mutex);
if (state == WALRCV_RUNNING || state == WALRCV_STOPPING)
return true;
else
return false;
}
/*
* Wait for the XLOG record at given position to become available.
*
* 'recptr' indicates the byte position which caller wants to read the
* XLOG record up to. The byte position actually written and flushed
* by walreceiver is returned. It can be higher than the requested
* location, and the caller can safely read up to that point without
* calling WaitNextXLogAvailable() again.
*
* If WAL streaming is ended (because a trigger file is found), *finished
* is set to true and function returns immediately. The returned position
* can be lower than requested in that case.
*
* Called by the startup process during streaming recovery.
*/
XLogRecPtr
WaitNextXLogAvailable(XLogRecPtr recptr, bool *finished)
{
static XLogRecPtr receivedUpto = {0, 0};
*finished = false;
state = walrcv->walRcvState;
startTime = walrcv->startTime;
/* Quick exit if already known available */
if (XLByteLT(recptr, receivedUpto))
return receivedUpto;
SpinLockRelease(&walrcv->mutex);
for (;;)
/*
* If it has taken too long for walreceiver to start up, give up.
* Setting the state to STOPPED ensures that if walreceiver later
* does start up after all, it will see that it's not supposed to be
* running and die without doing anything.
*/
if (state == WALRCV_STARTING)
{
/* use volatile pointer to prevent code rearrangement */
volatile WalRcvData *walrcv = WalRcv;
/* Update local status */
SpinLockAcquire(&walrcv->mutex);
receivedUpto = walrcv->receivedUpto;
SpinLockRelease(&walrcv->mutex);
pg_time_t now = (pg_time_t) time(NULL);
/* If available already, leave here */
if (XLByteLT(recptr, receivedUpto))
return receivedUpto;
/* Check to see if the trigger file exists */
if (CheckForStandbyTrigger())
if ((now - startTime) > WALRCV_STARTUP_TIMEOUT)
{
*finished = true;
return receivedUpto;
}
SpinLockAcquire(&walrcv->mutex);
pg_usleep(100000L); /* 100ms */
/*
* This possibly-long loop needs to handle interrupts of startup
* process.
*/
HandleStartupProcInterrupts();
if (walrcv->walRcvState == WALRCV_STARTING)
state = walrcv->walRcvState = WALRCV_STOPPED;
/*
* Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children.
*/
if (!PostmasterIsAlive(true))
exit(1);
SpinLockRelease(&walrcv->mutex);
}
}
if (state != WALRCV_STOPPED)
return true;
else
return false;
}
/*
* Stop walreceiver and wait for it to die.
* Stop walreceiver (if running) and wait for it to die.
*/
static void
void
ShutdownWalRcv(void)
{
/* use volatile pointer to prevent code rearrangement */
volatile WalRcvData *walrcv = WalRcv;
pid_t walrcvpid;
pid_t walrcvpid = 0;
/*
* Request walreceiver to stop. Walreceiver will switch to WALRCV_STOPPED
......@@ -164,15 +130,25 @@ ShutdownWalRcv(void)
* restart itself.
*/
SpinLockAcquire(&walrcv->mutex);
Assert(walrcv->walRcvState == WALRCV_RUNNING);
walrcv->walRcvState = WALRCV_STOPPING;
walrcvpid = walrcv->pid;
switch(walrcv->walRcvState)
{
case WALRCV_STOPPED:
break;
case WALRCV_STARTING:
walrcv->walRcvState = WALRCV_STOPPED;
break;
case WALRCV_RUNNING:
walrcv->walRcvState = WALRCV_STOPPING;
/* fall through */
case WALRCV_STOPPING:
walrcvpid = walrcv->pid;
break;
}
SpinLockRelease(&walrcv->mutex);
/*
* Pid can be 0, if no walreceiver process is active right now.
* Postmaster should restart it, and when it does, it will see the
* STOPPING state.
* Signal walreceiver process if it was still running.
*/
if (walrcvpid != 0)
kill(walrcvpid, SIGTERM);
......@@ -193,30 +169,6 @@ ShutdownWalRcv(void)
}
}
/*
 * Probe for the standby trigger file.
 *
 * Returns false when no trigger file is configured (TriggerFile is NULL)
 * or when stat() on it does not succeed.  Otherwise the discovery is
 * logged, ShutdownWalRcv() is called to stop walreceiver, the trigger
 * file is unlinked, and true is returned.
 */
static bool
CheckForStandbyTrigger(void)
{
	struct stat trigger_info;
	bool		triggered;

	/* stat() is only attempted when a trigger file name is configured */
	triggered = (TriggerFile != NULL &&
				 stat(TriggerFile, &trigger_info) == 0);
	if (!triggered)
		return false;

	ereport(LOG,
			(errmsg("trigger file found: %s", TriggerFile)));
	ShutdownWalRcv();
	unlink(TriggerFile);

	return true;
}
/*
* Request postmaster to start walreceiver.
*
......@@ -228,17 +180,30 @@ RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo)
{
/* use volatile pointer to prevent code rearrangement */
volatile WalRcvData *walrcv = WalRcv;
pg_time_t now = (pg_time_t) time(NULL);
Assert(walrcv->walRcvState == WALRCV_NOT_STARTED);
/*
* We always start at the beginning of the segment.
* That prevents a broken segment (i.e., with no records in the
* first half of a segment) from being created by XLOG streaming,
* which might cause trouble later on if the segment is e.g
* archived.
*/
if (recptr.xrecoff % XLogSegSize != 0)
recptr.xrecoff -= recptr.xrecoff % XLogSegSize;
/* It better be stopped before we try to restart it */
Assert(walrcv->walRcvState == WALRCV_STOPPED);
/* locking is just pro forma here; walreceiver isn't started yet */
SpinLockAcquire(&walrcv->mutex);
walrcv->receivedUpto = recptr;
if (conninfo != NULL)
strlcpy((char *) walrcv->conninfo, conninfo, MAXCONNINFO);
else
walrcv->conninfo[0] = '\0';
walrcv->walRcvState = WALRCV_RUNNING;
walrcv->walRcvState = WALRCV_STARTING;
walrcv->startTime = now;
walrcv->receivedUpto = recptr;
SpinLockRelease(&walrcv->mutex);
SendPostmasterSignal(PMSIGNAL_START_WALRECEIVER);
......@@ -260,3 +225,4 @@ GetWalRcvWriteRecPtr(void)
return recptr;
}
......@@ -5,7 +5,7 @@
*
* Portions Copyright (c) 2010-2010, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/replication/walreceiver.h,v 1.4 2010/01/20 18:54:27 heikki Exp $
* $PostgreSQL: pgsql/src/include/replication/walreceiver.h,v 1.5 2010/01/27 15:27:51 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -27,10 +27,10 @@
*/
typedef enum
{
WALRCV_NOT_STARTED,
WALRCV_RUNNING, /* walreceiver has been started */
WALRCV_STOPPING, /* requested to stop, but still running */
WALRCV_STOPPED /* stopped and mustn't start up again */
WALRCV_STOPPED, /* stopped and mustn't start up again */
WALRCV_STARTING, /* launched, but the process hasn't initialized yet */
WALRCV_RUNNING, /* walreceiver is running */
WALRCV_STOPPING /* requested to stop, but still running */
} WalRcvState;
/* Shared memory area for management of walreceiver process */
......@@ -47,6 +47,7 @@ typedef struct
*/
pid_t pid;
WalRcvState walRcvState;
pg_time_t startTime;
/*
* receivedUpto-1 is the last byte position that has been already
......@@ -74,6 +75,7 @@ extern PGDLLIMPORT walrcv_disconnect_type walrcv_disconnect;
extern void WalReceiverMain(void);
extern Size WalRcvShmemSize(void);
extern void WalRcvShmemInit(void);
extern void ShutdownWalRcv(void);
extern bool WalRcvInProgress(void);
extern XLogRecPtr WaitNextXLogAvailable(XLogRecPtr recptr, bool *finished);
extern void RequestXLogStreaming(XLogRecPtr recptr, const char *conninfo);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/pmsignal.h,v 1.28 2010/01/15 09:19:09 heikki Exp $
* $PostgreSQL: pgsql/src/include/storage/pmsignal.h,v 1.29 2010/01/27 15:27:51 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -30,7 +30,6 @@ typedef enum
PMSIGNAL_START_AUTOVAC_LAUNCHER, /* start an autovacuum launcher */
PMSIGNAL_START_AUTOVAC_WORKER, /* start an autovacuum worker */
PMSIGNAL_START_WALRECEIVER, /* start a walreceiver */
PMSIGNAL_SHUTDOWN_WALRECEIVER, /* shut down a walreceiver */
NUM_PMSIGNALS /* Must be last value of enum! */
} PMSignalReason;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment