Commit bbe3c02d authored by Tom Lane

Revise postmaster startup/shutdown logic to eliminate the problem that a
constant flow of new connection requests could prevent the postmaster from
completing a shutdown or crash restart.  This is done by labeling child
processes that are "dead ends", that is, we know that they were launched only
to tell a client that it can't connect.  These processes are managed
separately so that they don't confuse us into thinking that we can't advance
to the next stage of a shutdown or restart sequence, until the very end
where we must wait for them to drain out so we can delete the shmem segment.
Per discussion of a misbehavior reported by Keaton Adams.

Since this code was baroque already, and my first attempt at fixing the
problem made it entirely impenetrable, I took the opportunity to rewrite it
in a state-machine style.  That eliminates some duplicated code sections and
hopefully makes everything a bit clearer.
parent c556b29a
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.539 2007/08/04 03:15:49 tgl Exp $ * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.540 2007/08/09 01:18:43 tgl Exp $
* *
* NOTES * NOTES
* *
...@@ -131,22 +131,31 @@ ...@@ -131,22 +131,31 @@
* *
* "Special" children such as the startup, bgwriter and autovacuum launcher * "Special" children such as the startup, bgwriter and autovacuum launcher
* tasks are not in this list. Autovacuum worker processes are in it. * tasks are not in this list. Autovacuum worker processes are in it.
* Also, "dead_end" children are in it: these are children launched just
* for the purpose of sending a friendly rejection message to a would-be
* client. We must track them because they are attached to shared memory,
* but we know they will never become live backends.
*/ */
typedef struct bkend typedef struct bkend
{ {
pid_t pid; /* process id of backend */ pid_t pid; /* process id of backend */
long cancel_key; /* cancel key for cancels for this backend */ long cancel_key; /* cancel key for cancels for this backend */
bool is_autovacuum; /* is it an autovacuum process? */ bool is_autovacuum; /* is it an autovacuum process? */
bool dead_end; /* is it going to send an error and quit? */
} Backend; } Backend;
static Dllist *BackendList; static Dllist *BackendList;
#ifdef EXEC_BACKEND #ifdef EXEC_BACKEND
/* /*
* Number of entries in the backend table. Twice the number of backends, * Number of entries in the shared-memory backend table. This table is used
* plus five other subprocesses (stats, bgwriter, walwriter, autovac, logger). * only for sending cancels, and therefore only includes children we allow
* cancels on: regular backends and autovac workers. In particular we exclude
* dead_end children, allowing the table to have a known maximum size, to wit
* the same too-many-children limit enforced by canAcceptConnections().
*/ */
#define NUM_BACKENDARRAY_ELEMS (2*MaxBackends + 5) #define NUM_BACKENDARRAY_ELEMS (2*MaxBackends)
static Backend *ShmemBackendArray; static Backend *ShmemBackendArray;
#endif #endif
...@@ -180,7 +189,7 @@ static char ExtraOptions[MAXPGPATH]; ...@@ -180,7 +189,7 @@ static char ExtraOptions[MAXPGPATH];
* backend dumps core. Normally, it kills all peers of the dead backend * backend dumps core. Normally, it kills all peers of the dead backend
* and reinitializes shared memory. By specifying -s or -n, we can have * and reinitializes shared memory. By specifying -s or -n, we can have
* the postmaster stop (rather than kill) peers and not reinitialize * the postmaster stop (rather than kill) peers and not reinitialize
* shared data structures. * shared data structures. (Reinit is currently dead code, though.)
*/ */
static bool Reinit = true; static bool Reinit = true;
static int SendStop = false; static int SendStop = false;
...@@ -216,10 +225,45 @@ static int Shutdown = NoShutdown; ...@@ -216,10 +225,45 @@ static int Shutdown = NoShutdown;
static bool FatalError = false; /* T if recovering from backend crash */ static bool FatalError = false; /* T if recovering from backend crash */
/*
* We use a simple state machine to control startup, shutdown, and
* crash recovery (which is rather like shutdown followed by startup).
*
* Normal child backends can only be launched when we are in PM_RUN state.
* In other states we handle connection requests by launching "dead_end"
* child processes, which will simply send the client an error message and
* quit. (We track these in the BackendList so that we can know when they
* are all gone; this is important because they're still connected to shared
* memory, and would interfere with an attempt to destroy the shmem segment,
* possibly leading to SHMALL failure when we try to make a new one.)
* In PM_WAIT_DEAD_END state we are waiting for all the dead_end children
* to drain out of the system, and therefore stop accepting connection
* requests at all until the last existing child has quit (which hopefully
* will not be very long).
*
* Notice that this state variable does not distinguish *why* we entered
* PM_WAIT_BACKENDS or later states --- Shutdown and FatalError must be
* consulted to find that out. FatalError is never true in PM_RUN state, nor
* in PM_SHUTDOWN state (because we don't enter that state when trying to
* recover from a crash). It can be true in PM_STARTUP state, because we
* don't clear it until we've successfully recovered.
*/
typedef enum {
PM_INIT, /* postmaster starting */
PM_STARTUP, /* waiting for startup subprocess */
PM_RUN, /* normal "database is alive" state */
PM_WAIT_BACKENDS, /* waiting for live backends to exit */
PM_SHUTDOWN, /* waiting for bgwriter to do shutdown ckpt */
PM_WAIT_DEAD_END, /* waiting for dead_end children to exit */
PM_NO_CHILDREN /* all important children have exited */
} PMState;
static PMState pmState = PM_INIT;
bool ClientAuthInProgress = false; /* T during new-client bool ClientAuthInProgress = false; /* T during new-client
* authentication */ * authentication */
bool redirection_done = false; bool redirection_done = false; /* stderr redirected for syslogger? */
/* received START_AUTOVAC_LAUNCHER signal */ /* received START_AUTOVAC_LAUNCHER signal */
static volatile sig_atomic_t start_autovac_launcher = false; static volatile sig_atomic_t start_autovac_launcher = false;
...@@ -262,6 +306,7 @@ static void CleanupBackend(int pid, int exitstatus); ...@@ -262,6 +306,7 @@ static void CleanupBackend(int pid, int exitstatus);
static void HandleChildCrash(int pid, int exitstatus, const char *procname); static void HandleChildCrash(int pid, int exitstatus, const char *procname);
static void LogChildExit(int lev, const char *procname, static void LogChildExit(int lev, const char *procname,
int pid, int exitstatus); int pid, int exitstatus);
static void PostmasterStateMachine(void);
static void BackendInitialize(Port *port); static void BackendInitialize(Port *port);
static int BackendRun(Port *port); static int BackendRun(Port *port);
static void ExitPostmaster(int status); static void ExitPostmaster(int status);
...@@ -275,8 +320,9 @@ static enum CAC_state canAcceptConnections(void); ...@@ -275,8 +320,9 @@ static enum CAC_state canAcceptConnections(void);
static long PostmasterRandom(void); static long PostmasterRandom(void);
static void RandomSalt(char *cryptSalt, char *md5Salt); static void RandomSalt(char *cryptSalt, char *md5Salt);
static void signal_child(pid_t pid, int signal); static void signal_child(pid_t pid, int signal);
static void SignalChildren(int signal);
static void SignalSomeChildren(int signal, bool only_autovac); static void SignalSomeChildren(int signal, bool only_autovac);
#define SignalChildren(sig) SignalSomeChildren(sig, false)
#define SignalAutovacWorkers(sig) SignalSomeChildren(sig, true)
static int CountChildren(void); static int CountChildren(void);
static bool CreateOptsFile(int argc, char *argv[], char *fullprogname); static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
static pid_t StartChildProcess(AuxProcType type); static pid_t StartChildProcess(AuxProcType type);
...@@ -888,6 +934,7 @@ PostmasterMain(int argc, char *argv[]) ...@@ -888,6 +934,7 @@ PostmasterMain(int argc, char *argv[])
ExitPostmaster(1); ExitPostmaster(1);
#ifdef EXEC_BACKEND #ifdef EXEC_BACKEND
/* Write out nondefault GUC settings for child processes to use */
write_nondefault_variables(PGC_POSTMASTER); write_nondefault_variables(PGC_POSTMASTER);
#endif #endif
...@@ -974,6 +1021,8 @@ PostmasterMain(int argc, char *argv[]) ...@@ -974,6 +1021,8 @@ PostmasterMain(int argc, char *argv[])
* We're ready to rock and roll... * We're ready to rock and roll...
*/ */
StartupPID = StartupDataBase(); StartupPID = StartupDataBase();
Assert(StartupPID != 0);
pmState = PM_STARTUP;
status = ServerLoop(); status = ServerLoop();
...@@ -1078,7 +1127,6 @@ checkDataDir(void) ...@@ -1078,7 +1127,6 @@ checkDataDir(void)
static void static void
reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context) reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context)
{ {
} }
#endif /* USE_BONJOUR */ #endif /* USE_BONJOUR */
...@@ -1110,7 +1158,8 @@ pmdaemonize(void) ...@@ -1110,7 +1158,8 @@ pmdaemonize(void)
MyStartTime = time(NULL); MyStartTime = time(NULL);
/* GH: If there's no setsid(), we hopefully don't need silent mode. /*
* GH: If there's no setsid(), we hopefully don't need silent mode.
* Until there's a better solution. * Until there's a better solution.
*/ */
#ifdef HAVE_SETSID #ifdef HAVE_SETSID
...@@ -1150,26 +1199,38 @@ ServerLoop(void) ...@@ -1150,26 +1199,38 @@ ServerLoop(void)
for (;;) for (;;)
{ {
Port *port;
fd_set rmask; fd_set rmask;
struct timeval timeout;
int selres; int selres;
int i;
/* /*
* Wait for something to happen. * Wait for a connection request to arrive.
* *
* We wait at most one minute, to ensure that the other background * We wait at most one minute, to ensure that the other background
* tasks handled below get done even when no requests are arriving. * tasks handled below get done even when no requests are arriving.
*
* If we are in PM_WAIT_DEAD_END state, then we don't want to
* accept any new connections, so we don't call select() at all;
* just sleep for a little bit with signals unblocked.
*/ */
memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set)); memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
PG_SETMASK(&UnBlockSig);
if (pmState == PM_WAIT_DEAD_END)
{
pg_usleep(100000L); /* 100 msec seems reasonable */
selres = 0;
}
else
{
/* must set timeout each time; some OSes change it! */
struct timeval timeout;
timeout.tv_sec = 60; timeout.tv_sec = 60;
timeout.tv_usec = 0; timeout.tv_usec = 0;
PG_SETMASK(&UnBlockSig);
selres = select(nSockets, &rmask, NULL, NULL, &timeout); selres = select(nSockets, &rmask, NULL, NULL, &timeout);
}
/* /*
* Block all signals until we wait again. (This makes it safe for our * Block all signals until we wait again. (This makes it safe for our
...@@ -1177,6 +1238,7 @@ ServerLoop(void) ...@@ -1177,6 +1238,7 @@ ServerLoop(void)
*/ */
PG_SETMASK(&BlockSig); PG_SETMASK(&BlockSig);
/* Now check the select() result */
if (selres < 0) if (selres < 0)
{ {
if (errno != EINTR && errno != EWOULDBLOCK) if (errno != EINTR && errno != EWOULDBLOCK)
...@@ -1194,12 +1256,16 @@ ServerLoop(void) ...@@ -1194,12 +1256,16 @@ ServerLoop(void)
*/ */
if (selres > 0) if (selres > 0)
{ {
int i;
for (i = 0; i < MAXLISTEN; i++) for (i = 0; i < MAXLISTEN; i++)
{ {
if (ListenSocket[i] == -1) if (ListenSocket[i] == -1)
break; break;
if (FD_ISSET(ListenSocket[i], &rmask)) if (FD_ISSET(ListenSocket[i], &rmask))
{ {
Port *port;
port = ConnCreate(ListenSocket[i]); port = ConnCreate(ListenSocket[i]);
if (port) if (port)
{ {
...@@ -1225,27 +1291,20 @@ ServerLoop(void) ...@@ -1225,27 +1291,20 @@ ServerLoop(void)
* state that prevents it, start one. It doesn't matter if this * state that prevents it, start one. It doesn't matter if this
* fails, we'll just try again later. * fails, we'll just try again later.
*/ */
if (BgWriterPID == 0 && StartupPID == 0 && !FatalError) if (BgWriterPID == 0 && pmState == PM_RUN)
{
BgWriterPID = StartBackgroundWriter(); BgWriterPID = StartBackgroundWriter();
/* If shutdown is pending, set it going */
if (Shutdown > NoShutdown && BgWriterPID != 0)
signal_child(BgWriterPID, SIGUSR2);
}
/* /*
* Likewise, if we have lost the walwriter process, try to start a * Likewise, if we have lost the walwriter process, try to start a
* new one. We don't need walwriter to complete a shutdown, so * new one.
* don't start it if shutdown already initiated.
*/ */
if (WalWriterPID == 0 && if (WalWriterPID == 0 && pmState == PM_RUN)
StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
WalWriterPID = StartWalWriter(); WalWriterPID = StartWalWriter();
/* If we have lost the autovacuum launcher, try to start a new one */ /* If we have lost the autovacuum launcher, try to start a new one */
if (AutoVacPID == 0 && if (AutoVacPID == 0 &&
(AutoVacuumingActive() || start_autovac_launcher) && (AutoVacuumingActive() || start_autovac_launcher) &&
StartupPID == 0 && !FatalError && Shutdown == NoShutdown) pmState == PM_RUN)
{ {
AutoVacPID = StartAutoVacLauncher(); AutoVacPID = StartAutoVacLauncher();
if (AutoVacPID != 0) if (AutoVacPID != 0)
...@@ -1253,13 +1312,11 @@ ServerLoop(void) ...@@ -1253,13 +1312,11 @@ ServerLoop(void)
} }
/* If we have lost the archiver, try to start a new one */ /* If we have lost the archiver, try to start a new one */
if (XLogArchivingActive() && PgArchPID == 0 && if (XLogArchivingActive() && PgArchPID == 0 && pmState == PM_RUN)
StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
PgArchPID = pgarch_start(); PgArchPID = pgarch_start();
/* If we have lost the stats collector, try to start a new one */ /* If we have lost the stats collector, try to start a new one */
if (PgStatPID == 0 && if (PgStatPID == 0 && pmState == PM_RUN)
StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
PgStatPID = pgstat_start(); PgStatPID = pgstat_start();
/* /*
...@@ -1285,7 +1342,7 @@ ServerLoop(void) ...@@ -1285,7 +1342,7 @@ ServerLoop(void)
static int static int
initMasks(fd_set *rmask) initMasks(fd_set *rmask)
{ {
int nsocks = -1; int maxsock = -1;
int i; int i;
FD_ZERO(rmask); FD_ZERO(rmask);
...@@ -1297,16 +1354,16 @@ initMasks(fd_set *rmask) ...@@ -1297,16 +1354,16 @@ initMasks(fd_set *rmask)
if (fd == -1) if (fd == -1)
break; break;
FD_SET(fd, rmask); FD_SET(fd, rmask);
if (fd > nsocks) if (fd > maxsock)
nsocks = fd; maxsock = fd;
} }
return nsocks + 1; return maxsock + 1;
} }
/* /*
* Read the startup packet and do something according to it. * Read a client's startup packet and do something according to it.
* *
* Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and * Returns STATUS_OK or STATUS_ERROR, or might call ereport(FATAL) and
* not return at all. * not return at all.
...@@ -1594,7 +1651,6 @@ retry1: ...@@ -1594,7 +1651,6 @@ retry1:
errmsg("sorry, too many clients already"))); errmsg("sorry, too many clients already")));
break; break;
case CAC_OK: case CAC_OK:
default:
break; break;
} }
...@@ -1670,12 +1726,14 @@ static enum CAC_state ...@@ -1670,12 +1726,14 @@ static enum CAC_state
canAcceptConnections(void) canAcceptConnections(void)
{ {
/* Can't start backends when in startup/shutdown/recovery state. */ /* Can't start backends when in startup/shutdown/recovery state. */
if (pmState != PM_RUN)
{
if (Shutdown > NoShutdown) if (Shutdown > NoShutdown)
return CAC_SHUTDOWN; return CAC_SHUTDOWN; /* shutdown is pending */
if (StartupPID) if (pmState == PM_STARTUP && !FatalError)
return CAC_STARTUP; return CAC_STARTUP; /* normal startup */
if (FatalError) return CAC_RECOVERY; /* else must be crash recovery */
return CAC_RECOVERY; }
/* /*
* Don't start too many children. * Don't start too many children.
...@@ -1685,6 +1743,9 @@ canAcceptConnections(void) ...@@ -1685,6 +1743,9 @@ canAcceptConnections(void)
* backend might exit before the auth cycle is completed. The exact * backend might exit before the auth cycle is completed. The exact
* MaxBackends limit is enforced when a new backend tries to join the * MaxBackends limit is enforced when a new backend tries to join the
* shared-inval backend array. * shared-inval backend array.
*
* In the EXEC_BACKEND case, the limit here must match the size of the
* ShmemBackendArray, since all these processes will have cancel codes.
*/ */
if (CountChildren() >= 2 * MaxBackends) if (CountChildren() >= 2 * MaxBackends)
return CAC_TOOMANY; return CAC_TOOMANY;
...@@ -1895,36 +1956,24 @@ pmdie(SIGNAL_ARGS) ...@@ -1895,36 +1956,24 @@ pmdie(SIGNAL_ARGS)
ereport(LOG, ereport(LOG,
(errmsg("received smart shutdown request"))); (errmsg("received smart shutdown request")));
if (pmState == PM_RUN)
{
/* autovacuum workers are told to shut down immediately */ /* autovacuum workers are told to shut down immediately */
if (DLGetHead(BackendList)) SignalAutovacWorkers(SIGTERM);
SignalSomeChildren(SIGTERM, true);
/* and the autovac launcher too */ /* and the autovac launcher too */
if (AutoVacPID != 0) if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGTERM); signal_child(AutoVacPID, SIGTERM);
/* and the walwriter too */ /* and the walwriter too */
if (WalWriterPID != 0) if (WalWriterPID != 0)
signal_child(WalWriterPID, SIGTERM); signal_child(WalWriterPID, SIGTERM);
pmState = PM_WAIT_BACKENDS;
if (DLGetHead(BackendList) || AutoVacPID != 0 || WalWriterPID != 0) }
break; /* let reaper() handle this */
/* /*
* No children left. Begin shutdown of data base system. * Now wait for backends to exit. If there are none,
* PostmasterStateMachine will take the next step.
*/ */
if (StartupPID != 0 || FatalError) PostmasterStateMachine();
break; /* let reaper() handle this */
/* Start the bgwriter if not running */
if (BgWriterPID == 0)
BgWriterPID = StartBackgroundWriter();
/* And tell it to shut down */
if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGUSR2);
/* Tell pgarch to shut down too; nothing left for it to do */
if (PgArchPID != 0)
signal_child(PgArchPID, SIGQUIT);
/* Tell pgstat to shut down too; nothing left for it to do */
if (PgStatPID != 0)
signal_child(PgStatPID, SIGQUIT);
break; break;
case SIGINT: case SIGINT:
...@@ -1941,48 +1990,28 @@ pmdie(SIGNAL_ARGS) ...@@ -1941,48 +1990,28 @@ pmdie(SIGNAL_ARGS)
ereport(LOG, ereport(LOG,
(errmsg("received fast shutdown request"))); (errmsg("received fast shutdown request")));
if (DLGetHead(BackendList) || AutoVacPID != 0 || WalWriterPID != 0) if (StartupPID != 0)
{ signal_child(StartupPID, SIGTERM);
if (!FatalError) if (pmState == PM_RUN)
{ {
ereport(LOG, ereport(LOG,
(errmsg("aborting any active transactions"))); (errmsg("aborting any active transactions")));
/* shut down all backends and autovac workers */
SignalChildren(SIGTERM); SignalChildren(SIGTERM);
/* and the autovac launcher too */
if (AutoVacPID != 0) if (AutoVacPID != 0)
signal_child(AutoVacPID, SIGTERM); signal_child(AutoVacPID, SIGTERM);
/* and the walwriter too */
if (WalWriterPID != 0) if (WalWriterPID != 0)
signal_child(WalWriterPID, SIGTERM); signal_child(WalWriterPID, SIGTERM);
/* reaper() does the rest */ pmState = PM_WAIT_BACKENDS;
}
break;
} }
/* /*
* No children left. Begin shutdown of data base system. * Now wait for backends to exit. If there are none,
* * PostmasterStateMachine will take the next step.
* Note: if we previously got SIGTERM then we may send SIGUSR2 to
* the bgwriter a second time here. This should be harmless.
* Ditto for the signals to the other special children.
*/ */
if (StartupPID != 0) PostmasterStateMachine();
{
signal_child(StartupPID, SIGTERM);
break; /* let reaper() do the rest */
}
if (FatalError)
break; /* let reaper() handle this case */
/* Start the bgwriter if not running */
if (BgWriterPID == 0)
BgWriterPID = StartBackgroundWriter();
/* And tell it to shut down */
if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGUSR2);
/* Tell pgarch to shut down too; nothing left for it to do */
if (PgArchPID != 0)
signal_child(PgArchPID, SIGQUIT);
/* Tell pgstat to shut down too; nothing left for it to do */
if (PgStatPID != 0)
signal_child(PgStatPID, SIGQUIT);
break; break;
case SIGQUIT: case SIGQUIT:
...@@ -1995,6 +2024,7 @@ pmdie(SIGNAL_ARGS) ...@@ -1995,6 +2024,7 @@ pmdie(SIGNAL_ARGS)
*/ */
ereport(LOG, ereport(LOG,
(errmsg("received immediate shutdown request"))); (errmsg("received immediate shutdown request")));
SignalChildren(SIGQUIT);
if (StartupPID != 0) if (StartupPID != 0)
signal_child(StartupPID, SIGQUIT); signal_child(StartupPID, SIGQUIT);
if (BgWriterPID != 0) if (BgWriterPID != 0)
...@@ -2007,8 +2037,6 @@ pmdie(SIGNAL_ARGS) ...@@ -2007,8 +2037,6 @@ pmdie(SIGNAL_ARGS)
signal_child(PgArchPID, SIGQUIT); signal_child(PgArchPID, SIGQUIT);
if (PgStatPID != 0) if (PgStatPID != 0)
signal_child(PgStatPID, SIGQUIT); signal_child(PgStatPID, SIGQUIT);
if (DLGetHead(BackendList))
SignalChildren(SIGQUIT);
ExitPostmaster(0); ExitPostmaster(0);
break; break;
} }
...@@ -2019,55 +2047,54 @@ pmdie(SIGNAL_ARGS) ...@@ -2019,55 +2047,54 @@ pmdie(SIGNAL_ARGS)
} }
/* /*
* Reaper -- signal handler to cleanup after a backend (child) dies. * Reaper -- signal handler to cleanup after a child process dies.
*/ */
static void static void
reaper(SIGNAL_ARGS) reaper(SIGNAL_ARGS)
{ {
int save_errno = errno; int save_errno = errno;
int pid; /* process id of dead child process */
int exitstatus; /* its exit status */
/* These macros hide platform variations in getting child status */
#ifdef HAVE_WAITPID #ifdef HAVE_WAITPID
int status; /* backend exit status */ int status; /* child exit status */
#else #define LOOPTEST() ((pid = waitpid(-1, &status, WNOHANG)) > 0)
#define LOOPHEADER() (exitstatus = status)
#else /* !HAVE_WAITPID */
#ifndef WIN32 #ifndef WIN32
union wait status; /* backend exit status */ union wait status; /* child exit status */
#endif #define LOOPTEST() ((pid = wait3(&status, WNOHANG, NULL)) > 0)
#endif #define LOOPHEADER() (exitstatus = status.w_status)
int exitstatus; #else /* WIN32 */
int pid; /* process id of dead backend */ #define LOOPTEST() ((pid = win32_waitpid(&exitstatus)) > 0)
PG_SETMASK(&BlockSig);
ereport(DEBUG4,
(errmsg_internal("reaping dead processes")));
#ifdef HAVE_WAITPID
while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
{
exitstatus = status;
#else
#ifndef WIN32
while ((pid = wait3(&status, WNOHANG, NULL)) > 0)
{
exitstatus = status.w_status;
#else
while ((pid = win32_waitpid(&exitstatus)) > 0)
{
/* /*
* We need to do this here, and not in CleanupBackend, since this is * We need to do this here, and not in CleanupBackend, since this is
* to be called on all children when we are done with them. Could move * to be called on all children when we are done with them. Could move
* to LogChildExit, but that seems like asking for future trouble... * to LogChildExit, but that seems like asking for future trouble...
*/ */
win32_RemoveChild(pid); #define LOOPHEADER() (win32_RemoveChild(pid))
#endif /* WIN32 */ #endif /* WIN32 */
#endif /* HAVE_WAITPID */ #endif /* HAVE_WAITPID */
PG_SETMASK(&BlockSig);
ereport(DEBUG4,
(errmsg_internal("reaping dead processes")));
while (LOOPTEST())
{
LOOPHEADER();
/* /*
* Check if this child was a startup process. * Check if this child was a startup process.
*/ */
if (StartupPID != 0 && pid == StartupPID) if (pid == StartupPID)
{ {
StartupPID = 0; StartupPID = 0;
/* Note: FATAL exit of startup is treated as catastrophic */ Assert(pmState == PM_STARTUP);
/* FATAL exit of startup is treated as catastrophic */
if (!EXIT_STATUS_0(exitstatus)) if (!EXIT_STATUS_0(exitstatus))
{ {
LogChildExit(LOG, _("startup process"), LogChildExit(LOG, _("startup process"),
...@@ -2083,6 +2110,21 @@ reaper(SIGNAL_ARGS) ...@@ -2083,6 +2110,21 @@ reaper(SIGNAL_ARGS)
*/ */
FatalError = false; FatalError = false;
/*
* Go to shutdown mode if a shutdown request was pending.
*/
if (Shutdown > NoShutdown)
{
pmState = PM_WAIT_BACKENDS;
/* PostmasterStateMachine logic does the rest */
continue;
}
/*
* Otherwise, commence normal operations.
*/
pmState = PM_RUN;
/* /*
* Load the flat authorization file into postmaster's cache. The * Load the flat authorization file into postmaster's cache. The
* startup process has recomputed this from the database contents, * startup process has recomputed this from the database contents,
...@@ -2098,26 +2140,21 @@ reaper(SIGNAL_ARGS) ...@@ -2098,26 +2140,21 @@ reaper(SIGNAL_ARGS)
BgWriterPID = StartBackgroundWriter(); BgWriterPID = StartBackgroundWriter();
/* /*
* Go to shutdown mode if a shutdown request was pending. * Likewise, start other special children as needed. In a restart
* Otherwise, try to start the other special children. * situation, some of them may be alive already.
*/ */
if (Shutdown > NoShutdown && BgWriterPID != 0)
signal_child(BgWriterPID, SIGUSR2);
else if (Shutdown == NoShutdown)
{
if (WalWriterPID == 0) if (WalWriterPID == 0)
WalWriterPID = StartWalWriter(); WalWriterPID = StartWalWriter();
if (AutoVacuumingActive() && AutoVacPID == 0)
AutoVacPID = StartAutoVacLauncher();
if (XLogArchivingActive() && PgArchPID == 0) if (XLogArchivingActive() && PgArchPID == 0)
PgArchPID = pgarch_start(); PgArchPID = pgarch_start();
if (PgStatPID == 0) if (PgStatPID == 0)
PgStatPID = pgstat_start(); PgStatPID = pgstat_start();
if (AutoVacuumingActive() && AutoVacPID == 0)
AutoVacPID = StartAutoVacLauncher();
/* at this point we are really open for business */ /* at this point we are really open for business */
ereport(LOG, ereport(LOG,
(errmsg("database system is ready to accept connections"))); (errmsg("database system is ready to accept connections")));
}
continue; continue;
} }
...@@ -2125,50 +2162,34 @@ reaper(SIGNAL_ARGS) ...@@ -2125,50 +2162,34 @@ reaper(SIGNAL_ARGS)
/* /*
* Was it the bgwriter? * Was it the bgwriter?
*/ */
if (BgWriterPID != 0 && pid == BgWriterPID) if (pid == BgWriterPID)
{ {
BgWriterPID = 0; BgWriterPID = 0;
if (EXIT_STATUS_0(exitstatus) && if (EXIT_STATUS_0(exitstatus) && pmState == PM_SHUTDOWN)
Shutdown > NoShutdown && !FatalError &&
!DLGetHead(BackendList) &&
WalWriterPID == 0 && AutoVacPID == 0)
{ {
/* /*
* Normal postmaster exit is here: we've seen normal exit of * OK, we saw normal exit of the bgwriter after it's been
* the bgwriter after it's been told to shut down. We expect * told to shut down. We expect that it wrote a shutdown
* that it wrote a shutdown checkpoint. (If for some reason * checkpoint. (If for some reason it didn't, recovery will
* it didn't, recovery will occur on next postmaster start.) * occur on next postmaster start.)
* *
* Note: we do not wait around for exit of the archiver or * At this point we should have no normal children left
* stats processes. They've been sent SIGQUIT by this point, * (else we'd not be in PM_SHUTDOWN state) but we might have
* and in any case contain logic to commit hara-kiri if they * dead_end children.
* notice the postmaster is gone.
*/ */
ExitPostmaster(0); Assert(Shutdown > NoShutdown);
pmState = PM_WAIT_DEAD_END;
} }
else
{
/* /*
* Any unexpected exit of the bgwriter (including FATAL exit) * Any unexpected exit of the bgwriter (including FATAL exit)
* is treated as a crash. * is treated as a crash.
*/ */
HandleChildCrash(pid, exitstatus, HandleChildCrash(pid, exitstatus,
_("background writer process")); _("background writer process"));
/*
* If the bgwriter crashed while trying to write the shutdown
* checkpoint, we may as well just stop here; any recovery
* required will happen on next postmaster start.
*/
if (Shutdown > NoShutdown &&
!DLGetHead(BackendList) &&
WalWriterPID == 0 && AutoVacPID == 0)
{
ereport(LOG,
(errmsg("abnormal database system shutdown")));
ExitPostmaster(1);
} }
/* Else, proceed as in normal crash recovery */
continue; continue;
} }
...@@ -2177,7 +2198,7 @@ reaper(SIGNAL_ARGS) ...@@ -2177,7 +2198,7 @@ reaper(SIGNAL_ARGS)
* start a new one at the next iteration of the postmaster's main loop, * start a new one at the next iteration of the postmaster's main loop,
* if necessary. Any other exit condition is treated as a crash. * if necessary. Any other exit condition is treated as a crash.
*/ */
if (WalWriterPID != 0 && pid == WalWriterPID) if (pid == WalWriterPID)
{ {
WalWriterPID = 0; WalWriterPID = 0;
if (!EXIT_STATUS_0(exitstatus)) if (!EXIT_STATUS_0(exitstatus))
...@@ -2191,7 +2212,7 @@ reaper(SIGNAL_ARGS) ...@@ -2191,7 +2212,7 @@ reaper(SIGNAL_ARGS)
* start a new one at the next iteration of the postmaster's main loop, * start a new one at the next iteration of the postmaster's main loop,
* if necessary. Any other exit condition is treated as a crash. * if necessary. Any other exit condition is treated as a crash.
*/ */
if (AutoVacPID != 0 && pid == AutoVacPID) if (pid == AutoVacPID)
{ {
AutoVacPID = 0; AutoVacPID = 0;
if (!EXIT_STATUS_0(exitstatus)) if (!EXIT_STATUS_0(exitstatus))
...@@ -2205,14 +2226,13 @@ reaper(SIGNAL_ARGS) ...@@ -2205,14 +2226,13 @@ reaper(SIGNAL_ARGS)
* to force reset of the rest of the system. (If fail, we'll try * to force reset of the rest of the system. (If fail, we'll try
* again in future cycles of the main loop.) * again in future cycles of the main loop.)
*/ */
if (PgArchPID != 0 && pid == PgArchPID) if (pid == PgArchPID)
{ {
PgArchPID = 0; PgArchPID = 0;
if (!EXIT_STATUS_0(exitstatus)) if (!EXIT_STATUS_0(exitstatus))
LogChildExit(LOG, _("archiver process"), LogChildExit(LOG, _("archiver process"),
pid, exitstatus); pid, exitstatus);
if (XLogArchivingActive() && if (XLogArchivingActive() && pmState == PM_RUN)
StartupPID == 0 && !FatalError && Shutdown == NoShutdown)
PgArchPID = pgarch_start(); PgArchPID = pgarch_start();
continue; continue;
} }
...@@ -2222,19 +2242,19 @@ reaper(SIGNAL_ARGS) ...@@ -2222,19 +2242,19 @@ reaper(SIGNAL_ARGS)
* one; no need to force reset of the rest of the system. (If fail, * one; no need to force reset of the rest of the system. (If fail,
* we'll try again in future cycles of the main loop.) * we'll try again in future cycles of the main loop.)
*/ */
if (PgStatPID != 0 && pid == PgStatPID) if (pid == PgStatPID)
{ {
PgStatPID = 0; PgStatPID = 0;
if (!EXIT_STATUS_0(exitstatus)) if (!EXIT_STATUS_0(exitstatus))
LogChildExit(LOG, _("statistics collector process"), LogChildExit(LOG, _("statistics collector process"),
pid, exitstatus); pid, exitstatus);
if (StartupPID == 0 && !FatalError && Shutdown == NoShutdown) if (pmState == PM_RUN)
PgStatPID = pgstat_start(); PgStatPID = pgstat_start();
continue; continue;
} }
/* Was it the system logger? try to start a new one */ /* Was it the system logger? If so, try to start a new one */
if (SysLoggerPID != 0 && pid == SysLoggerPID) if (pid == SysLoggerPID)
{ {
SysLoggerPID = 0; SysLoggerPID = 0;
/* for safety's sake, launch new logger *first* */ /* for safety's sake, launch new logger *first* */
...@@ -2251,48 +2271,13 @@ reaper(SIGNAL_ARGS) ...@@ -2251,48 +2271,13 @@ reaper(SIGNAL_ARGS)
CleanupBackend(pid, exitstatus); CleanupBackend(pid, exitstatus);
} /* loop over pending child-death reports */ } /* loop over pending child-death reports */
if (FatalError)
{
/* /*
* Wait for all important children to exit, then reset shmem and * After cleaning out the SIGCHLD queue, see if we have any state changes
* StartupDataBase. (We can ignore the archiver and stats processes * or actions to make.
* here since they are not connected to shmem.)
*/ */
if (DLGetHead(BackendList) || StartupPID != 0 || PostmasterStateMachine();
BgWriterPID != 0 || WalWriterPID != 0 ||
AutoVacPID != 0)
goto reaper_done;
ereport(LOG,
(errmsg("all server processes terminated; reinitializing")));
shmem_exit(0);
reset_shared(PostPortNumber);
StartupPID = StartupDataBase();
goto reaper_done;
}
if (Shutdown > NoShutdown) /* Done with signal handler */
{
if (DLGetHead(BackendList) || StartupPID != 0 || AutoVacPID != 0 ||
WalWriterPID != 0)
goto reaper_done;
/* Start the bgwriter if not running */
if (BgWriterPID == 0)
BgWriterPID = StartBackgroundWriter();
/* And tell it to shut down */
if (BgWriterPID != 0)
signal_child(BgWriterPID, SIGUSR2);
/* Tell pgarch to shut down too; nothing left for it to do */
if (PgArchPID != 0)
signal_child(PgArchPID, SIGQUIT);
/* Tell pgstat to shut down too; nothing left for it to do */
if (PgStatPID != 0)
signal_child(PgStatPID, SIGQUIT);
}
reaper_done:
PG_SETMASK(&UnBlockSig); PG_SETMASK(&UnBlockSig);
errno = save_errno; errno = save_errno;
...@@ -2330,12 +2315,13 @@ CleanupBackend(int pid, ...@@ -2330,12 +2315,13 @@ CleanupBackend(int pid,
if (bp->pid == pid) if (bp->pid == pid)
{ {
DLRemove(curr);
free(bp);
DLFreeElem(curr);
#ifdef EXEC_BACKEND #ifdef EXEC_BACKEND
if (!bp->dead_end)
ShmemBackendArrayRemove(pid); ShmemBackendArrayRemove(pid);
#endif #endif
DLRemove(curr);
free(bp);
DLFreeElem(curr);
break; break;
} }
} }
...@@ -2376,12 +2362,13 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) ...@@ -2376,12 +2362,13 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
/* /*
* Found entry for freshly-dead backend, so remove it. * Found entry for freshly-dead backend, so remove it.
*/ */
DLRemove(curr);
free(bp);
DLFreeElem(curr);
#ifdef EXEC_BACKEND #ifdef EXEC_BACKEND
if (!bp->dead_end)
ShmemBackendArrayRemove(pid); ShmemBackendArrayRemove(pid);
#endif #endif
DLRemove(curr);
free(bp);
DLFreeElem(curr);
/* Keep looping so we can signal remaining backends */ /* Keep looping so we can signal remaining backends */
} }
else else
...@@ -2394,6 +2381,9 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) ...@@ -2394,6 +2381,9 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
* and let the user know what's going on. But if SendStop is set * and let the user know what's going on. But if SendStop is set
* (-s on command line), then we send SIGSTOP instead, so that we * (-s on command line), then we send SIGSTOP instead, so that we
* can get core dumps from all backends by hand. * can get core dumps from all backends by hand.
*
* We could exclude dead_end children here, but at least in the
* SIGSTOP case it seems better to include them.
*/ */
if (!FatalError) if (!FatalError)
{ {
...@@ -2442,8 +2432,12 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) ...@@ -2442,8 +2432,12 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT)); signal_child(AutoVacPID, (SendStop ? SIGSTOP : SIGQUIT));
} }
/* Force a power-cycle of the pgarch process too */ /*
/* (Shouldn't be necessary, but just for luck) */ * Force a power-cycle of the pgarch process too. (This isn't absolutely
* necessary, but it seems like a good idea for robustness, and it
* simplifies the state-machine logic in the case where a shutdown
* request arrives during crash processing.)
*/
if (PgArchPID != 0 && !FatalError) if (PgArchPID != 0 && !FatalError)
{ {
ereport(DEBUG2, ereport(DEBUG2,
...@@ -2453,8 +2447,12 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) ...@@ -2453,8 +2447,12 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
signal_child(PgArchPID, SIGQUIT); signal_child(PgArchPID, SIGQUIT);
} }
/* Force a power-cycle of the pgstat process too */ /*
/* (Shouldn't be necessary, but just for luck) */ * Force a power-cycle of the pgstat process too. (This isn't absolutely
* necessary, but it seems like a good idea for robustness, and it
* simplifies the state-machine logic in the case where a shutdown
* request arrives during crash processing.)
*/
if (PgStatPID != 0 && !FatalError) if (PgStatPID != 0 && !FatalError)
{ {
ereport(DEBUG2, ereport(DEBUG2,
...@@ -2468,6 +2466,9 @@ HandleChildCrash(int pid, int exitstatus, const char *procname) ...@@ -2468,6 +2466,9 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
/* We do NOT restart the syslogger */ /* We do NOT restart the syslogger */
FatalError = true; FatalError = true;
/* We now transit into a state of waiting for children to die */
if (pmState == PM_RUN || pmState == PM_SHUTDOWN)
pmState = PM_WAIT_BACKENDS;
} }
/* /*
...@@ -2523,6 +2524,151 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus) ...@@ -2523,6 +2524,151 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
procname, pid, exitstatus))); procname, pid, exitstatus)));
} }
/*
* Advance the postmaster's state machine and take actions as appropriate
*
* This is common code for pmdie() and reaper(), which receive the signals
* that might mean we need to change state.
*
* The function takes no arguments and returns nothing. It reads Shutdown,
* and reads and updates pmState, FatalError and the child PID variables
* directly. The tests below are made one after another and each one
* re-examines pmState, so a single call can advance through several states,
* for example PM_WAIT_BACKENDS -> PM_WAIT_DEAD_END -> PM_NO_CHILDREN.
*
* NOTE(review): the shutdown path ends in ExitPostmaster(), which presumably
* does not return --- confirm against its definition.
*/
static void
PostmasterStateMachine(void)
{
/*
* If we are in a state-machine state that implies waiting for backends
* to exit, see if they're all gone, and change state if so.
*/
if (pmState == PM_WAIT_BACKENDS)
{
/*
* PM_WAIT_BACKENDS state ends when we have no regular backends
* (including autovac workers) and no walwriter or autovac launcher.
* If we are doing crash recovery then we expect the bgwriter to
* exit too, otherwise not. The archiver, stats, and syslogger
* processes are disregarded since they are not connected to shared
* memory; we also disregard dead_end children here.
*/
if (CountChildren() == 0 &&
StartupPID == 0 &&
(BgWriterPID == 0 || !FatalError) &&
WalWriterPID == 0 &&
AutoVacPID == 0)
{
if (FatalError)
{
/*
* Start waiting for dead_end children to die. This state
* change causes ServerLoop to stop creating new ones.
*/
pmState = PM_WAIT_DEAD_END;
}
else
{
/*
* If we get here, we are proceeding with normal shutdown.
* All the regular children are gone, and it's time to tell
* the bgwriter to do a shutdown checkpoint.
*/
Assert(Shutdown > NoShutdown);
/* Start the bgwriter if not running */
if (BgWriterPID == 0)
BgWriterPID = StartBackgroundWriter();
/* And tell it to shut down */
if (BgWriterPID != 0)
{
/* SIGUSR2 is the bgwriter's shutdown request here */
signal_child(BgWriterPID, SIGUSR2);
pmState = PM_SHUTDOWN;
}
else
{
/*
* If we failed to fork a bgwriter, just shut down.
* Any required cleanup will happen at next restart.
* We set FatalError so that an "abnormal shutdown"
* message gets logged when we exit.
*/
FatalError = true;
pmState = PM_WAIT_DEAD_END;
}
/*
* The archiver and stats collector are signaled on both of the
* paths above, i.e. whether or not a bgwriter could be started.
*/
/* Tell pgarch to shut down too; nothing left for it to do */
if (PgArchPID != 0)
signal_child(PgArchPID, SIGQUIT);
/* Tell pgstat to shut down too; nothing left for it to do */
if (PgStatPID != 0)
signal_child(PgStatPID, SIGQUIT);
}
}
}
/*
* This is deliberately a separate "if" rather than an "else if": the
* block above may have just switched us into PM_WAIT_DEAD_END, and in
* that case we want to test for the end of that state right away.
*/
if (pmState == PM_WAIT_DEAD_END)
{
/*
* PM_WAIT_DEAD_END state ends when the BackendList is entirely
* empty (ie, no dead_end children remain).
*/
if (!DLGetHead(BackendList))
{
/* These other guys should be dead already */
Assert(StartupPID == 0);
Assert(BgWriterPID == 0);
Assert(WalWriterPID == 0);
Assert(AutoVacPID == 0);
/* archiver, stats, and syslogger are not considered here */
pmState = PM_NO_CHILDREN;
}
}
/*
* If we've been told to shut down, we exit as soon as there are no
* remaining children. If there was a crash, cleanup will occur at the
* next startup. (Before PostgreSQL 8.3, we tried to recover from the
* crash before exiting, but that seems unwise if we are quitting because
* we got SIGTERM from init --- there may well not be time for recovery
* before init decides to SIGKILL us.)
*
* Note: we do not wait around for exit of the archiver or stats
* processes. They've been sent SIGQUIT by this point (either when we
* entered PM_SHUTDOWN state, or when we set FatalError, and at least one
* of those must have happened by now). In any case they contain logic to
* commit hara-kiri if they notice the postmaster is gone. Since they
* aren't connected to shared memory, they pose no problem for shutdown.
* The syslogger is not considered either, since it's intended to survive
* till the postmaster exits.
*/
if (Shutdown > NoShutdown && pmState == PM_NO_CHILDREN)
{
if (FatalError)
{
/* Exit status 1 marks the abnormal case; the normal case uses 0 */
ereport(LOG, (errmsg("abnormal database system shutdown")));
ExitPostmaster(1);
}
else
{
/* Normal exit from the postmaster is here */
ExitPostmaster(0);
}
}
/*
* If we need to recover from a crash, wait for all shmem-connected
* children to exit, then reset shmem and StartupDataBase. (We can ignore
* the archiver and stats processes here since they are not connected to
* shmem.)
*
* NOTE(review): this is tested after the shutdown case above, so it is
* expected to be reached only when no shutdown is pending; that relies
* on ExitPostmaster() not returning --- confirm.
*/
if (FatalError && pmState == PM_NO_CHILDREN)
{
ereport(LOG,
(errmsg("all server processes terminated; reinitializing")));
shmem_exit(0);
reset_shared(PostPortNumber);
StartupPID = StartupDataBase();
/* The startup process must exist before we enter PM_STARTUP */
Assert(StartupPID != 0);
pmState = PM_STARTUP;
}
}
/* /*
* Send a signal to a postmaster child process * Send a signal to a postmaster child process
* *
...@@ -2561,19 +2707,9 @@ signal_child(pid_t pid, int signal) ...@@ -2561,19 +2707,9 @@ signal_child(pid_t pid, int signal)
} }
/* /*
* Send a signal to all backend children, including autovacuum workers (but NOT * Send a signal to all backend children, including autovacuum workers
* special children). * (but NOT special children; dead_end children are never signaled, either).
*/ * If only_autovac is TRUE, only the autovacuum worker processes are signalled.
static void
SignalChildren(int signal)
{
SignalSomeChildren(signal, false);
}
/*
* Send a signal to all backend children, including autovacuum workers (but NOT
* special children). If only_autovac is TRUE, only the autovacuum worker
* processes are signalled.
*/ */
static void static void
SignalSomeChildren(int signal, bool only_autovac) SignalSomeChildren(int signal, bool only_autovac)
...@@ -2584,6 +2720,8 @@ SignalSomeChildren(int signal, bool only_autovac) ...@@ -2584,6 +2720,8 @@ SignalSomeChildren(int signal, bool only_autovac)
{ {
Backend *bp = (Backend *) DLE_VAL(curr); Backend *bp = (Backend *) DLE_VAL(curr);
if (bp->dead_end)
continue;
if (only_autovac && !bp->is_autovacuum) if (only_autovac && !bp->is_autovacuum)
continue; continue;
...@@ -2688,8 +2826,10 @@ BackendStartup(Port *port) ...@@ -2688,8 +2826,10 @@ BackendStartup(Port *port)
bn->pid = pid; bn->pid = pid;
bn->cancel_key = MyCancelKey; bn->cancel_key = MyCancelKey;
bn->is_autovacuum = false; bn->is_autovacuum = false;
bn->dead_end = (port->canAcceptConnections != CAC_OK);
DLAddHead(BackendList, DLNewElem(bn)); DLAddHead(BackendList, DLNewElem(bn));
#ifdef EXEC_BACKEND #ifdef EXEC_BACKEND
if (!bn->dead_end)
ShmemBackendArrayAdd(bn); ShmemBackendArrayAdd(bn);
#endif #endif
...@@ -3647,7 +3787,7 @@ sigusr1_handler(SIGNAL_ARGS) ...@@ -3647,7 +3787,7 @@ sigusr1_handler(SIGNAL_ARGS)
} }
if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) && if (CheckPostmasterSignal(PMSIGNAL_WAKEN_ARCHIVER) &&
PgArchPID != 0 && Shutdown == NoShutdown) PgArchPID != 0 && Shutdown <= SmartShutdown)
{ {
/* /*
* Send SIGUSR1 to archiver process, to wake it up and begin archiving * Send SIGUSR1 to archiver process, to wake it up and begin archiving
...@@ -3790,7 +3930,8 @@ PostmasterRandom(void) ...@@ -3790,7 +3930,8 @@ PostmasterRandom(void)
} }
/* /*
* Count up number of child processes (regular backends only) * Count up number of child processes (excluding special children and
* dead_end children)
*/ */
static int static int
CountChildren(void) CountChildren(void)
...@@ -3799,7 +3940,12 @@ CountChildren(void) ...@@ -3799,7 +3940,12 @@ CountChildren(void)
int cnt = 0; int cnt = 0;
for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr)) for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
{
Backend *bp = (Backend *) DLE_VAL(curr);
if (!bp->dead_end)
cnt++; cnt++;
}
return cnt; return cnt;
} }
...@@ -3918,13 +4064,14 @@ StartAutovacuumWorker(void) ...@@ -3918,13 +4064,14 @@ StartAutovacuumWorker(void)
Backend *bn; Backend *bn;
/* /*
* do nothing if not in condition to run a process. This should not * If not in condition to run a process, don't try, but handle it like a
* actually happen, since the signal is only supposed to be sent by * fork failure. This does not normally happen, since the signal is only
* autovacuum launcher when it's OK to do it, but test for it just in case. * supposed to be sent by autovacuum launcher when it's OK to do it, but
* we have to check to avoid race-condition problems during DB state
* changes.
*/ */
if (StartupPID != 0 || FatalError || Shutdown != NoShutdown) if (canAcceptConnections() == CAC_OK)
return; {
/* /*
* Compute the cancel key that will be assigned to this session. * Compute the cancel key that will be assigned to this session.
* We probably don't need cancel keys for autovac workers, but we'd * We probably don't need cancel keys for autovac workers, but we'd
...@@ -3941,6 +4088,7 @@ StartAutovacuumWorker(void) ...@@ -3941,6 +4088,7 @@ StartAutovacuumWorker(void)
{ {
bn->cancel_key = MyCancelKey; bn->cancel_key = MyCancelKey;
bn->is_autovacuum = true; bn->is_autovacuum = true;
bn->dead_end = false;
DLAddHead(BackendList, DLNewElem(bn)); DLAddHead(BackendList, DLNewElem(bn));
#ifdef EXEC_BACKEND #ifdef EXEC_BACKEND
ShmemBackendArrayAdd(bn); ShmemBackendArrayAdd(bn);
...@@ -3959,11 +4107,18 @@ StartAutovacuumWorker(void) ...@@ -3959,11 +4107,18 @@ StartAutovacuumWorker(void)
ereport(LOG, ereport(LOG,
(errcode(ERRCODE_OUT_OF_MEMORY), (errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory"))); errmsg("out of memory")));
}
/* report the failure to the launcher */ /*
AutoVacWorkerFailed(); * Report the failure to the launcher, if it's running. (If it's not,
* we might not even be connected to shared memory, so don't try to
* call AutoVacWorkerFailed.)
*/
if (AutoVacPID != 0) if (AutoVacPID != 0)
{
AutoVacWorkerFailed();
kill(AutoVacPID, SIGUSR1); kill(AutoVacPID, SIGUSR1);
}
} }
/* /*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment