Commit bf261798 authored by Alvaro Herrera

Fix various bugs in postmaster SIGKILL processing

Clamp the sleep time during immediate shutdown or crash to a minimum of
zero, instead of capping it at a maximum of one second.  The previous
code could produce a negative sleep time, causing select() calls to fail.

Also, on crash recovery, reset AbortStartTime as soon as SIGKILL is sent
or crash recovery has commenced, instead of waiting until the startup
process completes.  Resetting AbortStartTime right after SIGKILL is sent
also avoids sending SIGKILL repeatedly.

Per trouble report from Jeff Janes on
CAMkU=1xd3=wFqZwwuXPWe4BQs3h1seYo8LV9JtSjW5RodoPxMg@mail.gmail.com

Author: MauMau
parent 2d6c0f10
...@@ -1422,9 +1422,9 @@ DetermineSleepTime(struct timeval * timeout) ...@@ -1422,9 +1422,9 @@ DetermineSleepTime(struct timeval * timeout)
{ {
if (AbortStartTime > 0) if (AbortStartTime > 0)
{ {
/* remaining time, but at least 1 second */ /* time left to abort; clamp to 0 in case it already expired */
timeout->tv_sec = Min(SIGKILL_CHILDREN_AFTER_SECS - timeout->tv_sec = Max(SIGKILL_CHILDREN_AFTER_SECS -
(time(NULL) - AbortStartTime), 1); (time(NULL) - AbortStartTime), 0);
timeout->tv_usec = 0; timeout->tv_usec = 0;
} }
else else
...@@ -1676,10 +1676,13 @@ ServerLoop(void) ...@@ -1676,10 +1676,13 @@ ServerLoop(void)
* Note we also do this during recovery from a process crash. * Note we also do this during recovery from a process crash.
*/ */
if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) && if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
AbortStartTime > 0 &&
now - AbortStartTime >= SIGKILL_CHILDREN_AFTER_SECS) now - AbortStartTime >= SIGKILL_CHILDREN_AFTER_SECS)
{ {
/* We were gentle with them before. Not anymore */ /* We were gentle with them before. Not anymore */
TerminateChildren(SIGKILL); TerminateChildren(SIGKILL);
/* reset flag so we don't SIGKILL again */
AbortStartTime = 0;
/* /*
* Additionally, unless we're recovering from a process crash, it's * Additionally, unless we're recovering from a process crash, it's
...@@ -2584,7 +2587,7 @@ reaper(SIGNAL_ARGS) ...@@ -2584,7 +2587,7 @@ reaper(SIGNAL_ARGS)
* Startup succeeded, commence normal operations * Startup succeeded, commence normal operations
*/ */
FatalError = false; FatalError = false;
AbortStartTime = 0; Assert(AbortStartTime == 0);
ReachedNormalRunning = true; ReachedNormalRunning = true;
pmState = PM_RUN; pmState = PM_RUN;
...@@ -3544,6 +3547,8 @@ PostmasterStateMachine(void) ...@@ -3544,6 +3547,8 @@ PostmasterStateMachine(void)
StartupPID = StartupDataBase(); StartupPID = StartupDataBase();
Assert(StartupPID != 0); Assert(StartupPID != 0);
pmState = PM_STARTUP; pmState = PM_STARTUP;
/* crash recovery started, reset SIGKILL flag */
AbortStartTime = 0;
} }
} }
...@@ -4737,7 +4742,7 @@ sigusr1_handler(SIGNAL_ARGS) ...@@ -4737,7 +4742,7 @@ sigusr1_handler(SIGNAL_ARGS)
{ {
/* WAL redo has started. We're out of reinitialization. */ /* WAL redo has started. We're out of reinitialization. */
FatalError = false; FatalError = false;
AbortStartTime = 0; Assert(AbortStartTime == 0);
/* /*
* Crank up the background tasks. It doesn't matter if this fails, * Crank up the background tasks. It doesn't matter if this fails,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment