Commit 89fd72cb authored by Heikki Linnakangas

Introduce a pipe between postmaster and each backend, which can be used to
detect postmaster death. Postmaster keeps the write-end of the pipe open,
so when it dies, children get EOF in the read-end. That can conveniently
be waited for in select(), which allows eliminating some of the polling
loops that check for postmaster death. This patch doesn't yet change all
the loops to use the new mechanism; expect a follow-on patch to do that.

This changes the interface to WaitLatch, so that it takes as argument a
bitmask of events that it waits for. Possible events are latch set, timeout,
postmaster death, and socket becoming readable or writeable.

The pipe method behaves slightly differently from the kill() method
previously used in PostmasterIsAlive() in the case that postmaster has died,
but its parent has not yet read its exit code with waitpid(). The pipe
returns EOF as soon as the process dies, but kill() continues to report success
until waitpid() has been called (in other words, while the process is a zombie).
Because of that, change PostmasterIsAlive() to use the pipe too, otherwise
WaitLatch() would return immediately with WL_POSTMASTER_DEATH, while
PostmasterIsAlive() would claim it's still alive. That could easily lead to
busy-waiting while postmaster is in zombie state.

Peter Geoghegan with further changes by me, reviewed by Fujii Masao and
Florian Pflug.
parent 9598afa3
...@@ -9938,7 +9938,7 @@ HandleStartupProcInterrupts(void) ...@@ -9938,7 +9938,7 @@ HandleStartupProcInterrupts(void)
* Emergency bailout if postmaster has died. This is to avoid the * Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children. * necessity for manual cleanup of all postmaster children.
*/ */
if (IsUnderPostmaster && !PostmasterIsAlive(true)) if (IsUnderPostmaster && !PostmasterIsAlive())
exit(1); exit(1);
} }
...@@ -10165,7 +10165,7 @@ retry: ...@@ -10165,7 +10165,7 @@ retry:
/* /*
* Wait for more WAL to arrive, or timeout to be reached * Wait for more WAL to arrive, or timeout to be reached
*/ */
WaitLatch(&XLogCtl->recoveryWakeupLatch, 5000000L); WaitLatch(&XLogCtl->recoveryWakeupLatch, WL_LATCH_SET | WL_TIMEOUT, 5000000L);
ResetLatch(&XLogCtl->recoveryWakeupLatch); ResetLatch(&XLogCtl->recoveryWakeupLatch);
} }
else else
......
...@@ -93,6 +93,7 @@ ...@@ -93,6 +93,7 @@
#endif #endif
#include "miscadmin.h" #include "miscadmin.h"
#include "postmaster/postmaster.h"
#include "storage/latch.h" #include "storage/latch.h"
#include "storage/shmem.h" #include "storage/shmem.h"
...@@ -176,34 +177,44 @@ DisownLatch(volatile Latch *latch) ...@@ -176,34 +177,44 @@ DisownLatch(volatile Latch *latch)
} }
/* /*
* Wait for given latch to be set or until timeout is exceeded. * Wait for a given latch to be set, postmaster death, or until timeout is
* If the latch is already set, the function returns immediately. * exceeded. 'wakeEvents' is a bitmask that specifies which of those events
* to wait for. If the latch is already set (and WL_LATCH_SET is given), the
* function returns immediately.
* *
* The 'timeout' is given in microseconds, and -1 means wait forever. * The 'timeout' is given in microseconds. It must be >= 0 if WL_TIMEOUT
* On some platforms, signals cause the timeout to be restarted, so beware * event is given, otherwise it is ignored. On some platforms, signals cause
* that the function can sleep for several times longer than the specified * the timeout to be restarted, so beware that the function can sleep for
* timeout. * several times longer than the specified timeout.
* *
* The latch must be owned by the current process, ie. it must be a * The latch must be owned by the current process, ie. it must be a
* backend-local latch initialized with InitLatch, or a shared latch * backend-local latch initialized with InitLatch, or a shared latch
* associated with the current process by calling OwnLatch. * associated with the current process by calling OwnLatch.
* *
* Returns 'true' if the latch was set, or 'false' if timeout was reached. * Returns bit field indicating which condition(s) caused the wake-up. Note
* that if multiple wake-up conditions are true, there is no guarantee that
* we return all of them in one call, but we will return at least one. Also,
* according to the select(2) man page on Linux, select(2) may spuriously
* return and report a file descriptor as readable, when it's not. We use
* select(2), so WaitLatch can also spuriously claim that a socket is
* readable, or postmaster has died, even when none of the wake conditions
* have been satisfied. That should be rare in practice, but the caller
* should not use the return value for anything critical, re-checking the
* situation with PostmasterIsAlive() or read() on a socket if necessary.
*/ */
bool int
WaitLatch(volatile Latch *latch, long timeout) WaitLatch(volatile Latch *latch, int wakeEvents, long timeout)
{ {
return WaitLatchOrSocket(latch, PGINVALID_SOCKET, false, false, timeout) > 0; return WaitLatchOrSocket(latch, wakeEvents, PGINVALID_SOCKET, timeout);
} }
/* /*
* Like WaitLatch, but will also return when there's data available in * Like WaitLatch, but with an extra socket argument for WL_SOCKET_*
* 'sock' for reading or writing. Returns 0 if timeout was reached, * conditions.
* 1 if the latch was set, 2 if the socket became readable or writable.
*/ */
int int
WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead, WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
bool forWrite, long timeout) long timeout)
{ {
struct timeval tv, struct timeval tv,
*tvp = NULL; *tvp = NULL;
...@@ -212,19 +223,26 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead, ...@@ -212,19 +223,26 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead,
int rc; int rc;
int result = 0; int result = 0;
if (latch->owner_pid != MyProcPid) /* Ignore WL_SOCKET_* events if no valid socket is given */
if (sock == PGINVALID_SOCKET)
wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);
Assert(wakeEvents != 0); /* must have at least one wake event */
if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
elog(ERROR, "cannot wait on a latch owned by another process"); elog(ERROR, "cannot wait on a latch owned by another process");
/* Initialize timeout */ /* Initialize timeout */
if (timeout >= 0) if (wakeEvents & WL_TIMEOUT)
{ {
Assert(timeout >= 0);
tv.tv_sec = timeout / 1000000L; tv.tv_sec = timeout / 1000000L;
tv.tv_usec = timeout % 1000000L; tv.tv_usec = timeout % 1000000L;
tvp = &tv; tvp = &tv;
} }
waiting = true; waiting = true;
for (;;) do
{ {
int hifd; int hifd;
...@@ -235,16 +253,28 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead, ...@@ -235,16 +253,28 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead,
* do that), and the select() will return immediately. * do that), and the select() will return immediately.
*/ */
drainSelfPipe(); drainSelfPipe();
if (latch->is_set) if ((wakeEvents & WL_LATCH_SET) && latch->is_set)
{ {
result = 1; result |= WL_LATCH_SET;
/*
* Leave loop immediately, avoid blocking again. We don't attempt
* to report any other events that might also be satisfied.
*/
break; break;
} }
FD_ZERO(&input_mask); FD_ZERO(&input_mask);
FD_SET(selfpipe_readfd, &input_mask); FD_SET(selfpipe_readfd, &input_mask);
hifd = selfpipe_readfd; hifd = selfpipe_readfd;
if (sock != PGINVALID_SOCKET && forRead)
if (wakeEvents & WL_POSTMASTER_DEATH)
{
FD_SET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask);
if (postmaster_alive_fds[POSTMASTER_FD_WATCH] > hifd)
hifd = postmaster_alive_fds[POSTMASTER_FD_WATCH];
}
if (wakeEvents & WL_SOCKET_READABLE)
{ {
FD_SET(sock, &input_mask); FD_SET(sock, &input_mask);
if (sock > hifd) if (sock > hifd)
...@@ -252,14 +282,17 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead, ...@@ -252,14 +282,17 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead,
} }
FD_ZERO(&output_mask); FD_ZERO(&output_mask);
if (sock != PGINVALID_SOCKET && forWrite) if (wakeEvents & WL_SOCKET_WRITEABLE)
{ {
FD_SET(sock, &output_mask); FD_SET(sock, &output_mask);
if (sock > hifd) if (sock > hifd)
hifd = sock; hifd = sock;
} }
/* Sleep */
rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp); rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp);
/* Check return code */
if (rc < 0) if (rc < 0)
{ {
if (errno == EINTR) if (errno == EINTR)
...@@ -268,20 +301,26 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead, ...@@ -268,20 +301,26 @@ WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, bool forRead,
(errcode_for_socket_access(), (errcode_for_socket_access(),
errmsg("select() failed: %m"))); errmsg("select() failed: %m")));
} }
if (rc == 0) if (rc == 0 && (wakeEvents & WL_TIMEOUT))
{ {
/* timeout exceeded */ /* timeout exceeded */
result = 0; result |= WL_TIMEOUT;
break; }
if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask))
{
/* data available in socket */
result |= WL_SOCKET_READABLE;
} }
if (sock != PGINVALID_SOCKET && if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
((forRead && FD_ISSET(sock, &input_mask)) ||
(forWrite && FD_ISSET(sock, &output_mask))))
{ {
result = 2; result |= WL_SOCKET_WRITEABLE;
break; /* data available in socket */
} }
if ((wakeEvents & WL_POSTMASTER_DEATH) &&
FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask))
{
result |= WL_POSTMASTER_DEATH;
} }
} while(result == 0);
waiting = false; waiting = false;
return result; return result;
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <unistd.h> #include <unistd.h>
#include "miscadmin.h" #include "miscadmin.h"
#include "postmaster/postmaster.h"
#include "replication/walsender.h" #include "replication/walsender.h"
#include "storage/latch.h" #include "storage/latch.h"
#include "storage/shmem.h" #include "storage/shmem.h"
...@@ -81,43 +82,67 @@ DisownLatch(volatile Latch *latch) ...@@ -81,43 +82,67 @@ DisownLatch(volatile Latch *latch)
latch->owner_pid = 0; latch->owner_pid = 0;
} }
bool int
WaitLatch(volatile Latch *latch, long timeout) WaitLatch(volatile Latch *latch, int wakeEvents, long timeout)
{ {
return WaitLatchOrSocket(latch, PGINVALID_SOCKET, false, false, timeout) > 0; return WaitLatchOrSocket(latch, wakeEvents, PGINVALID_SOCKET, timeout);
} }
int int
WaitLatchOrSocket(volatile Latch *latch, SOCKET sock, bool forRead, WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, SOCKET sock,
bool forWrite, long timeout) long timeout)
{ {
DWORD rc; DWORD rc;
HANDLE events[3]; HANDLE events[4];
HANDLE latchevent; HANDLE latchevent;
HANDLE sockevent = WSA_INVALID_EVENT; /* silence compiler */ HANDLE sockevent = WSA_INVALID_EVENT;
int numevents; int numevents;
int result = 0; int result = 0;
int pmdeath_eventno;
long timeout_ms;
Assert(wakeEvents != 0);
/* Ignore WL_SOCKET_* events if no valid socket is given */
if (sock == PGINVALID_SOCKET)
wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);
/* Convert timeout to milliseconds for WaitForMultipleObjects() */
if (wakeEvents & WL_TIMEOUT)
{
Assert(timeout >= 0);
timeout_ms = timeout / 1000;
}
else
timeout_ms = INFINITE;
/* Construct an array of event handles for WaitforMultipleObjects() */
latchevent = latch->event; latchevent = latch->event;
events[0] = latchevent; events[0] = latchevent;
events[1] = pgwin32_signal_event; events[1] = pgwin32_signal_event;
numevents = 2; numevents = 2;
if (sock != PGINVALID_SOCKET && (forRead || forWrite)) if (((wakeEvents & WL_SOCKET_READABLE) ||
(wakeEvents & WL_SOCKET_WRITEABLE)))
{ {
int flags = 0; int flags = 0;
if (forRead) if (wakeEvents & WL_SOCKET_READABLE)
flags |= FD_READ; flags |= FD_READ;
if (forWrite) if (wakeEvents & WL_SOCKET_WRITEABLE)
flags |= FD_WRITE; flags |= FD_WRITE;
sockevent = WSACreateEvent(); sockevent = WSACreateEvent();
WSAEventSelect(sock, sockevent, flags); WSAEventSelect(sock, sockevent, flags);
events[numevents++] = sockevent; events[numevents++] = sockevent;
} }
if (wakeEvents & WL_POSTMASTER_DEATH)
{
pmdeath_eventno = numevents;
events[numevents++] = PostmasterHandle;
}
for (;;) do
{ {
/* /*
* Reset the event, and check if the latch is set already. If someone * Reset the event, and check if the latch is set already. If someone
...@@ -127,45 +152,64 @@ WaitLatchOrSocket(volatile Latch *latch, SOCKET sock, bool forRead, ...@@ -127,45 +152,64 @@ WaitLatchOrSocket(volatile Latch *latch, SOCKET sock, bool forRead,
*/ */
if (!ResetEvent(latchevent)) if (!ResetEvent(latchevent))
elog(ERROR, "ResetEvent failed: error code %d", (int) GetLastError()); elog(ERROR, "ResetEvent failed: error code %d", (int) GetLastError());
if (latch->is_set) if (latch->is_set && (wakeEvents & WL_LATCH_SET))
{ {
result = 1; result |= WL_LATCH_SET;
/*
* Leave loop immediately, avoid blocking again. We don't attempt
* to report any other events that might also be satisfied.
*/
break; break;
} }
rc = WaitForMultipleObjects(numevents, events, FALSE, rc = WaitForMultipleObjects(numevents, events, FALSE, timeout_ms);
(timeout >= 0) ? (timeout / 1000) : INFINITE);
if (rc == WAIT_FAILED) if (rc == WAIT_FAILED)
elog(ERROR, "WaitForMultipleObjects() failed: error code %d", (int) GetLastError()); elog(ERROR, "WaitForMultipleObjects() failed: error code %d", (int) GetLastError());
/* Participate in Windows signal emulation */
else if (rc == WAIT_OBJECT_0 + 1)
pgwin32_dispatch_queued_signals();
else if ((wakeEvents & WL_POSTMASTER_DEATH) &&
rc == WAIT_OBJECT_0 + pmdeath_eventno)
{
/* Postmaster died */
result |= WL_POSTMASTER_DEATH;
}
else if (rc == WAIT_TIMEOUT) else if (rc == WAIT_TIMEOUT)
{ {
result = 0; result |= WL_TIMEOUT;
break;
} }
else if (rc == WAIT_OBJECT_0 + 1) else if ((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != 0 &&
pgwin32_dispatch_queued_signals(); rc == WAIT_OBJECT_0 + 2) /* socket is at event slot 2 */
else if (rc == WAIT_OBJECT_0 + 2)
{ {
WSANETWORKEVENTS resEvents; WSANETWORKEVENTS resEvents;
Assert(sock != PGINVALID_SOCKET);
ZeroMemory(&resEvents, sizeof(resEvents)); ZeroMemory(&resEvents, sizeof(resEvents));
if (WSAEnumNetworkEvents(sock, sockevent, &resEvents) == SOCKET_ERROR) if (WSAEnumNetworkEvents(sock, sockevent, &resEvents) == SOCKET_ERROR)
ereport(FATAL, ereport(FATAL,
(errmsg_internal("failed to enumerate network events: %i", (int) GetLastError()))); (errmsg_internal("failed to enumerate network events: %i", (int) GetLastError())));
if ((forRead && resEvents.lNetworkEvents & FD_READ) || if ((wakeEvents & WL_SOCKET_READABLE) &&
(forWrite && resEvents.lNetworkEvents & FD_WRITE)) (resEvents.lNetworkEvents & FD_READ))
result = 2; {
break; result |= WL_SOCKET_READABLE;
}
if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
(resEvents.lNetworkEvents & FD_WRITE))
{
result |= WL_SOCKET_WRITEABLE;
}
} }
/* Otherwise it must be the latch event */
else if (rc != WAIT_OBJECT_0) else if (rc != WAIT_OBJECT_0)
elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %d", (int) rc); elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %d", (int) rc);
} }
while(result == 0);
/* Clean up the handle we created for the socket */ /* Clean up the handle we created for the socket */
if (sock != PGINVALID_SOCKET && (forRead || forWrite)) if (sockevent != WSA_INVALID_EVENT)
{ {
WSAEventSelect(sock, sockevent, 0); WSAEventSelect(sock, sockevent, 0);
WSACloseEvent(sockevent); WSACloseEvent(sockevent);
......
...@@ -556,7 +556,7 @@ AutoVacLauncherMain(int argc, char *argv[]) ...@@ -556,7 +556,7 @@ AutoVacLauncherMain(int argc, char *argv[])
* Emergency bailout if postmaster has died. This is to avoid the * Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children. * necessity for manual cleanup of all postmaster children.
*/ */
if (!PostmasterIsAlive(true)) if (!PostmasterIsAlive())
proc_exit(1); proc_exit(1);
launcher_determine_sleep((AutoVacuumShmem->av_freeWorkers != NULL), launcher_determine_sleep((AutoVacuumShmem->av_freeWorkers != NULL),
...@@ -593,7 +593,7 @@ AutoVacLauncherMain(int argc, char *argv[]) ...@@ -593,7 +593,7 @@ AutoVacLauncherMain(int argc, char *argv[])
* Emergency bailout if postmaster has died. This is to avoid the * Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children. * necessity for manual cleanup of all postmaster children.
*/ */
if (!PostmasterIsAlive(true)) if (!PostmasterIsAlive())
proc_exit(1); proc_exit(1);
if (got_SIGTERM || got_SIGHUP || got_SIGUSR2) if (got_SIGTERM || got_SIGHUP || got_SIGUSR2)
......
...@@ -381,7 +381,7 @@ BackgroundWriterMain(void) ...@@ -381,7 +381,7 @@ BackgroundWriterMain(void)
* Emergency bailout if postmaster has died. This is to avoid the * Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children. * necessity for manual cleanup of all postmaster children.
*/ */
if (!PostmasterIsAlive(true)) if (!PostmasterIsAlive())
exit(1); exit(1);
/* /*
......
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
#include "postmaster/postmaster.h" #include "postmaster/postmaster.h"
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/latch.h"
#include "storage/pg_shmem.h" #include "storage/pg_shmem.h"
#include "storage/pmsignal.h" #include "storage/pmsignal.h"
#include "utils/guc.h" #include "utils/guc.h"
...@@ -87,6 +88,11 @@ static volatile sig_atomic_t got_SIGTERM = false; ...@@ -87,6 +88,11 @@ static volatile sig_atomic_t got_SIGTERM = false;
static volatile sig_atomic_t wakened = false; static volatile sig_atomic_t wakened = false;
static volatile sig_atomic_t ready_to_stop = false; static volatile sig_atomic_t ready_to_stop = false;
/*
* Latch used by signal handlers to wake up the sleep in the main loop.
*/
static Latch mainloop_latch;
/* ---------- /* ----------
* Local function forward declarations * Local function forward declarations
* ---------- * ----------
...@@ -228,6 +234,8 @@ PgArchiverMain(int argc, char *argv[]) ...@@ -228,6 +234,8 @@ PgArchiverMain(int argc, char *argv[])
MyProcPid = getpid(); /* reset MyProcPid */ MyProcPid = getpid(); /* reset MyProcPid */
InitLatch(&mainloop_latch); /* initialize latch used in main loop */
MyStartTime = time(NULL); /* record Start Time for logging */ MyStartTime = time(NULL); /* record Start Time for logging */
/* /*
...@@ -282,6 +290,8 @@ ArchSigHupHandler(SIGNAL_ARGS) ...@@ -282,6 +290,8 @@ ArchSigHupHandler(SIGNAL_ARGS)
{ {
/* set flag to re-read config file at next convenient time */ /* set flag to re-read config file at next convenient time */
got_SIGHUP = true; got_SIGHUP = true;
/* let the waiting loop iterate */
SetLatch(&mainloop_latch);
} }
/* SIGTERM signal handler for archiver process */ /* SIGTERM signal handler for archiver process */
...@@ -295,6 +305,8 @@ ArchSigTermHandler(SIGNAL_ARGS) ...@@ -295,6 +305,8 @@ ArchSigTermHandler(SIGNAL_ARGS)
* archive commands. * archive commands.
*/ */
got_SIGTERM = true; got_SIGTERM = true;
/* let the waiting loop iterate */
SetLatch(&mainloop_latch);
} }
/* SIGUSR1 signal handler for archiver process */ /* SIGUSR1 signal handler for archiver process */
...@@ -303,6 +315,8 @@ pgarch_waken(SIGNAL_ARGS) ...@@ -303,6 +315,8 @@ pgarch_waken(SIGNAL_ARGS)
{ {
/* set flag that there is work to be done */ /* set flag that there is work to be done */
wakened = true; wakened = true;
/* let the waiting loop iterate */
SetLatch(&mainloop_latch);
} }
/* SIGUSR2 signal handler for archiver process */ /* SIGUSR2 signal handler for archiver process */
...@@ -311,6 +325,8 @@ pgarch_waken_stop(SIGNAL_ARGS) ...@@ -311,6 +325,8 @@ pgarch_waken_stop(SIGNAL_ARGS)
{ {
/* set flag to do a final cycle and shut down afterwards */ /* set flag to do a final cycle and shut down afterwards */
ready_to_stop = true; ready_to_stop = true;
/* let the waiting loop iterate */
SetLatch(&mainloop_latch);
} }
/* /*
...@@ -321,7 +337,7 @@ pgarch_waken_stop(SIGNAL_ARGS) ...@@ -321,7 +337,7 @@ pgarch_waken_stop(SIGNAL_ARGS)
static void static void
pgarch_MainLoop(void) pgarch_MainLoop(void)
{ {
time_t last_copy_time = 0; pg_time_t last_copy_time = 0;
bool time_to_stop; bool time_to_stop;
/* /*
...@@ -332,8 +348,15 @@ pgarch_MainLoop(void) ...@@ -332,8 +348,15 @@ pgarch_MainLoop(void)
*/ */
wakened = true; wakened = true;
/*
* There shouldn't be anything for the archiver to do except to wait
* for a signal ... however, the archiver exists to protect our data,
* so she wakes up occasionally to allow herself to be proactive.
*/
do do
{ {
ResetLatch(&mainloop_latch);
/* When we get SIGUSR2, we do one more archive cycle, then exit */ /* When we get SIGUSR2, we do one more archive cycle, then exit */
time_to_stop = ready_to_stop; time_to_stop = ready_to_stop;
...@@ -371,24 +394,26 @@ pgarch_MainLoop(void) ...@@ -371,24 +394,26 @@ pgarch_MainLoop(void)
} }
/* /*
* There shouldn't be anything for the archiver to do except to wait * Sleep until a signal is received, or until a poll is forced by
* for a signal ... however, the archiver exists to protect our data, * PGARCH_AUTOWAKE_INTERVAL having passed since last_copy_time, or
* so she wakes up occasionally to allow herself to be proactive. * until postmaster dies.
*
* On some platforms, signals won't interrupt the sleep. To ensure we
* respond reasonably promptly when someone signals us, break down the
* sleep into 1-second increments, and check for interrupts after each
* nap.
*/ */
while (!(wakened || ready_to_stop || got_SIGHUP || if (!time_to_stop) /* Don't wait during last iteration */
!PostmasterIsAlive(true)))
{ {
time_t curtime; pg_time_t curtime = (pg_time_t) time(NULL);
int timeout;
pg_usleep(1000000L); timeout = PGARCH_AUTOWAKE_INTERVAL - (curtime - last_copy_time);
curtime = time(NULL); if (timeout > 0)
if ((unsigned int) (curtime - last_copy_time) >= {
(unsigned int) PGARCH_AUTOWAKE_INTERVAL) int rc;
rc = WaitLatch(&mainloop_latch,
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
timeout * 1000000L);
if (rc & WL_TIMEOUT)
wakened = true;
}
else
wakened = true; wakened = true;
} }
...@@ -397,7 +422,7 @@ pgarch_MainLoop(void) ...@@ -397,7 +422,7 @@ pgarch_MainLoop(void)
* or after completing one more archiving cycle after receiving * or after completing one more archiving cycle after receiving
* SIGUSR2. * SIGUSR2.
*/ */
} while (PostmasterIsAlive(true) && !time_to_stop); } while (PostmasterIsAlive() && !time_to_stop);
} }
/* /*
...@@ -429,7 +454,7 @@ pgarch_ArchiverCopyLoop(void) ...@@ -429,7 +454,7 @@ pgarch_ArchiverCopyLoop(void)
* command, and the second is to avoid conflicts with another * command, and the second is to avoid conflicts with another
* archiver spawned by a newer postmaster. * archiver spawned by a newer postmaster.
*/ */
if (got_SIGTERM || !PostmasterIsAlive(true)) if (got_SIGTERM || !PostmasterIsAlive())
return; return;
/* /*
......
...@@ -3111,7 +3111,7 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3111,7 +3111,7 @@ PgstatCollectorMain(int argc, char *argv[])
* We can only get here if the select/poll timeout elapsed. Check * We can only get here if the select/poll timeout elapsed. Check
* for postmaster death. * for postmaster death.
*/ */
if (!PostmasterIsAlive(true)) if (!PostmasterIsAlive())
break; break;
} }
} /* end of message-processing loop */ } /* end of message-processing loop */
......
...@@ -368,6 +368,7 @@ static int CountChildren(int target); ...@@ -368,6 +368,7 @@ static int CountChildren(int target);
static bool CreateOptsFile(int argc, char *argv[], char *fullprogname); static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
static pid_t StartChildProcess(AuxProcType type); static pid_t StartChildProcess(AuxProcType type);
static void StartAutovacuumWorker(void); static void StartAutovacuumWorker(void);
static void InitPostmasterDeathWatchHandle(void);
#ifdef EXEC_BACKEND #ifdef EXEC_BACKEND
...@@ -383,8 +384,6 @@ typedef struct ...@@ -383,8 +384,6 @@ typedef struct
HANDLE procHandle; HANDLE procHandle;
DWORD procId; DWORD procId;
} win32_deadchild_waitinfo; } win32_deadchild_waitinfo;
HANDLE PostmasterHandle;
#endif #endif
static pid_t backend_forkexec(Port *port); static pid_t backend_forkexec(Port *port);
...@@ -439,6 +438,7 @@ typedef struct ...@@ -439,6 +438,7 @@ typedef struct
HANDLE initial_signal_pipe; HANDLE initial_signal_pipe;
HANDLE syslogPipe[2]; HANDLE syslogPipe[2];
#else #else
int postmaster_alive_fds[2];
int syslogPipe[2]; int syslogPipe[2];
#endif #endif
char my_exec_path[MAXPGPATH]; char my_exec_path[MAXPGPATH];
...@@ -469,6 +469,16 @@ static void ShmemBackendArrayRemove(Backend *bn); ...@@ -469,6 +469,16 @@ static void ShmemBackendArrayRemove(Backend *bn);
#define EXIT_STATUS_0(st) ((st) == 0) #define EXIT_STATUS_0(st) ((st) == 0)
#define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1) #define EXIT_STATUS_1(st) (WIFEXITED(st) && WEXITSTATUS(st) == 1)
#ifndef WIN32
/*
* File descriptors for pipe used to monitor if postmaster is alive.
* First is POSTMASTER_FD_WATCH, second is POSTMASTER_FD_OWN.
*/
int postmaster_alive_fds[2] = { -1, -1 };
#else
/* Process handle of postmaster used for the same purpose on Windows */
HANDLE PostmasterHandle;
#endif
/* /*
* Postmaster main entry point * Postmaster main entry point
...@@ -962,8 +972,13 @@ PostmasterMain(int argc, char *argv[]) ...@@ -962,8 +972,13 @@ PostmasterMain(int argc, char *argv[])
*/ */
BackendList = DLNewList(); BackendList = DLNewList();
#ifdef WIN32 /*
* Initialize pipe (or process handle on Windows) that allows children to
* wake up from sleep on postmaster death.
*/
InitPostmasterDeathWatchHandle();
#ifdef WIN32
/* /*
* Initialize I/O completion port used to deliver list of dead children. * Initialize I/O completion port used to deliver list of dead children.
*/ */
...@@ -971,21 +986,6 @@ PostmasterMain(int argc, char *argv[]) ...@@ -971,21 +986,6 @@ PostmasterMain(int argc, char *argv[])
if (win32ChildQueue == NULL) if (win32ChildQueue == NULL)
ereport(FATAL, ereport(FATAL,
(errmsg("could not create I/O completion port for child queue"))); (errmsg("could not create I/O completion port for child queue")));
/*
* Set up a handle that child processes can use to check whether the
* postmaster is still running.
*/
if (DuplicateHandle(GetCurrentProcess(),
GetCurrentProcess(),
GetCurrentProcess(),
&PostmasterHandle,
0,
TRUE,
DUPLICATE_SAME_ACCESS) == 0)
ereport(FATAL,
(errmsg_internal("could not duplicate postmaster handle: error code %d",
(int) GetLastError())));
#endif #endif
/* /*
...@@ -1965,6 +1965,19 @@ ClosePostmasterPorts(bool am_syslogger) ...@@ -1965,6 +1965,19 @@ ClosePostmasterPorts(bool am_syslogger)
{ {
int i; int i;
#ifndef WIN32
/*
* Close the write end of postmaster death watch pipe. It's important to
* do this as early as possible, so that if postmaster dies, others won't
* think that it's still running because we're holding the pipe open.
*/
if (close(postmaster_alive_fds[POSTMASTER_FD_OWN]))
ereport(FATAL,
(errcode_for_file_access(),
errmsg_internal("could not close postmaster death monitoring pipe in child process: %m")));
postmaster_alive_fds[POSTMASTER_FD_OWN] = -1;
#endif
/* Close the listen sockets */ /* Close the listen sockets */
for (i = 0; i < MAXLISTEN; i++) for (i = 0; i < MAXLISTEN; i++)
{ {
...@@ -4643,6 +4656,9 @@ save_backend_variables(BackendParameters *param, Port *port, ...@@ -4643,6 +4656,9 @@ save_backend_variables(BackendParameters *param, Port *port,
pgwin32_create_signal_listener(childPid), pgwin32_create_signal_listener(childPid),
childProcess)) childProcess))
return false; return false;
#else
memcpy(&param->postmaster_alive_fds, &postmaster_alive_fds,
sizeof(postmaster_alive_fds));
#endif #endif
memcpy(&param->syslogPipe, &syslogPipe, sizeof(syslogPipe)); memcpy(&param->syslogPipe, &syslogPipe, sizeof(syslogPipe));
...@@ -4858,6 +4874,9 @@ restore_backend_variables(BackendParameters *param, Port *port) ...@@ -4858,6 +4874,9 @@ restore_backend_variables(BackendParameters *param, Port *port)
#ifdef WIN32 #ifdef WIN32
PostmasterHandle = param->PostmasterHandle; PostmasterHandle = param->PostmasterHandle;
pgwin32_initial_signal_pipe = param->initial_signal_pipe; pgwin32_initial_signal_pipe = param->initial_signal_pipe;
#else
memcpy(&postmaster_alive_fds, &param->postmaster_alive_fds,
sizeof(postmaster_alive_fds));
#endif #endif
memcpy(&syslogPipe, &param->syslogPipe, sizeof(syslogPipe)); memcpy(&syslogPipe, &param->syslogPipe, sizeof(syslogPipe));
...@@ -4979,3 +4998,54 @@ pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired) ...@@ -4979,3 +4998,54 @@ pgwin32_deadchild_callback(PVOID lpParameter, BOOLEAN TimerOrWaitFired)
} }
#endif /* WIN32 */ #endif /* WIN32 */
/*
* Initialize one and only handle for monitoring postmaster death.
*
* Called once in the postmaster, so that child processes can subsequently
* monitor if their parent is dead.
*/
static void
InitPostmasterDeathWatchHandle(void)
{
#ifndef WIN32
/*
* Create a pipe. Postmaster holds the write end of the pipe open
* (POSTMASTER_FD_OWN), and children hold the read end. Children can
* pass the read file descriptor to select() to wake up in case postmaster
* dies, or check for postmaster death with a (read() == 0). Children must
* close the write end as soon as possible after forking, because EOF
* won't be signaled in the read end until all processes have closed the
* write fd. That is taken care of in ClosePostmasterPorts().
*/
Assert(MyProcPid == PostmasterPid);
if (pipe(postmaster_alive_fds))
ereport(FATAL,
(errcode_for_file_access(),
errmsg_internal("could not create pipe to monitor postmaster death: %m")));
/*
* Set O_NONBLOCK to allow testing for the fd's presence with a read()
* call.
*/
if (fcntl(postmaster_alive_fds[POSTMASTER_FD_WATCH], F_SETFL, O_NONBLOCK))
ereport(FATAL,
(errcode_for_socket_access(),
errmsg_internal("could not set postmaster death monitoring pipe to non-blocking mode: %m")));
#else
/*
* On Windows, we use a process handle for the same purpose.
*/
if (DuplicateHandle(GetCurrentProcess(),
GetCurrentProcess(),
GetCurrentProcess(),
&PostmasterHandle,
0,
TRUE,
DUPLICATE_SAME_ACCESS) == 0)
ereport(FATAL,
(errmsg_internal("could not duplicate postmaster handle: error code %d",
(int) GetLastError())));
#endif /* WIN32 */
}
...@@ -227,7 +227,7 @@ WalWriterMain(void) ...@@ -227,7 +227,7 @@ WalWriterMain(void)
* Emergency bailout if postmaster has died. This is to avoid the * Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children. * necessity for manual cleanup of all postmaster children.
*/ */
if (!PostmasterIsAlive(true)) if (!PostmasterIsAlive())
exit(1); exit(1);
/* /*
......
...@@ -171,7 +171,7 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN) ...@@ -171,7 +171,7 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN)
* postmaster death regularly while waiting. Note that timeout here * postmaster death regularly while waiting. Note that timeout here
* does not necessarily release from loop. * does not necessarily release from loop.
*/ */
WaitLatch(&MyProc->waitLatch, 60000000L); WaitLatch(&MyProc->waitLatch, WL_LATCH_SET | WL_TIMEOUT, 60000000L);
/* Must reset the latch before testing state. */ /* Must reset the latch before testing state. */
ResetLatch(&MyProc->waitLatch); ResetLatch(&MyProc->waitLatch);
...@@ -239,7 +239,7 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN) ...@@ -239,7 +239,7 @@ SyncRepWaitForLSN(XLogRecPtr XactCommitLSN)
* acknowledgement, because all the wal sender processes will exit. So * acknowledgement, because all the wal sender processes will exit. So
* just bail out. * just bail out.
*/ */
if (!PostmasterIsAlive(true)) if (!PostmasterIsAlive())
{ {
ProcDiePending = true; ProcDiePending = true;
whereToSendOutput = DestNone; whereToSendOutput = DestNone;
......
...@@ -287,7 +287,7 @@ WalReceiverMain(void) ...@@ -287,7 +287,7 @@ WalReceiverMain(void)
* Emergency bailout if postmaster has died. This is to avoid the * Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children. * necessity for manual cleanup of all postmaster children.
*/ */
if (!PostmasterIsAlive(true)) if (!PostmasterIsAlive())
exit(1); exit(1);
/* /*
......
...@@ -212,7 +212,7 @@ WalSndHandshake(void) ...@@ -212,7 +212,7 @@ WalSndHandshake(void)
* Emergency bailout if postmaster has died. This is to avoid the * Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children. * necessity for manual cleanup of all postmaster children.
*/ */
if (!PostmasterIsAlive(true)) if (!PostmasterIsAlive())
exit(1); exit(1);
/* /*
...@@ -713,7 +713,7 @@ WalSndLoop(void) ...@@ -713,7 +713,7 @@ WalSndLoop(void)
* Emergency bailout if postmaster has died. This is to avoid the * Emergency bailout if postmaster has died. This is to avoid the
* necessity for manual cleanup of all postmaster children. * necessity for manual cleanup of all postmaster children.
*/ */
if (!PostmasterIsAlive(true)) if (!PostmasterIsAlive())
exit(1); exit(1);
/* Process any requests or signals received recently */ /* Process any requests or signals received recently */
...@@ -779,6 +779,7 @@ WalSndLoop(void) ...@@ -779,6 +779,7 @@ WalSndLoop(void)
{ {
TimestampTz finish_time = 0; TimestampTz finish_time = 0;
long sleeptime; long sleeptime;
int wakeEvents;
/* Reschedule replication timeout */ /* Reschedule replication timeout */
if (replication_timeout > 0) if (replication_timeout > 0)
...@@ -805,9 +806,11 @@ WalSndLoop(void) ...@@ -805,9 +806,11 @@ WalSndLoop(void)
} }
/* Sleep */ /* Sleep */
WaitLatchOrSocket(&MyWalSnd->latch, MyProcPort->sock, wakeEvents = WL_LATCH_SET | WL_SOCKET_READABLE | WL_TIMEOUT;
true, pq_is_send_pending(), if (pq_is_send_pending())
sleeptime * 1000L); wakeEvents |= WL_SOCKET_WRITEABLE;
WaitLatchOrSocket(&MyWalSnd->latch, wakeEvents,
MyProcPort->sock, sleeptime * 1000L);
/* Check for replication timeout */ /* Check for replication timeout */
if (replication_timeout > 0 && if (replication_timeout > 0 &&
......
...@@ -267,42 +267,27 @@ MarkPostmasterChildInactive(void) ...@@ -267,42 +267,27 @@ MarkPostmasterChildInactive(void)
/* /*
* PostmasterIsAlive - check whether postmaster process is still alive * PostmasterIsAlive - check whether postmaster process is still alive
*
* amDirectChild should be passed as "true" by code that knows it is
* executing in a direct child process of the postmaster; pass "false"
* if an indirect child or not sure. The "true" case uses a faster and
* more reliable test, so use it when possible.
*/ */
bool bool
PostmasterIsAlive(bool amDirectChild) PostmasterIsAlive(void)
{ {
#ifndef WIN32 #ifndef WIN32
if (amDirectChild) char c;
{ ssize_t rc;
pid_t ppid = getppid();
/* If the postmaster is still our parent, it must be alive. */ rc = read(postmaster_alive_fds[POSTMASTER_FD_WATCH], &c, 1);
if (ppid == PostmasterPid) if (rc < 0)
{
if (errno == EAGAIN || errno == EWOULDBLOCK)
return true; return true;
else
elog(FATAL, "read on postmaster death monitoring pipe failed: %m");
}
else if (rc > 0)
elog(FATAL, "unexpected data in postmaster death monitoring pipe");
/* If the init process is our parent, postmaster must be dead. */
if (ppid == 1)
return false; return false;
/*
* If we get here, our parent process is neither the postmaster nor
* init. This can occur on BSD and MacOS systems if a debugger has
* been attached. We fall through to the less-reliable kill() method.
*/
}
/*
* Use kill() to see if the postmaster is still alive. This can sometimes
* give a false positive result, since the postmaster's PID may get
* recycled, but it is good enough for existing uses by indirect children
* and in debugging environments.
*/
return (kill(PostmasterPid, 0) == 0);
#else /* WIN32 */ #else /* WIN32 */
return (WaitForSingleObject(PostmasterHandle, 0) == WAIT_TIMEOUT); return (WaitForSingleObject(PostmasterHandle, 0) == WAIT_TIMEOUT);
#endif /* WIN32 */ #endif /* WIN32 */
......
...@@ -32,6 +32,14 @@ extern bool restart_after_crash; ...@@ -32,6 +32,14 @@ extern bool restart_after_crash;
#ifdef WIN32 #ifdef WIN32
extern HANDLE PostmasterHandle; extern HANDLE PostmasterHandle;
#else
extern int postmaster_alive_fds[2];
/*
* Constants that represent which of postmaster_alive_fds is held by
* postmaster, and which is used in children to check for postmaster death.
*/
#define POSTMASTER_FD_WATCH 0 /* used in children to check for postmaster death */
#define POSTMASTER_FD_OWN 1 /* kept open by postmaster only */
#endif #endif
extern const char *progname; extern const char *progname;
......
...@@ -31,6 +31,13 @@ typedef struct ...@@ -31,6 +31,13 @@ typedef struct
#endif #endif
} Latch; } Latch;
/* Bitmasks for events that may wake up WaitLatch() clients */
#define WL_LATCH_SET (1 << 0)
#define WL_SOCKET_READABLE (1 << 1)
#define WL_SOCKET_WRITEABLE (1 << 2)
#define WL_TIMEOUT (1 << 3)
#define WL_POSTMASTER_DEATH (1 << 4)
/* /*
* prototypes for functions in latch.c * prototypes for functions in latch.c
*/ */
...@@ -38,9 +45,9 @@ extern void InitLatch(volatile Latch *latch); ...@@ -38,9 +45,9 @@ extern void InitLatch(volatile Latch *latch);
extern void InitSharedLatch(volatile Latch *latch); extern void InitSharedLatch(volatile Latch *latch);
extern void OwnLatch(volatile Latch *latch); extern void OwnLatch(volatile Latch *latch);
extern void DisownLatch(volatile Latch *latch); extern void DisownLatch(volatile Latch *latch);
extern bool WaitLatch(volatile Latch *latch, long timeout); extern int WaitLatch(volatile Latch *latch, int wakeEvents, long timeout);
extern int WaitLatchOrSocket(volatile Latch *latch, pgsocket sock, extern int WaitLatchOrSocket(volatile Latch *latch, int wakeEvents,
bool forRead, bool forWrite, long timeout); pgsocket sock, long timeout);
extern void SetLatch(volatile Latch *latch); extern void SetLatch(volatile Latch *latch);
extern void ResetLatch(volatile Latch *latch); extern void ResetLatch(volatile Latch *latch);
......
...@@ -50,6 +50,6 @@ extern bool IsPostmasterChildWalSender(int slot); ...@@ -50,6 +50,6 @@ extern bool IsPostmasterChildWalSender(int slot);
extern void MarkPostmasterChildActive(void); extern void MarkPostmasterChildActive(void);
extern void MarkPostmasterChildInactive(void); extern void MarkPostmasterChildInactive(void);
extern void MarkPostmasterChildWalSender(void); extern void MarkPostmasterChildWalSender(void);
extern bool PostmasterIsAlive(bool amDirectChild); extern bool PostmasterIsAlive(void);
#endif /* PMSIGNAL_H */ #endif /* PMSIGNAL_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment