Commit 3e7fdcff authored by Tom Lane's avatar Tom Lane

Fix WaitLatch() to return promptly when the requested timeout expires.

If the sleep is interrupted by a signal, we must recompute the remaining
time to wait; otherwise, a steady stream of non-wait-terminating interrupts
could delay return from WaitLatch indefinitely.  This has been shown to be
a problem for the autovacuum launcher, and there may well be other places
now or in the future with similar issues.  So we'd better make the function
robust, even though this'll add at least one gettimeofday call per wait.

Back-patch to 9.2.  We might eventually need to fix 9.1 as well, but the
code is quite different there, and the usage of WaitLatch in 9.1 is so
limited that it's not clearly important to do so.

Reported and diagnosed by Jeff Janes, though I rewrote his patch rather
heavily.
parent dcc55dd2
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
#endif #endif
#include "miscadmin.h" #include "miscadmin.h"
#include "portability/instr_time.h"
#include "postmaster/postmaster.h" #include "postmaster/postmaster.h"
#include "storage/latch.h" #include "storage/latch.h"
#include "storage/pmsignal.h" #include "storage/pmsignal.h"
...@@ -176,12 +177,8 @@ DisownLatch(volatile Latch *latch) ...@@ -176,12 +177,8 @@ DisownLatch(volatile Latch *latch)
* function returns immediately. * function returns immediately.
* *
* The 'timeout' is given in milliseconds. It must be >= 0 if WL_TIMEOUT flag * The 'timeout' is given in milliseconds. It must be >= 0 if WL_TIMEOUT flag
* is given. On some platforms, signals do not interrupt the wait, or even * is given. Note that some extra overhead is incurred when WL_TIMEOUT is
* cause the timeout to be restarted, so beware that the function can sleep * given, so avoid using a timeout if possible.
* for several times longer than the requested timeout. However, this
* difficulty is not so great as it seems, because the signal handlers for any
* signals that the caller should respond to ought to be programmed to end the
* wait by calling SetLatch. Ideally, the timeout parameter is vestigial.
* *
* The latch must be owned by the current process, ie. it must be a * The latch must be owned by the current process, ie. it must be a
* backend-local latch initialized with InitLatch, or a shared latch * backend-local latch initialized with InitLatch, or a shared latch
...@@ -211,13 +208,16 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, ...@@ -211,13 +208,16 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
{ {
int result = 0; int result = 0;
int rc; int rc;
instr_time start_time,
cur_time;
long cur_timeout;
#ifdef HAVE_POLL #ifdef HAVE_POLL
struct pollfd pfds[3]; struct pollfd pfds[3];
int nfds; int nfds;
#else #else
struct timeval tv, struct timeval tv,
*tvp = NULL; *tvp;
fd_set input_mask; fd_set input_mask;
fd_set output_mask; fd_set output_mask;
int hifd; int hifd;
...@@ -234,21 +234,30 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, ...@@ -234,21 +234,30 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid) if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
elog(ERROR, "cannot wait on a latch owned by another process"); elog(ERROR, "cannot wait on a latch owned by another process");
/* Initialize timeout */ /*
* Initialize timeout if requested. We must record the current time so
* that we can determine the remaining timeout if the poll() or select()
* is interrupted. (On some platforms, select() will update the contents
* of "tv" for us, but unfortunately we can't rely on that.)
*/
if (wakeEvents & WL_TIMEOUT) if (wakeEvents & WL_TIMEOUT)
{ {
INSTR_TIME_SET_CURRENT(start_time);
Assert(timeout >= 0); Assert(timeout >= 0);
cur_timeout = timeout;
#ifndef HAVE_POLL #ifndef HAVE_POLL
tv.tv_sec = timeout / 1000L; tv.tv_sec = cur_timeout / 1000L;
tv.tv_usec = (timeout % 1000L) * 1000L; tv.tv_usec = (cur_timeout % 1000L) * 1000L;
tvp = &tv; tvp = &tv;
#endif #endif
} }
else else
{ {
#ifdef HAVE_POLL cur_timeout = -1;
/* make sure poll() agrees there is no timeout */
timeout = -1; #ifndef HAVE_POLL
tvp = NULL;
#endif #endif
} }
...@@ -311,23 +320,29 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, ...@@ -311,23 +320,29 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
} }
/* Sleep */ /* Sleep */
rc = poll(pfds, nfds, (int) timeout); rc = poll(pfds, nfds, (int) cur_timeout);
/* Check return code */ /* Check return code */
if (rc < 0) if (rc < 0)
{ {
if (errno == EINTR) /* EINTR is okay, otherwise complain */
continue; if (errno != EINTR)
{
waiting = false; waiting = false;
ereport(ERROR, ereport(ERROR,
(errcode_for_socket_access(), (errcode_for_socket_access(),
errmsg("poll() failed: %m"))); errmsg("poll() failed: %m")));
} }
if (rc == 0 && (wakeEvents & WL_TIMEOUT)) }
else if (rc == 0)
{ {
/* timeout exceeded */ /* timeout exceeded */
if (wakeEvents & WL_TIMEOUT)
result |= WL_TIMEOUT; result |= WL_TIMEOUT;
} }
else
{
/* at least one event occurred, so check revents values */
if ((wakeEvents & WL_SOCKET_READABLE) && if ((wakeEvents & WL_SOCKET_READABLE) &&
(pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL))) (pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
{ {
...@@ -341,9 +356,9 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, ...@@ -341,9 +356,9 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
} }
/* /*
* We expect a POLLHUP when the remote end is closed, but because we * We expect a POLLHUP when the remote end is closed, but because
* don't expect the pipe to become readable or to have any errors * we don't expect the pipe to become readable or to have any
* either, treat those as postmaster death, too. * errors either, treat those cases as postmaster death, too.
*/ */
if ((wakeEvents & WL_POSTMASTER_DEATH) && if ((wakeEvents & WL_POSTMASTER_DEATH) &&
(pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL))) (pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
...@@ -351,15 +366,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, ...@@ -351,15 +366,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
/* /*
* According to the select(2) man page on Linux, select(2) may * According to the select(2) man page on Linux, select(2) may
* spuriously return and report a file descriptor as readable, * spuriously return and report a file descriptor as readable,
* when it's not; and presumably so can poll(2). It's not clear * when it's not; and presumably so can poll(2). It's not
* that the relevant cases would ever apply to the postmaster * clear that the relevant cases would ever apply to the
* pipe, but since the consequences of falsely returning * postmaster pipe, but since the consequences of falsely
* WL_POSTMASTER_DEATH could be pretty unpleasant, we take the * returning WL_POSTMASTER_DEATH could be pretty unpleasant,
* trouble to positively verify EOF with PostmasterIsAlive(). * we take the trouble to positively verify EOF with
* PostmasterIsAlive().
*/ */
if (!PostmasterIsAlive()) if (!PostmasterIsAlive())
result |= WL_POSTMASTER_DEATH; result |= WL_POSTMASTER_DEATH;
} }
}
#else /* !HAVE_POLL */ #else /* !HAVE_POLL */
FD_ZERO(&input_mask); FD_ZERO(&input_mask);
...@@ -395,18 +412,24 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, ...@@ -395,18 +412,24 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
/* Check return code */ /* Check return code */
if (rc < 0) if (rc < 0)
{ {
if (errno == EINTR) /* EINTR is okay, otherwise complain */
continue; if (errno != EINTR)
{
waiting = false; waiting = false;
ereport(ERROR, ereport(ERROR,
(errcode_for_socket_access(), (errcode_for_socket_access(),
errmsg("select() failed: %m"))); errmsg("select() failed: %m")));
} }
if (rc == 0 && (wakeEvents & WL_TIMEOUT)) }
else if (rc == 0)
{ {
/* timeout exceeded */ /* timeout exceeded */
if (wakeEvents & WL_TIMEOUT)
result |= WL_TIMEOUT; result |= WL_TIMEOUT;
} }
else
{
/* at least one event occurred, so check masks */
if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask)) if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask))
{ {
/* data available in socket, or EOF */ /* data available in socket, or EOF */
...@@ -422,16 +445,33 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, ...@@ -422,16 +445,33 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
/* /*
* According to the select(2) man page on Linux, select(2) may * According to the select(2) man page on Linux, select(2) may
* spuriously return and report a file descriptor as readable, * spuriously return and report a file descriptor as readable,
* when it's not; and presumably so can poll(2). It's not clear * when it's not; and presumably so can poll(2). It's not
* that the relevant cases would ever apply to the postmaster * clear that the relevant cases would ever apply to the
* pipe, but since the consequences of falsely returning * postmaster pipe, but since the consequences of falsely
* WL_POSTMASTER_DEATH could be pretty unpleasant, we take the * returning WL_POSTMASTER_DEATH could be pretty unpleasant,
* trouble to positively verify EOF with PostmasterIsAlive(). * we take the trouble to positively verify EOF with
* PostmasterIsAlive().
*/ */
if (!PostmasterIsAlive()) if (!PostmasterIsAlive())
result |= WL_POSTMASTER_DEATH; result |= WL_POSTMASTER_DEATH;
} }
}
#endif /* HAVE_POLL */ #endif /* HAVE_POLL */
/* If we're not done, update cur_timeout for next iteration */
if (result == 0 && cur_timeout >= 0)
{
INSTR_TIME_SET_CURRENT(cur_time);
INSTR_TIME_SUBTRACT(cur_time, start_time);
cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
if (cur_timeout < 0)
cur_timeout = 0;
#ifndef HAVE_POLL
tv.tv_sec = cur_timeout / 1000L;
tv.tv_usec = (cur_timeout % 1000L) * 1000L;
#endif
}
} while (result == 0); } while (result == 0);
waiting = false; waiting = false;
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <unistd.h> #include <unistd.h>
#include "miscadmin.h" #include "miscadmin.h"
#include "portability/instr_time.h"
#include "postmaster/postmaster.h" #include "postmaster/postmaster.h"
#include "storage/latch.h" #include "storage/latch.h"
#include "storage/pmsignal.h" #include "storage/pmsignal.h"
...@@ -100,6 +101,9 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, ...@@ -100,6 +101,9 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
long timeout) long timeout)
{ {
DWORD rc; DWORD rc;
instr_time start_time,
cur_time;
long cur_timeout;
HANDLE events[4]; HANDLE events[4];
HANDLE latchevent; HANDLE latchevent;
HANDLE sockevent = WSA_INVALID_EVENT; HANDLE sockevent = WSA_INVALID_EVENT;
...@@ -118,11 +122,19 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, ...@@ -118,11 +122,19 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid) if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
elog(ERROR, "cannot wait on a latch owned by another process"); elog(ERROR, "cannot wait on a latch owned by another process");
/* Convert timeout to form used by WaitForMultipleObjects() */ /*
* Initialize timeout if requested. We must record the current time so
* that we can determine the remaining timeout if WaitForMultipleObjects
* is interrupted.
*/
if (wakeEvents & WL_TIMEOUT) if (wakeEvents & WL_TIMEOUT)
{
INSTR_TIME_SET_CURRENT(start_time);
Assert(timeout >= 0); Assert(timeout >= 0);
cur_timeout = timeout;
}
else else
timeout = INFINITE; cur_timeout = INFINITE;
/* /*
* Construct an array of event handles for WaitforMultipleObjects(). * Construct an array of event handles for WaitforMultipleObjects().
...@@ -187,7 +199,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, ...@@ -187,7 +199,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
break; break;
} }
rc = WaitForMultipleObjects(numevents, events, FALSE, timeout); rc = WaitForMultipleObjects(numevents, events, FALSE, cur_timeout);
if (rc == WAIT_FAILED) if (rc == WAIT_FAILED)
elog(ERROR, "WaitForMultipleObjects() failed: error code %lu", elog(ERROR, "WaitForMultipleObjects() failed: error code %lu",
...@@ -203,7 +215,11 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, ...@@ -203,7 +215,11 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
} }
else if (rc == WAIT_OBJECT_0 + 1) else if (rc == WAIT_OBJECT_0 + 1)
{ {
/* Latch is set, we'll handle that on next iteration of loop */ /*
* Latch is set. We'll handle that on next iteration of loop, but
* let's not waste the cycles to update cur_timeout below.
*/
continue;
} }
else if ((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) && else if ((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) &&
rc == WAIT_OBJECT_0 + 2) /* socket is at event slot 2 */ rc == WAIT_OBJECT_0 + 2) /* socket is at event slot 2 */
...@@ -240,8 +256,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock, ...@@ -240,8 +256,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
} }
else else
elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %lu", rc); elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %lu", rc);
/* If we're not done, update cur_timeout for next iteration */
if (result == 0 && cur_timeout != INFINITE)
{
INSTR_TIME_SET_CURRENT(cur_time);
INSTR_TIME_SUBTRACT(cur_time, start_time);
cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
if (cur_timeout < 0)
cur_timeout = 0;
} }
while (result == 0); } while (result == 0);
/* Clean up the event object we created for the socket */ /* Clean up the event object we created for the socket */
if (sockevent != WSA_INVALID_EVENT) if (sockevent != WSA_INVALID_EVENT)
......
...@@ -33,8 +33,8 @@ ...@@ -33,8 +33,8 @@
* ResetLatch - Clears the latch, allowing it to be set again * ResetLatch - Clears the latch, allowing it to be set again
* WaitLatch - Waits for the latch to become set * WaitLatch - Waits for the latch to become set
* *
* WaitLatch includes a provision for timeouts (which should hopefully not * WaitLatch includes a provision for timeouts (which should be avoided
* be necessary once the code is fully latch-ified) and a provision for * when possible, as they incur extra overhead) and a provision for
* postmaster child processes to wake up immediately on postmaster death. * postmaster child processes to wake up immediately on postmaster death.
* See unix_latch.c for detailed specifications for the exported functions. * See unix_latch.c for detailed specifications for the exported functions.
* *
...@@ -64,14 +64,15 @@ ...@@ -64,14 +64,15 @@
* will be lifted in future by inserting suitable memory barriers into * will be lifted in future by inserting suitable memory barriers into
* SetLatch and ResetLatch. * SetLatch and ResetLatch.
* *
* On some platforms, signals will not interrupt the latch wait primitive
* by themselves. Therefore, it is critical that any signal handler that
* is meant to terminate a WaitLatch wait calls SetLatch.
*
* Note that use of the process latch (PGPROC.procLatch) is generally better * Note that use of the process latch (PGPROC.procLatch) is generally better
* than an ad-hoc shared latch for signaling auxiliary processes. This is * than an ad-hoc shared latch for signaling auxiliary processes. This is
* because generic signal handlers will call SetLatch on the process latch * because generic signal handlers will call SetLatch on the process latch
* only, so using any latch other than the process latch effectively precludes * only, so using any latch other than the process latch effectively precludes
* ever registering a generic handler. Since signals have the potential to * use of any generic handler.
* invalidate the latch timeout on some platforms, resulting in a
* denial-of-service, it is important to verify that all signal handlers
* within all WaitLatch-calling processes call SetLatch.
* *
* *
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment