Commit 3e7fdcff authored by Tom Lane's avatar Tom Lane

Fix WaitLatch() to return promptly when the requested timeout expires.

If the sleep is interrupted by a signal, we must recompute the remaining
time to wait; otherwise, a steady stream of non-wait-terminating interrupts
could delay return from WaitLatch indefinitely.  This has been shown to be
a problem for the autovacuum launcher, and there may well be other places
now or in the future with similar issues.  So we'd better make the function
robust, even though this'll add at least one gettimeofday call per wait.

Back-patch to 9.2.  We might eventually need to fix 9.1 as well, but the
code is quite different there, and the usage of WaitLatch in 9.1 is so
limited that it's not clearly important to do so.

Reported and diagnosed by Jeff Janes, though I rewrote his patch rather
heavily.
parent dcc55dd2
......@@ -48,6 +48,7 @@
#endif
#include "miscadmin.h"
#include "portability/instr_time.h"
#include "postmaster/postmaster.h"
#include "storage/latch.h"
#include "storage/pmsignal.h"
......@@ -176,12 +177,8 @@ DisownLatch(volatile Latch *latch)
* function returns immediately.
*
* The 'timeout' is given in milliseconds. It must be >= 0 if WL_TIMEOUT flag
* is given. On some platforms, signals do not interrupt the wait, or even
* cause the timeout to be restarted, so beware that the function can sleep
* for several times longer than the requested timeout. However, this
* difficulty is not so great as it seems, because the signal handlers for any
* signals that the caller should respond to ought to be programmed to end the
* wait by calling SetLatch. Ideally, the timeout parameter is vestigial.
* is given. Note that some extra overhead is incurred when WL_TIMEOUT is
* given, so avoid using a timeout if possible.
*
* The latch must be owned by the current process, ie. it must be a
* backend-local latch initialized with InitLatch, or a shared latch
......@@ -211,13 +208,16 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
{
int result = 0;
int rc;
instr_time start_time,
cur_time;
long cur_timeout;
#ifdef HAVE_POLL
struct pollfd pfds[3];
int nfds;
#else
struct timeval tv,
*tvp = NULL;
*tvp;
fd_set input_mask;
fd_set output_mask;
int hifd;
......@@ -234,21 +234,30 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
elog(ERROR, "cannot wait on a latch owned by another process");
/* Initialize timeout */
/*
* Initialize timeout if requested. We must record the current time so
* that we can determine the remaining timeout if the poll() or select()
* is interrupted. (On some platforms, select() will update the contents
* of "tv" for us, but unfortunately we can't rely on that.)
*/
if (wakeEvents & WL_TIMEOUT)
{
INSTR_TIME_SET_CURRENT(start_time);
Assert(timeout >= 0);
cur_timeout = timeout;
#ifndef HAVE_POLL
tv.tv_sec = timeout / 1000L;
tv.tv_usec = (timeout % 1000L) * 1000L;
tv.tv_sec = cur_timeout / 1000L;
tv.tv_usec = (cur_timeout % 1000L) * 1000L;
tvp = &tv;
#endif
}
else
{
#ifdef HAVE_POLL
/* make sure poll() agrees there is no timeout */
timeout = -1;
cur_timeout = -1;
#ifndef HAVE_POLL
tvp = NULL;
#endif
}
......@@ -311,54 +320,62 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
}
/* Sleep */
rc = poll(pfds, nfds, (int) timeout);
rc = poll(pfds, nfds, (int) cur_timeout);
/* Check return code */
if (rc < 0)
{
if (errno == EINTR)
continue;
waiting = false;
ereport(ERROR,
(errcode_for_socket_access(),
errmsg("poll() failed: %m")));
/* EINTR is okay, otherwise complain */
if (errno != EINTR)
{
waiting = false;
ereport(ERROR,
(errcode_for_socket_access(),
errmsg("poll() failed: %m")));
}
}
if (rc == 0 && (wakeEvents & WL_TIMEOUT))
else if (rc == 0)
{
/* timeout exceeded */
result |= WL_TIMEOUT;
if (wakeEvents & WL_TIMEOUT)
result |= WL_TIMEOUT;
}
if ((wakeEvents & WL_SOCKET_READABLE) &&
(pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
else
{
/* data available in socket, or EOF/error condition */
result |= WL_SOCKET_READABLE;
}
if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
(pfds[0].revents & POLLOUT))
{
result |= WL_SOCKET_WRITEABLE;
}
/* at least one event occurred, so check revents values */
if ((wakeEvents & WL_SOCKET_READABLE) &&
(pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
{
/* data available in socket, or EOF/error condition */
result |= WL_SOCKET_READABLE;
}
if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
(pfds[0].revents & POLLOUT))
{
result |= WL_SOCKET_WRITEABLE;
}
/*
* We expect a POLLHUP when the remote end is closed, but because we
* don't expect the pipe to become readable or to have any errors
* either, treat those as postmaster death, too.
*/
if ((wakeEvents & WL_POSTMASTER_DEATH) &&
(pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
{
/*
* According to the select(2) man page on Linux, select(2) may
* spuriously return and report a file descriptor as readable,
* when it's not; and presumably so can poll(2). It's not clear
* that the relevant cases would ever apply to the postmaster
* pipe, but since the consequences of falsely returning
* WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
* trouble to positively verify EOF with PostmasterIsAlive().
* We expect a POLLHUP when the remote end is closed, but because
* we don't expect the pipe to become readable or to have any
* errors either, treat those cases as postmaster death, too.
*/
if (!PostmasterIsAlive())
result |= WL_POSTMASTER_DEATH;
if ((wakeEvents & WL_POSTMASTER_DEATH) &&
(pfds[nfds - 1].revents & (POLLHUP | POLLIN | POLLERR | POLLNVAL)))
{
/*
* According to the select(2) man page on Linux, select(2) may
* spuriously return and report a file descriptor as readable,
* when it's not; and presumably so can poll(2). It's not
* clear that the relevant cases would ever apply to the
* postmaster pipe, but since the consequences of falsely
* returning WL_POSTMASTER_DEATH could be pretty unpleasant,
* we take the trouble to positively verify EOF with
* PostmasterIsAlive().
*/
if (!PostmasterIsAlive())
result |= WL_POSTMASTER_DEATH;
}
}
#else /* !HAVE_POLL */
......@@ -395,43 +412,66 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
/* Check return code */
if (rc < 0)
{
if (errno == EINTR)
continue;
waiting = false;
ereport(ERROR,
(errcode_for_socket_access(),
errmsg("select() failed: %m")));
/* EINTR is okay, otherwise complain */
if (errno != EINTR)
{
waiting = false;
ereport(ERROR,
(errcode_for_socket_access(),
errmsg("select() failed: %m")));
}
}
if (rc == 0 && (wakeEvents & WL_TIMEOUT))
else if (rc == 0)
{
/* timeout exceeded */
result |= WL_TIMEOUT;
}
if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask))
{
/* data available in socket, or EOF */
result |= WL_SOCKET_READABLE;
if (wakeEvents & WL_TIMEOUT)
result |= WL_TIMEOUT;
}
if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
else
{
result |= WL_SOCKET_WRITEABLE;
}
if ((wakeEvents & WL_POSTMASTER_DEATH) &&
/* at least one event occurred, so check masks */
if ((wakeEvents & WL_SOCKET_READABLE) && FD_ISSET(sock, &input_mask))
{
/* data available in socket, or EOF */
result |= WL_SOCKET_READABLE;
}
if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
{
result |= WL_SOCKET_WRITEABLE;
}
if ((wakeEvents & WL_POSTMASTER_DEATH) &&
FD_ISSET(postmaster_alive_fds[POSTMASTER_FD_WATCH], &input_mask))
{
/*
* According to the select(2) man page on Linux, select(2) may
* spuriously return and report a file descriptor as readable,
* when it's not; and presumably so can poll(2). It's not clear
* that the relevant cases would ever apply to the postmaster
* pipe, but since the consequences of falsely returning
* WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
* trouble to positively verify EOF with PostmasterIsAlive().
*/
if (!PostmasterIsAlive())
result |= WL_POSTMASTER_DEATH;
{
/*
* According to the select(2) man page on Linux, select(2) may
* spuriously return and report a file descriptor as readable,
* when it's not; and presumably so can poll(2). It's not
* clear that the relevant cases would ever apply to the
* postmaster pipe, but since the consequences of falsely
* returning WL_POSTMASTER_DEATH could be pretty unpleasant,
* we take the trouble to positively verify EOF with
* PostmasterIsAlive().
*/
if (!PostmasterIsAlive())
result |= WL_POSTMASTER_DEATH;
}
}
#endif /* HAVE_POLL */
/* If we're not done, update cur_timeout for next iteration */
if (result == 0 && cur_timeout >= 0)
{
INSTR_TIME_SET_CURRENT(cur_time);
INSTR_TIME_SUBTRACT(cur_time, start_time);
cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
if (cur_timeout < 0)
cur_timeout = 0;
#ifndef HAVE_POLL
tv.tv_sec = cur_timeout / 1000L;
tv.tv_usec = (cur_timeout % 1000L) * 1000L;
#endif
}
} while (result == 0);
waiting = false;
......
......@@ -24,6 +24,7 @@
#include <unistd.h>
#include "miscadmin.h"
#include "portability/instr_time.h"
#include "postmaster/postmaster.h"
#include "storage/latch.h"
#include "storage/pmsignal.h"
......@@ -100,6 +101,9 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
long timeout)
{
DWORD rc;
instr_time start_time,
cur_time;
long cur_timeout;
HANDLE events[4];
HANDLE latchevent;
HANDLE sockevent = WSA_INVALID_EVENT;
......@@ -118,11 +122,19 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
elog(ERROR, "cannot wait on a latch owned by another process");
/* Convert timeout to form used by WaitForMultipleObjects() */
/*
* Initialize timeout if requested. We must record the current time so
* that we can determine the remaining timeout if WaitForMultipleObjects
* is interrupted.
*/
if (wakeEvents & WL_TIMEOUT)
{
INSTR_TIME_SET_CURRENT(start_time);
Assert(timeout >= 0);
cur_timeout = timeout;
}
else
timeout = INFINITE;
cur_timeout = INFINITE;
/*
* Construct an array of event handles for WaitforMultipleObjects().
......@@ -187,7 +199,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
break;
}
rc = WaitForMultipleObjects(numevents, events, FALSE, timeout);
rc = WaitForMultipleObjects(numevents, events, FALSE, cur_timeout);
if (rc == WAIT_FAILED)
elog(ERROR, "WaitForMultipleObjects() failed: error code %lu",
......@@ -203,7 +215,11 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
}
else if (rc == WAIT_OBJECT_0 + 1)
{
/* Latch is set, we'll handle that on next iteration of loop */
/*
* Latch is set. We'll handle that on next iteration of loop, but
* let's not waste the cycles to update cur_timeout below.
*/
continue;
}
else if ((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) &&
rc == WAIT_OBJECT_0 + 2) /* socket is at event slot 2 */
......@@ -240,8 +256,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
}
else
elog(ERROR, "unexpected return code from WaitForMultipleObjects(): %lu", rc);
}
while (result == 0);
/* If we're not done, update cur_timeout for next iteration */
if (result == 0 && cur_timeout != INFINITE)
{
INSTR_TIME_SET_CURRENT(cur_time);
INSTR_TIME_SUBTRACT(cur_time, start_time);
cur_timeout = timeout - (long) INSTR_TIME_GET_MILLISEC(cur_time);
if (cur_timeout < 0)
cur_timeout = 0;
}
} while (result == 0);
/* Clean up the event object we created for the socket */
if (sockevent != WSA_INVALID_EVENT)
......
......@@ -33,8 +33,8 @@
* ResetLatch - Clears the latch, allowing it to be set again
* WaitLatch - Waits for the latch to become set
*
* WaitLatch includes a provision for timeouts (which should hopefully not
* be necessary once the code is fully latch-ified) and a provision for
* WaitLatch includes a provision for timeouts (which should be avoided
* when possible, as they incur extra overhead) and a provision for
* postmaster child processes to wake up immediately on postmaster death.
* See unix_latch.c for detailed specifications for the exported functions.
*
......@@ -64,14 +64,15 @@
* will be lifted in future by inserting suitable memory barriers into
* SetLatch and ResetLatch.
*
* On some platforms, signals will not interrupt the latch wait primitive
* by themselves. Therefore, it is critical that any signal handler that
* is meant to terminate a WaitLatch wait calls SetLatch.
*
* Note that use of the process latch (PGPROC.procLatch) is generally better
* than an ad-hoc shared latch for signaling auxiliary processes. This is
* because generic signal handlers will call SetLatch on the process latch
* only, so using any latch other than the process latch effectively precludes
* ever registering a generic handler. Since signals have the potential to
* invalidate the latch timeout on some platforms, resulting in a
* denial-of-service, it is important to verify that all signal handlers
* within all WaitLatch-calling processes call SetLatch.
* use of any generic handler.
*
*
* Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment