Commit 4bad60e3 authored by Andres Freund's avatar Andres Freund

Allow latches to wait for socket writability without waiting for readability.

So far WaitLatchOrSocket() required to pass in WL_SOCKET_READABLE as
that solely was used to indicate error conditions, like EOF. Waiting
for WL_SOCKET_WRITEABLE would have meant to busy wait upon socket
errors.

Adjust the API to signal errors by returning the socket as readable,
writable or both, depending on WL_SOCKET_READABLE/WL_SOCKET_WRITEABLE
being specified.  It would arguably be nicer to return WL_SOCKET_ERROR
but that's not possible on platforms and would probably also result in
more complex callsites.

This previously had explicitly been forbidden in e42a21b9, as
there was no strong use case at that point. We now are looking into
making FE/BE communication use latches, so changing this makes sense.

There also are some portability concerns because there cases of older
platforms where select(2) is known to, in violation of POSIX, not
return a socket as writable after the peer has closed it.  So far the
platforms where that's the case provide a working poll(2). If we find
one where that's not the case, we'll need to add a workaround for that
platform.

Discussion: 20140927191243.GD5423@alap3.anarazel.de
Reviewed-By: Heikki Linnakangas, Noah Misch
parent 3dfce376
......@@ -200,9 +200,9 @@ WaitLatch(volatile Latch *latch, int wakeEvents, long timeout)
* Like WaitLatch, but with an extra socket argument for WL_SOCKET_*
* conditions.
*
* When waiting on a socket, WL_SOCKET_READABLE *must* be included in
* 'wakeEvents'; WL_SOCKET_WRITEABLE is optional. The reason for this is
* that EOF and error conditions are reported only via WL_SOCKET_READABLE.
* When waiting on a socket, EOF and error conditions are reported by
* returning the socket as readable/writable or both, depending on
* WL_SOCKET_READABLE/WL_SOCKET_WRITEABLE being specified.
*/
int
WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
......@@ -230,8 +230,6 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);
Assert(wakeEvents != 0); /* must have at least one wake event */
/* Cannot specify WL_SOCKET_WRITEABLE without WL_SOCKET_READABLE */
Assert((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != WL_SOCKET_WRITEABLE);
if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
elog(ERROR, "cannot wait on a latch owned by another process");
......@@ -291,7 +289,16 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
break;
}
/* Must wait ... we use poll(2) if available, otherwise select(2) */
/*
* Must wait ... we use poll(2) if available, otherwise select(2).
*
* On at least older linux kernels select(), in violation of POSIX,
* doesn't reliably return a socket as writable if closed - but we
* rely on that. So far all the known cases of this problem are on
* platforms that also provide a poll() implementation without that
* bug. If we find one where that's not the case, we'll need to add a
* workaround.
*/
#ifdef HAVE_POLL
nfds = 0;
if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
......@@ -346,7 +353,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
{
/* at least one event occurred, so check revents values */
if ((wakeEvents & WL_SOCKET_READABLE) &&
(pfds[0].revents & (POLLIN | POLLHUP | POLLERR | POLLNVAL)))
(pfds[0].revents & POLLIN))
{
/* data available in socket, or EOF/error condition */
result |= WL_SOCKET_READABLE;
......@@ -354,8 +361,17 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
if ((wakeEvents & WL_SOCKET_WRITEABLE) &&
(pfds[0].revents & POLLOUT))
{
/* socket is writable */
result |= WL_SOCKET_WRITEABLE;
}
if (pfds[0].revents & (POLLHUP | POLLERR | POLLNVAL))
{
/* EOF/error condition */
if (wakeEvents & WL_SOCKET_READABLE)
result |= WL_SOCKET_READABLE;
if (wakeEvents & WL_SOCKET_WRITEABLE)
result |= WL_SOCKET_WRITEABLE;
}
/*
* We expect a POLLHUP when the remote end is closed, but because
......@@ -439,6 +455,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
}
if ((wakeEvents & WL_SOCKET_WRITEABLE) && FD_ISSET(sock, &output_mask))
{
/* socket is writable, or EOF */
result |= WL_SOCKET_WRITEABLE;
}
if ((wakeEvents & WL_POSTMASTER_DEATH) &&
......
......@@ -117,8 +117,6 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
wakeEvents &= ~(WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE);
Assert(wakeEvents != 0); /* must have at least one wake event */
/* Cannot specify WL_SOCKET_WRITEABLE without WL_SOCKET_READABLE */
Assert((wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE)) != WL_SOCKET_WRITEABLE);
if ((wakeEvents & WL_LATCH_SET) && latch->owner_pid != MyProcPid)
elog(ERROR, "cannot wait on a latch owned by another process");
......@@ -152,10 +150,10 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
if (wakeEvents & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
{
/* Need an event object to represent events on the socket */
int flags = 0;
int flags = FD_CLOSE; /* always check for errors/EOF */
if (wakeEvents & WL_SOCKET_READABLE)
flags |= (FD_READ | FD_CLOSE);
flags |= FD_READ;
if (wakeEvents & WL_SOCKET_WRITEABLE)
flags |= FD_WRITE;
......@@ -232,7 +230,7 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
elog(ERROR, "failed to enumerate network events: error code %u",
WSAGetLastError());
if ((wakeEvents & WL_SOCKET_READABLE) &&
(resEvents.lNetworkEvents & (FD_READ | FD_CLOSE)))
(resEvents.lNetworkEvents & FD_READ))
{
result |= WL_SOCKET_READABLE;
}
......@@ -241,6 +239,13 @@ WaitLatchOrSocket(volatile Latch *latch, int wakeEvents, pgsocket sock,
{
result |= WL_SOCKET_WRITEABLE;
}
if (resEvents.lNetworkEvents & FD_CLOSE)
{
if (wakeEvents & WL_SOCKET_READABLE)
result |= WL_SOCKET_READABLE;
if (wakeEvents & WL_SOCKET_WRITEABLE)
result |= WL_SOCKET_WRITEABLE;
}
}
else if ((wakeEvents & WL_POSTMASTER_DEATH) &&
rc == WAIT_OBJECT_0 + pmdeath_eventno)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment