Commit 61c21dda authored by Andres Freund

Remove select(2) backed latch implementation.

poll(2) is required by Single Unix Spec v2, the usual baseline for
postgres (leaving windows aside).  There haven't been any buildfarm
animals without poll(2) for a long while, leaving the select(2)
implementation largely untested.

On windows, including mingw, poll() is not available, but we have a
special case implementation for windows anyway.

Author: Andres Freund
Discussion: https://postgr.es/m/20170420003611.7r2sdvehesdyiz2i@alap3.anarazel.de
parent 546c13e1
...@@ -3,27 +3,24 @@ ...@@ -3,27 +3,24 @@
* latch.c * latch.c
* Routines for inter-process latches * Routines for inter-process latches
* *
* The Unix implementation uses the so-called self-pipe trick to overcome * The Unix implementation uses the so-called self-pipe trick to overcome the
* the race condition involved with select() and setting a global flag * race condition involved with poll() (or epoll_wait() on linux) and setting
* in the signal handler. When a latch is set and the current process * a global flag in the signal handler. When a latch is set and the current
* is waiting for it, the signal handler wakes up the select() in * process is waiting for it, the signal handler wakes up the poll() in
* WaitLatch by writing a byte to a pipe. A signal by itself doesn't * WaitLatch by writing a byte to a pipe. A signal by itself doesn't interrupt
* interrupt select() on all platforms, and even on platforms where it * poll() on all platforms, and even on platforms where it does, a signal that
* does, a signal that arrives just before the select() call does not * arrives just before the poll() call does not prevent poll() from entering
* prevent the select() from entering sleep. An incoming byte on a pipe * sleep. An incoming byte on a pipe however reliably interrupts the sleep,
* however reliably interrupts the sleep, and causes select() to return * and causes poll() to return immediately even if the signal arrives before
* immediately even if the signal arrives before select() begins. * poll() begins.
*
* (Actually, we prefer epoll_wait() over poll() over select() where
* available, but the same comments apply.)
* *
* When SetLatch is called from the same process that owns the latch, * When SetLatch is called from the same process that owns the latch,
* SetLatch writes the byte directly to the pipe. If it's owned by another * SetLatch writes the byte directly to the pipe. If it's owned by another
* process, SIGUSR1 is sent and the signal handler in the waiting process * process, SIGUSR1 is sent and the signal handler in the waiting process
* writes the byte to the pipe on behalf of the signaling process. * writes the byte to the pipe on behalf of the signaling process.
* *
* The Windows implementation uses Windows events that are inherited by * The Windows implementation uses Windows events that are inherited by all
* all postmaster child processes. * postmaster child processes. There's no need for the self-pipe trick there.
* *
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
...@@ -39,7 +36,6 @@ ...@@ -39,7 +36,6 @@
#include <limits.h> #include <limits.h>
#include <signal.h> #include <signal.h>
#include <unistd.h> #include <unistd.h>
#include <sys/time.h>
#ifdef HAVE_SYS_EPOLL_H #ifdef HAVE_SYS_EPOLL_H
#include <sys/epoll.h> #include <sys/epoll.h>
#endif #endif
...@@ -49,9 +45,6 @@ ...@@ -49,9 +45,6 @@
#ifdef HAVE_SYS_POLL_H #ifdef HAVE_SYS_POLL_H
#include <sys/poll.h> #include <sys/poll.h>
#endif #endif
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#include "miscadmin.h" #include "miscadmin.h"
#include "pgstat.h" #include "pgstat.h"
...@@ -69,14 +62,12 @@ ...@@ -69,14 +62,12 @@
* define somewhere before this block. * define somewhere before this block.
*/ */
#if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \ #if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \
defined(WAIT_USE_SELECT) || defined(WAIT_USE_WIN32) defined(WAIT_USE_WIN32)
/* don't overwrite manual choice */ /* don't overwrite manual choice */
#elif defined(HAVE_SYS_EPOLL_H) #elif defined(HAVE_SYS_EPOLL_H)
#define WAIT_USE_EPOLL #define WAIT_USE_EPOLL
#elif defined(HAVE_POLL) #elif defined(HAVE_POLL)
#define WAIT_USE_POLL #define WAIT_USE_POLL
#elif HAVE_SYS_SELECT_H
#define WAIT_USE_SELECT
#elif WIN32 #elif WIN32
#define WAIT_USE_WIN32 #define WAIT_USE_WIN32
#else #else
...@@ -162,8 +153,8 @@ InitializeLatchSupport(void) ...@@ -162,8 +153,8 @@ InitializeLatchSupport(void)
/* /*
* Set up the self-pipe that allows a signal handler to wake up the * Set up the self-pipe that allows a signal handler to wake up the
* select() in WaitLatch. Make the write-end non-blocking, so that * poll()/epoll_wait() in WaitLatch. Make the write-end non-blocking, so
* SetLatch won't block if the event has already been set many times * that SetLatch won't block if the event has already been set many times
* filling the kernel buffer. Make the read-end non-blocking too, so that * filling the kernel buffer. Make the read-end non-blocking too, so that
* we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK. * we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK.
*/ */
...@@ -401,8 +392,9 @@ SetLatch(volatile Latch *latch) ...@@ -401,8 +392,9 @@ SetLatch(volatile Latch *latch)
/* /*
* See if anyone's waiting for the latch. It can be the current process if * See if anyone's waiting for the latch. It can be the current process if
* we're in a signal handler. We use the self-pipe to wake up the select() * we're in a signal handler. We use the self-pipe to wake up the
* in that case. If it's another process, send a signal. * poll()/epoll_wait() in that case. If it's another process, send a
* signal.
* *
* Fetch owner_pid only once, in case the latch is concurrently getting * Fetch owner_pid only once, in case the latch is concurrently getting
* owned or disowned. XXX: This assumes that pid_t is atomic, which isn't * owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
...@@ -666,8 +658,6 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch, ...@@ -666,8 +658,6 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
WaitEventAdjustEpoll(set, event, EPOLL_CTL_ADD); WaitEventAdjustEpoll(set, event, EPOLL_CTL_ADD);
#elif defined(WAIT_USE_POLL) #elif defined(WAIT_USE_POLL)
WaitEventAdjustPoll(set, event); WaitEventAdjustPoll(set, event);
#elif defined(WAIT_USE_SELECT)
/* nothing to do */
#elif defined(WAIT_USE_WIN32) #elif defined(WAIT_USE_WIN32)
WaitEventAdjustWin32(set, event); WaitEventAdjustWin32(set, event);
#endif #endif
...@@ -724,8 +714,6 @@ ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch) ...@@ -724,8 +714,6 @@ ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
WaitEventAdjustEpoll(set, event, EPOLL_CTL_MOD); WaitEventAdjustEpoll(set, event, EPOLL_CTL_MOD);
#elif defined(WAIT_USE_POLL) #elif defined(WAIT_USE_POLL)
WaitEventAdjustPoll(set, event); WaitEventAdjustPoll(set, event);
#elif defined(WAIT_USE_SELECT)
/* nothing to do */
#elif defined(WAIT_USE_WIN32) #elif defined(WAIT_USE_WIN32)
WaitEventAdjustWin32(set, event); WaitEventAdjustWin32(set, event);
#endif #endif
...@@ -1055,9 +1043,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout, ...@@ -1055,9 +1043,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
* because we don't expect the pipe to become readable or to have * because we don't expect the pipe to become readable or to have
* any errors either, treat those cases as postmaster death, too. * any errors either, treat those cases as postmaster death, too.
* *
* As explained in the WAIT_USE_SELECT implementation, select(2) * Be paranoid about a spurious event signalling the postmaster as
* may spuriously return. Be paranoid about that here too, a * being dead. There have been reports about that happening with
* spurious WL_POSTMASTER_DEATH would be painful. * older primitives (select(2) to be specific), and a spurious
* WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
* cost much.
*/ */
if (!PostmasterIsAlive()) if (!PostmasterIsAlive())
{ {
...@@ -1171,9 +1161,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout, ...@@ -1171,9 +1161,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
* we don't expect the pipe to become readable or to have any * we don't expect the pipe to become readable or to have any
* errors either, treat those cases as postmaster death, too. * errors either, treat those cases as postmaster death, too.
* *
* As explained in the WAIT_USE_SELECT implementation, select(2) * Be paranoid about a spurious event signalling the postmaster as
* may spuriously return. Be paranoid about that here too, a * being dead. There have been reports about that happening with
* spurious WL_POSTMASTER_DEATH would be painful. * older primitives (select(2) to be specific), and a spurious
* WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
* cost much.
*/ */
if (!PostmasterIsAlive()) if (!PostmasterIsAlive())
{ {
...@@ -1214,163 +1206,6 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout, ...@@ -1214,163 +1206,6 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
return returned_events; return returned_events;
} }
#elif defined(WAIT_USE_SELECT)
/*
 * Wait using select(2).
 *
 * Sleeps until at least one of the events registered in 'set' is reported by
 * select(), or until cur_timeout milliseconds have elapsed (a negative
 * cur_timeout means no timeout is passed to select()).  Up to 'nevents'
 * occurred events are stored into occurred_events.
 *
 * Returns the number of events stored.  Returns 0 if the caller should retry
 * (select() was interrupted by a signal, or every wakeup turned out not to be
 * a reportable event), and -1 if the timeout was exceeded.
 *
 * XXX: On at least older linux kernels select(), in violation of POSIX,
 * doesn't reliably return a socket as writable if closed - but we rely on
 * that. So far all the known cases of this problem are on platforms that also
 * provide a poll() implementation without that bug.  If we find one where
 * that's not the case, we'll need to add a workaround.
 */
static inline int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
					  WaitEvent *occurred_events, int nevents)
{
	int			returned_events = 0;
	int			rc;
	WaitEvent  *cur_event;
	fd_set		input_mask;
	fd_set		output_mask;

	/*
	 * Highest file descriptor added to either mask.  Must start below any
	 * valid descriptor: it is compared against before it is ever assigned,
	 * and hifd + 1 is what select() receives as nfds.
	 */
	int			hifd = -1;
	struct timeval tv;
	struct timeval *tvp = NULL;

	FD_ZERO(&input_mask);
	FD_ZERO(&output_mask);

	/*
	 * Prepare input/output masks. We do so every loop iteration as there's no
	 * entirely portable way to copy fd_sets.
	 */
	for (cur_event = set->events;
		 cur_event < (set->events + set->nevents);
		 cur_event++)
	{
		if (cur_event->events == WL_LATCH_SET)
			FD_SET(cur_event->fd, &input_mask);
		else if (cur_event->events == WL_POSTMASTER_DEATH)
			FD_SET(cur_event->fd, &input_mask);
		else
		{
			Assert(cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));

			/*
			 * Test the bits individually: an event may ask for readability
			 * and writability at the same time, and the result loop below
			 * checks both masks for such an event.
			 */
			if (cur_event->events & WL_SOCKET_READABLE)
				FD_SET(cur_event->fd, &input_mask);
			if (cur_event->events & WL_SOCKET_WRITEABLE)
				FD_SET(cur_event->fd, &output_mask);
		}

		if (cur_event->fd > hifd)
			hifd = cur_event->fd;
	}

	/* Sleep */
	if (cur_timeout >= 0)
	{
		/* cur_timeout is in milliseconds; split into seconds + microseconds */
		tv.tv_sec = cur_timeout / 1000L;
		tv.tv_usec = (cur_timeout % 1000L) * 1000L;
		tvp = &tv;
	}
	rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp);

	/* Check return code */
	if (rc < 0)
	{
		/* EINTR is okay, otherwise complain */
		if (errno != EINTR)
		{
			waiting = false;
			ereport(ERROR,
					(errcode_for_socket_access(),
					 errmsg("select() failed: %m")));
		}
		return 0;				/* retry */
	}
	else if (rc == 0)
	{
		/* timeout exceeded */
		return -1;
	}

	/*
	 * To associate events with select's masks, we have to check the status of
	 * the file descriptors associated with an event; by looping through all
	 * events.
	 */
	for (cur_event = set->events;
		 cur_event < (set->events + set->nevents)
		 && returned_events < nevents;
		 cur_event++)
	{
		occurred_events->pos = cur_event->pos;
		occurred_events->user_data = cur_event->user_data;
		occurred_events->events = 0;

		if (cur_event->events == WL_LATCH_SET &&
			FD_ISSET(cur_event->fd, &input_mask))
		{
			/* There's data in the self-pipe, clear it. */
			drainSelfPipe();

			/* Only report the latch if it is actually set. */
			if (set->latch->is_set)
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_LATCH_SET;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events == WL_POSTMASTER_DEATH &&
				 FD_ISSET(cur_event->fd, &input_mask))
		{
			/*
			 * According to the select(2) man page on Linux, select(2) may
			 * spuriously return and report a file descriptor as readable,
			 * when it's not; and presumably so can poll(2).  It's not clear
			 * that the relevant cases would ever apply to the postmaster
			 * pipe, but since the consequences of falsely returning
			 * WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
			 * trouble to positively verify EOF with PostmasterIsAlive().
			 */
			if (!PostmasterIsAlive())
			{
				occurred_events->fd = PGINVALID_SOCKET;
				occurred_events->events = WL_POSTMASTER_DEATH;
				occurred_events++;
				returned_events++;
			}
		}
		else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
		{
			Assert(cur_event->fd != PGINVALID_SOCKET);

			if ((cur_event->events & WL_SOCKET_READABLE) &&
				FD_ISSET(cur_event->fd, &input_mask))
			{
				/* data available in socket, or EOF */
				occurred_events->events |= WL_SOCKET_READABLE;
			}

			if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
				FD_ISSET(cur_event->fd, &output_mask))
			{
				/* socket is writeable, or EOF */
				occurred_events->events |= WL_SOCKET_WRITEABLE;
			}

			if (occurred_events->events != 0)
			{
				occurred_events->fd = cur_event->fd;
				occurred_events++;
				returned_events++;
			}
		}
	}
	return returned_events;
}
#elif defined(WAIT_USE_WIN32) #elif defined(WAIT_USE_WIN32)
/* /*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment