Commit 61c21dda authored by Andres Freund

Remove select(2) backed latch implementation.

poll(2) is required by Single Unix Spec v2, the usual baseline for
postgres (leaving Windows aside).  There have not been any buildfarm
animals without poll(2) for a long while, leaving the select(2)
implementation largely untested.

On Windows, including MinGW, poll() is not available, but we have a
special-case implementation for Windows anyway.

Author: Andres Freund
Discussion: https://postgr.es/m/20170420003611.7r2sdvehesdyiz2i@alap3.anarazel.de
parent 546c13e1
@@ -3,27 +3,24 @@
* latch.c
* Routines for inter-process latches
*
* The Unix implementation uses the so-called self-pipe trick to overcome
* the race condition involved with select() and setting a global flag
* in the signal handler. When a latch is set and the current process
* is waiting for it, the signal handler wakes up the select() in
* WaitLatch by writing a byte to a pipe. A signal by itself doesn't
* interrupt select() on all platforms, and even on platforms where it
* does, a signal that arrives just before the select() call does not
* prevent the select() from entering sleep. An incoming byte on a pipe
* however reliably interrupts the sleep, and causes select() to return
* immediately even if the signal arrives before select() begins.
*
* (Actually, we prefer epoll_wait() over poll() over select() where
* available, but the same comments apply.)
* The Unix implementation uses the so-called self-pipe trick to overcome the
* race condition involved with poll() (or epoll_wait() on linux) and setting
* a global flag in the signal handler. When a latch is set and the current
* process is waiting for it, the signal handler wakes up the poll() in
* WaitLatch by writing a byte to a pipe. A signal by itself doesn't interrupt
* poll() on all platforms, and even on platforms where it does, a signal that
* arrives just before the poll() call does not prevent poll() from entering
* sleep. An incoming byte on a pipe however reliably interrupts the sleep,
* and causes poll() to return immediately even if the signal arrives before
* poll() begins.
*
* When SetLatch is called from the same process that owns the latch,
* SetLatch writes the byte directly to the pipe. If it's owned by another
* process, SIGUSR1 is sent and the signal handler in the waiting process
* writes the byte to the pipe on behalf of the signaling process.
*
* The Windows implementation uses Windows events that are inherited by
* all postmaster child processes.
* The Windows implementation uses Windows events that are inherited by all
* postmaster child processes. There's no need for the self-pipe trick there.
*
* Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
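For illustration, the self-pipe trick described in the comment above can be sketched standalone roughly as follows. This is a simplified example, not the latch.c implementation; the names self_pipe, wakeup_handler, and wait_for_latch are invented for the sketch, and both pipe ends are assumed to have been made non-blocking.

/* Minimal sketch of the self-pipe trick; not the latch.c code. */
#include <errno.h>
#include <poll.h>
#include <unistd.h>

static int self_pipe[2];        /* [0] read end, [1] write end, both non-blocking */

static void
wakeup_handler(int signo)
{
    int     save_errno = errno;
    char    c = 0;

    (void) signo;
    /* Non-blocking write end: cannot hang inside the signal handler. */
    (void) write(self_pipe[1], &c, 1);
    errno = save_errno;
}

static void
wait_for_latch(int timeout_ms)
{
    struct pollfd pfd;
    char    buf[16];

    pfd.fd = self_pipe[0];
    pfd.events = POLLIN;

    /*
     * If the signal arrived before poll() was entered, the byte is already
     * sitting in the pipe, so poll() returns immediately instead of
     * sleeping through the wakeup.
     */
    if (poll(&pfd, 1, timeout_ms) > 0)
    {
        /* Drain the pipe so the next wait starts empty. */
        while (read(self_pipe[0], buf, sizeof(buf)) > 0)
            ;
    }
}

In latch.c terms, setting a latch from another process corresponds to sending the signal (SIGUSR1), while the owning process can write the byte directly, as the comment above notes.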
@@ -39,7 +36,6 @@
#include <limits.h>
#include <signal.h>
#include <unistd.h>
#include <sys/time.h>
#ifdef HAVE_SYS_EPOLL_H
#include <sys/epoll.h>
#endif
@@ -49,9 +45,6 @@
#ifdef HAVE_SYS_POLL_H
#include <sys/poll.h>
#endif
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
#include "miscadmin.h"
#include "pgstat.h"
@@ -69,14 +62,12 @@
* define somewhere before this block.
*/
#if defined(WAIT_USE_EPOLL) || defined(WAIT_USE_POLL) || \
defined(WAIT_USE_SELECT) || defined(WAIT_USE_WIN32)
defined(WAIT_USE_WIN32)
/* don't overwrite manual choice */
#elif defined(HAVE_SYS_EPOLL_H)
#define WAIT_USE_EPOLL
#elif defined(HAVE_POLL)
#define WAIT_USE_POLL
#elif HAVE_SYS_SELECT_H
#define WAIT_USE_SELECT
#elif WIN32
#define WAIT_USE_WIN32
#else
@@ -162,8 +153,8 @@ InitializeLatchSupport(void)
/*
* Set up the self-pipe that allows a signal handler to wake up the
* select() in WaitLatch. Make the write-end non-blocking, so that
* SetLatch won't block if the event has already been set many times
* poll()/epoll_wait() in WaitLatch. Make the write-end non-blocking, so
* that SetLatch won't block if the event has already been set many times
* filling the kernel buffer. Make the read-end non-blocking too, so that
* we can easily clear the pipe by reading until EAGAIN or EWOULDBLOCK.
*/
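A rough sketch of what that setup implies, assuming the same self-pipe layout as above; init_self_pipe and drain_self_pipe are invented names for the example, while the real code lives in InitializeLatchSupport() and drainSelfPipe().

#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

/* Illustrative sketch only, not the InitializeLatchSupport()/drainSelfPipe() code. */
static void
init_self_pipe(int self_pipe[2])
{
    pipe(self_pipe);

    /* Non-blocking write end: setting the latch never blocks, even if the pipe is full. */
    fcntl(self_pipe[1], F_SETFL, fcntl(self_pipe[1], F_GETFL) | O_NONBLOCK);

    /* Non-blocking read end: draining can simply read until EAGAIN/EWOULDBLOCK. */
    fcntl(self_pipe[0], F_SETFL, fcntl(self_pipe[0], F_GETFL) | O_NONBLOCK);
}

static void
drain_self_pipe(int fd)
{
    char    buf[1024];

    for (;;)
    {
        ssize_t rc = read(fd, buf, sizeof(buf));

        if (rc > 0)
            continue;           /* keep reading until the pipe is empty */
        if (rc < 0 && errno == EINTR)
            continue;           /* interrupted by a signal, retry */
        break;                  /* EAGAIN/EWOULDBLOCK (empty) or another condition */
    }
}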
@@ -401,8 +392,9 @@ SetLatch(volatile Latch *latch)
/*
* See if anyone's waiting for the latch. It can be the current process if
* we're in a signal handler. We use the self-pipe to wake up the select()
* in that case. If it's another process, send a signal.
* we're in a signal handler. We use the self-pipe to wake up the
* poll()/epoll_wait() in that case. If it's another process, send a
* signal.
*
* Fetch owner_pid only once, in case the latch is concurrently getting
* owned or disowned. XXX: This assumes that pid_t is atomic, which isn't
@@ -666,8 +658,6 @@ AddWaitEventToSet(WaitEventSet *set, uint32 events, pgsocket fd, Latch *latch,
WaitEventAdjustEpoll(set, event, EPOLL_CTL_ADD);
#elif defined(WAIT_USE_POLL)
WaitEventAdjustPoll(set, event);
#elif defined(WAIT_USE_SELECT)
/* nothing to do */
#elif defined(WAIT_USE_WIN32)
WaitEventAdjustWin32(set, event);
#endif
@@ -724,8 +714,6 @@ ModifyWaitEvent(WaitEventSet *set, int pos, uint32 events, Latch *latch)
WaitEventAdjustEpoll(set, event, EPOLL_CTL_MOD);
#elif defined(WAIT_USE_POLL)
WaitEventAdjustPoll(set, event);
#elif defined(WAIT_USE_SELECT)
/* nothing to do */
#elif defined(WAIT_USE_WIN32)
WaitEventAdjustWin32(set, event);
#endif
@@ -1055,9 +1043,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
* because we don't expect the pipe to become readable or to have
* any errors either, treat those cases as postmaster death, too.
*
* As explained in the WAIT_USE_SELECT implementation, select(2)
* may spuriously return. Be paranoid about that here too, a
* spurious WL_POSTMASTER_DEATH would be painful.
* Be paranoid about a spurious event signalling the postmaster as
* being dead. There have been reports about that happening with
* older primitives (select(2) to be specific), and a spurious
* WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
* cost much.
*/
if (!PostmasterIsAlive())
{
@@ -1171,9 +1161,11 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
* we don't expect the pipe to become readable or to have any
* errors either, treat those cases as postmaster death, too.
*
* As explained in the WAIT_USE_SELECT implementation, select(2)
* may spuriously return. Be paranoid about that here too, a
* spurious WL_POSTMASTER_DEATH would be painful.
* Be paranoid about a spurious event signalling the postmaster as
* being dead. There have been reports about that happening with
* older primitives (select(2) to be specific), and a spurious
* WL_POSTMASTER_DEATH event would be painful. Re-checking doesn't
* cost much.
*/
if (!PostmasterIsAlive())
{
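The paranoid re-check in both hunks above amounts to treating a readiness notification on the life-check pipe as a hint only: before reporting WL_POSTMASTER_DEATH, the waiter confirms that the pipe has really hit EOF. A standalone sketch of that pattern follows; postmaster_fd and postmaster_is_alive are illustrative names, and the actual check in PostgreSQL is PostmasterIsAlive().

#include <errno.h>
#include <unistd.h>

/*
 * Sketch: postmaster_fd is the non-blocking read end of a pipe whose write
 * end is held open only by the postmaster, so EOF means it has exited.
 */
static int
postmaster_is_alive(int postmaster_fd)
{
    char    c;
    ssize_t rc = read(postmaster_fd, &c, 1);

    /* The normal case: the pipe is still open but has no data in it. */
    if (rc < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
        return 1;

    /* EOF, unexpected data, or a read error: treat as postmaster death. */
    return 0;
}

Only when this check fails does the wait loop report WL_POSTMASTER_DEATH; a readiness event with the pipe still open is ignored as spurious, which is exactly what the comments above describe.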
@@ -1214,163 +1206,6 @@ WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
return returned_events;
}
#elif defined(WAIT_USE_SELECT)
/*
* Wait using select(2).
*
* XXX: On at least older linux kernels select(), in violation of POSIX,
* doesn't reliably return a socket as writable if closed - but we rely on
* that. So far all the known cases of this problem are on platforms that also
* provide a poll() implementation without that bug. If we find one where
* that's not the case, we'll need to add a workaround.
*/
static inline int
WaitEventSetWaitBlock(WaitEventSet *set, int cur_timeout,
WaitEvent *occurred_events, int nevents)
{
int returned_events = 0;
int rc;
WaitEvent *cur_event;
fd_set input_mask;
fd_set output_mask;
int hifd;
struct timeval tv;
struct timeval *tvp = NULL;
FD_ZERO(&input_mask);
FD_ZERO(&output_mask);
/*
* Prepare input/output masks. We do so every loop iteration as there's no
* entirely portable way to copy fd_sets.
*/
for (cur_event = set->events;
cur_event < (set->events + set->nevents);
cur_event++)
{
if (cur_event->events == WL_LATCH_SET)
FD_SET(cur_event->fd, &input_mask);
else if (cur_event->events == WL_POSTMASTER_DEATH)
FD_SET(cur_event->fd, &input_mask);
else
{
Assert(cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE));
if (cur_event->events == WL_SOCKET_READABLE)
FD_SET(cur_event->fd, &input_mask);
else if (cur_event->events == WL_SOCKET_WRITEABLE)
FD_SET(cur_event->fd, &output_mask);
}
if (cur_event->fd > hifd)
hifd = cur_event->fd;
}
/* Sleep */
if (cur_timeout >= 0)
{
tv.tv_sec = cur_timeout / 1000L;
tv.tv_usec = (cur_timeout % 1000L) * 1000L;
tvp = &tv;
}
rc = select(hifd + 1, &input_mask, &output_mask, NULL, tvp);
/* Check return code */
if (rc < 0)
{
/* EINTR is okay, otherwise complain */
if (errno != EINTR)
{
waiting = false;
ereport(ERROR,
(errcode_for_socket_access(),
errmsg("select() failed: %m")));
}
return 0; /* retry */
}
else if (rc == 0)
{
/* timeout exceeded */
return -1;
}
/*
* To associate events with select's masks, we have to check the status of
* the file descriptors associated with an event; by looping through all
* events.
*/
for (cur_event = set->events;
cur_event < (set->events + set->nevents)
&& returned_events < nevents;
cur_event++)
{
occurred_events->pos = cur_event->pos;
occurred_events->user_data = cur_event->user_data;
occurred_events->events = 0;
if (cur_event->events == WL_LATCH_SET &&
FD_ISSET(cur_event->fd, &input_mask))
{
/* There's data in the self-pipe, clear it. */
drainSelfPipe();
if (set->latch->is_set)
{
occurred_events->fd = PGINVALID_SOCKET;
occurred_events->events = WL_LATCH_SET;
occurred_events++;
returned_events++;
}
}
else if (cur_event->events == WL_POSTMASTER_DEATH &&
FD_ISSET(cur_event->fd, &input_mask))
{
/*
* According to the select(2) man page on Linux, select(2) may
* spuriously return and report a file descriptor as readable,
* when it's not; and presumably so can poll(2). It's not clear
* that the relevant cases would ever apply to the postmaster
* pipe, but since the consequences of falsely returning
* WL_POSTMASTER_DEATH could be pretty unpleasant, we take the
* trouble to positively verify EOF with PostmasterIsAlive().
*/
if (!PostmasterIsAlive())
{
occurred_events->fd = PGINVALID_SOCKET;
occurred_events->events = WL_POSTMASTER_DEATH;
occurred_events++;
returned_events++;
}
}
else if (cur_event->events & (WL_SOCKET_READABLE | WL_SOCKET_WRITEABLE))
{
Assert(cur_event->fd != PGINVALID_SOCKET);
if ((cur_event->events & WL_SOCKET_READABLE) &&
FD_ISSET(cur_event->fd, &input_mask))
{
/* data available in socket, or EOF */
occurred_events->events |= WL_SOCKET_READABLE;
}
if ((cur_event->events & WL_SOCKET_WRITEABLE) &&
FD_ISSET(cur_event->fd, &output_mask))
{
/* socket is writeable, or EOF */
occurred_events->events |= WL_SOCKET_WRITEABLE;
}
if (occurred_events->events != 0)
{
occurred_events->fd = cur_event->fd;
occurred_events++;
returned_events++;
}
}
}
return returned_events;
}
#elif defined(WAIT_USE_WIN32)
/*