Commit 966970ed authored by Tom Lane's avatar Tom Lane

Re-revert stats collector latch changes.

This reverts commit cb2f2873, restoring
the latch-ified stats collector logic.  We'll soon see if this works any
better on the Windows buildfarm machines.
parent b85427f2
...@@ -28,12 +28,6 @@ ...@@ -28,12 +28,6 @@
#include <arpa/inet.h> #include <arpa/inet.h>
#include <signal.h> #include <signal.h>
#include <time.h> #include <time.h>
#ifdef HAVE_POLL_H
#include <poll.h>
#endif
#ifdef HAVE_SYS_POLL_H
#include <sys/poll.h>
#endif
#include "pgstat.h" #include "pgstat.h"
...@@ -55,8 +49,8 @@ ...@@ -55,8 +49,8 @@
#include "storage/backendid.h" #include "storage/backendid.h"
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/latch.h"
#include "storage/pg_shmem.h" #include "storage/pg_shmem.h"
#include "storage/pmsignal.h"
#include "storage/procsignal.h" #include "storage/procsignal.h"
#include "utils/ascii.h" #include "utils/ascii.h"
#include "utils/guc.h" #include "utils/guc.h"
...@@ -94,9 +88,6 @@ ...@@ -94,9 +88,6 @@
* failed statistics collector; in * failed statistics collector; in
* seconds. */ * seconds. */
#define PGSTAT_SELECT_TIMEOUT 2 /* How often to check for postmaster
* death; in seconds. */
#define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY) #define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
#define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY) #define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)
...@@ -139,6 +130,8 @@ PgStat_MsgBgWriter BgWriterStats; ...@@ -139,6 +130,8 @@ PgStat_MsgBgWriter BgWriterStats;
*/ */
NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET; NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET;
static Latch pgStatLatch;
static struct sockaddr_storage pgStatAddr; static struct sockaddr_storage pgStatAddr;
static time_t last_pgstat_start_time; static time_t last_pgstat_start_time;
...@@ -3009,15 +3002,7 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3009,15 +3002,7 @@ PgstatCollectorMain(int argc, char *argv[])
{ {
int len; int len;
PgStat_Msg msg; PgStat_Msg msg;
int wr;
#ifndef WIN32
#ifdef HAVE_POLL
struct pollfd input_fd;
#else
struct timeval sel_timeout;
fd_set rfds;
#endif
#endif
IsUnderPostmaster = true; /* we are a postmaster subprocess now */ IsUnderPostmaster = true; /* we are a postmaster subprocess now */
...@@ -3036,9 +3021,13 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3036,9 +3021,13 @@ PgstatCollectorMain(int argc, char *argv[])
elog(FATAL, "setsid() failed: %m"); elog(FATAL, "setsid() failed: %m");
#endif #endif
/* Initialize private latch for use by signal handlers */
InitLatch(&pgStatLatch);
/* /*
* Ignore all signals usually bound to some action in the postmaster, * Ignore all signals usually bound to some action in the postmaster,
* except SIGQUIT. * except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to
* support latch operations, because pgStatLatch is local not shared.
*/ */
pqsignal(SIGHUP, pgstat_sighup_handler); pqsignal(SIGHUP, pgstat_sighup_handler);
pqsignal(SIGINT, SIG_IGN); pqsignal(SIGINT, SIG_IGN);
...@@ -3073,26 +3062,24 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3073,26 +3062,24 @@ PgstatCollectorMain(int argc, char *argv[])
pgStatRunningInCollector = true; pgStatRunningInCollector = true;
pgStatDBHash = pgstat_read_statsfile(InvalidOid, true); pgStatDBHash = pgstat_read_statsfile(InvalidOid, true);
/*
* Setup the descriptor set for select(2). Since only one bit in the set
* ever changes, we need not repeat FD_ZERO each time.
*/
#if !defined(HAVE_POLL) && !defined(WIN32)
FD_ZERO(&rfds);
#endif
/* /*
* Loop to process messages until we get SIGQUIT or detect ungraceful * Loop to process messages until we get SIGQUIT or detect ungraceful
* death of our parent postmaster. * death of our parent postmaster.
* *
* For performance reasons, we don't want to do a PostmasterIsAlive() test * For performance reasons, we don't want to do ResetLatch/WaitLatch after
* after every message; instead, do it only when select()/poll() is * every message; instead, do that only after a recv() fails to obtain a
* interrupted by timeout. In essence, we'll stay alive as long as * message. (This effectively means that if backends are sending us stuff
* backends keep sending us stuff often, even if the postmaster is gone. * like mad, we won't notice postmaster death until things slack off a
* bit; which seems fine.) To do that, we have an inner loop that
* iterates as long as recv() succeeds. We do recognize got_SIGHUP inside
* the inner loop, which means that such interrupts will get serviced but
* the latch won't get cleared until next time there is a break in the
* action.
*/ */
for (;;) for (;;)
{ {
int got_data; /* Clear any already-pending wakeups */
ResetLatch(&pgStatLatch);
/* /*
* Quit if we get SIGQUIT from the postmaster. * Quit if we get SIGQUIT from the postmaster.
...@@ -3100,13 +3087,19 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3100,13 +3087,19 @@ PgstatCollectorMain(int argc, char *argv[])
if (need_exit) if (need_exit)
break; break;
/*
* Inner loop iterates as long as we keep getting messages, or until
* need_exit becomes set.
*/
while (!need_exit)
{
/* /*
* Reload configuration if we got SIGHUP from the postmaster. * Reload configuration if we got SIGHUP from the postmaster.
*/ */
if (got_SIGHUP) if (got_SIGHUP)
{ {
ProcessConfigFile(PGC_SIGHUP);
got_SIGHUP = false; got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
} }
/* /*
...@@ -3117,71 +3110,15 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3117,71 +3110,15 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_write_statsfile(false); pgstat_write_statsfile(false);
/* /*
* Wait for a message to arrive; but not for more than * Try to receive and process a message. This will not block,
* PGSTAT_SELECT_TIMEOUT seconds. (This determines how quickly we will * since the socket is set to non-blocking mode.
* shut down after an ungraceful postmaster termination; so it needn't
* be very fast. However, on some systems SIGQUIT won't interrupt the
* poll/select call, so this also limits speed of response to SIGQUIT,
* which is more important.)
*
* We use poll(2) if available, otherwise select(2). Win32 has its own
* implementation.
*/ */
#ifndef WIN32
#ifdef HAVE_POLL
input_fd.fd = pgStatSock;
input_fd.events = POLLIN | POLLERR;
input_fd.revents = 0;
if (poll(&input_fd, 1, PGSTAT_SELECT_TIMEOUT * 1000) < 0)
{
if (errno == EINTR)
continue;
ereport(ERROR,
(errcode_for_socket_access(),
errmsg("poll() failed in statistics collector: %m")));
}
got_data = (input_fd.revents != 0);
#else /* !HAVE_POLL */
FD_SET(pgStatSock, &rfds);
/*
* timeout struct is modified by select() on some operating systems,
* so re-fill it each time.
*/
sel_timeout.tv_sec = PGSTAT_SELECT_TIMEOUT;
sel_timeout.tv_usec = 0;
if (select(pgStatSock + 1, &rfds, NULL, NULL, &sel_timeout) < 0)
{
if (errno == EINTR)
continue;
ereport(ERROR,
(errcode_for_socket_access(),
errmsg("select() failed in statistics collector: %m")));
}
got_data = FD_ISSET(pgStatSock, &rfds);
#endif /* HAVE_POLL */
#else /* WIN32 */
got_data = pgwin32_waitforsinglesocket(pgStatSock, FD_READ,
PGSTAT_SELECT_TIMEOUT * 1000);
#endif
/*
* If there is a message on the socket, read it and check for
* validity.
*/
if (got_data)
{
len = recv(pgStatSock, (char *) &msg, len = recv(pgStatSock, (char *) &msg,
sizeof(PgStat_Msg), 0); sizeof(PgStat_Msg), 0);
if (len < 0) if (len < 0)
{ {
if (errno == EINTR) if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
continue; break; /* out of inner loop */
ereport(ERROR, ereport(ERROR,
(errcode_for_socket_access(), (errcode_for_socket_access(),
errmsg("could not read statistics message: %m"))); errmsg("could not read statistics message: %m")));
...@@ -3279,17 +3216,21 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3279,17 +3216,21 @@ PgstatCollectorMain(int argc, char *argv[])
default: default:
break; break;
} }
} } /* end of inner message-processing loop */
else
{ /* Sleep until there's something to do */
wr = WaitLatchOrSocket(&pgStatLatch,
WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE,
pgStatSock,
-1L);
/* /*
* We can only get here if the select/poll timeout elapsed. Check * Emergency bailout if postmaster has died. This is to avoid the
* for postmaster death. * necessity for manual cleanup of all postmaster children.
*/ */
if (!PostmasterIsAlive()) if (wr & WL_POSTMASTER_DEATH)
break; break;
} } /* end of outer loop */
} /* end of message-processing loop */
/* /*
* Save the final stats to reuse at next startup. * Save the final stats to reuse at next startup.
...@@ -3304,14 +3245,24 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3304,14 +3245,24 @@ PgstatCollectorMain(int argc, char *argv[])
static void static void
pgstat_exit(SIGNAL_ARGS) pgstat_exit(SIGNAL_ARGS)
{ {
int save_errno = errno;
need_exit = true; need_exit = true;
SetLatch(&pgStatLatch);
errno = save_errno;
} }
/* SIGHUP handler for collector process */ /* SIGHUP handler for collector process */
static void static void
pgstat_sighup_handler(SIGNAL_ARGS) pgstat_sighup_handler(SIGNAL_ARGS)
{ {
int save_errno = errno;
got_SIGHUP = true; got_SIGHUP = true;
SetLatch(&pgStatLatch);
errno = save_errno;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment