Commit 49340037 authored by Tom Lane

Reduce idle power consumption of stats collector process.

Latch-ify the stats collector, so that it does not need an arbitrary wakeup
cycle to check for postmaster death.  The incremental savings in idle power
are pretty marginal, since we only had it waking every two seconds; but I
believe that this patch may also improve the collector's performance under
load, by reducing the number of kernel calls made per message when messages
are arriving constantly (we now avoid a select/poll call except when we
need to sleep).  The change also reduces the time needed for a normal
database shutdown on platforms where signals don't interrupt select().
parent 5461564a
...@@ -28,12 +28,6 @@ ...@@ -28,12 +28,6 @@
#include <arpa/inet.h> #include <arpa/inet.h>
#include <signal.h> #include <signal.h>
#include <time.h> #include <time.h>
#ifdef HAVE_POLL_H
#include <poll.h>
#endif
#ifdef HAVE_SYS_POLL_H
#include <sys/poll.h>
#endif
#include "pgstat.h" #include "pgstat.h"
...@@ -55,6 +49,7 @@ ...@@ -55,6 +49,7 @@
#include "storage/backendid.h" #include "storage/backendid.h"
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/ipc.h" #include "storage/ipc.h"
#include "storage/latch.h"
#include "storage/pg_shmem.h" #include "storage/pg_shmem.h"
#include "storage/pmsignal.h" #include "storage/pmsignal.h"
#include "storage/procsignal.h" #include "storage/procsignal.h"
...@@ -94,9 +89,6 @@ ...@@ -94,9 +89,6 @@
* failed statistics collector; in * failed statistics collector; in
* seconds. */ * seconds. */
#define PGSTAT_SELECT_TIMEOUT 2 /* How often to check for postmaster
* death; in seconds. */
#define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY) #define PGSTAT_POLL_LOOP_COUNT (PGSTAT_MAX_WAIT_TIME / PGSTAT_RETRY_DELAY)
#define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY) #define PGSTAT_INQ_LOOP_COUNT (PGSTAT_INQ_INTERVAL / PGSTAT_RETRY_DELAY)
...@@ -139,6 +131,8 @@ PgStat_MsgBgWriter BgWriterStats; ...@@ -139,6 +131,8 @@ PgStat_MsgBgWriter BgWriterStats;
*/ */
NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET; NON_EXEC_STATIC pgsocket pgStatSock = PGINVALID_SOCKET;
static Latch pgStatLatch;
static struct sockaddr_storage pgStatAddr; static struct sockaddr_storage pgStatAddr;
static time_t last_pgstat_start_time; static time_t last_pgstat_start_time;
...@@ -3009,15 +3003,7 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3009,15 +3003,7 @@ PgstatCollectorMain(int argc, char *argv[])
{ {
int len; int len;
PgStat_Msg msg; PgStat_Msg msg;
int wr;
#ifndef WIN32
#ifdef HAVE_POLL
struct pollfd input_fd;
#else
struct timeval sel_timeout;
fd_set rfds;
#endif
#endif
IsUnderPostmaster = true; /* we are a postmaster subprocess now */ IsUnderPostmaster = true; /* we are a postmaster subprocess now */
...@@ -3036,9 +3022,13 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3036,9 +3022,13 @@ PgstatCollectorMain(int argc, char *argv[])
elog(FATAL, "setsid() failed: %m"); elog(FATAL, "setsid() failed: %m");
#endif #endif
/* Initialize private latch for use by signal handlers */
InitLatch(&pgStatLatch);
/* /*
* Ignore all signals usually bound to some action in the postmaster, * Ignore all signals usually bound to some action in the postmaster,
* except SIGQUIT. * except SIGHUP and SIGQUIT. Note we don't need a SIGUSR1 handler to
* support latch operations, because pgStatLatch is local not shared.
*/ */
pqsignal(SIGHUP, pgstat_sighup_handler); pqsignal(SIGHUP, pgstat_sighup_handler);
pqsignal(SIGINT, SIG_IGN); pqsignal(SIGINT, SIG_IGN);
...@@ -3073,26 +3063,24 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3073,26 +3063,24 @@ PgstatCollectorMain(int argc, char *argv[])
pgStatRunningInCollector = true; pgStatRunningInCollector = true;
pgStatDBHash = pgstat_read_statsfile(InvalidOid, true); pgStatDBHash = pgstat_read_statsfile(InvalidOid, true);
/*
* Setup the descriptor set for select(2). Since only one bit in the set
* ever changes, we need not repeat FD_ZERO each time.
*/
#if !defined(HAVE_POLL) && !defined(WIN32)
FD_ZERO(&rfds);
#endif
/* /*
* Loop to process messages until we get SIGQUIT or detect ungraceful * Loop to process messages until we get SIGQUIT or detect ungraceful
* death of our parent postmaster. * death of our parent postmaster.
* *
* For performance reasons, we don't want to do a PostmasterIsAlive() test * For performance reasons, we don't want to do ResetLatch/WaitLatch after
* after every message; instead, do it only when select()/poll() is * every message; instead, do that only after a recv() fails to obtain a
* interrupted by timeout. In essence, we'll stay alive as long as * message. (This effectively means that if backends are sending us stuff
* backends keep sending us stuff often, even if the postmaster is gone. * like mad, we won't notice postmaster death until things slack off a
* bit; which seems fine.) To do that, we have an inner loop that
* iterates as long as recv() succeeds. We do recognize got_SIGHUP inside
* the inner loop, which means that such interrupts will get serviced but
* the latch won't get cleared until next time there is a break in the
* action.
*/ */
for (;;) for (;;)
{ {
int got_data; /* Clear any already-pending wakeups */
ResetLatch(&pgStatLatch);
/* /*
* Quit if we get SIGQUIT from the postmaster. * Quit if we get SIGQUIT from the postmaster.
...@@ -3100,13 +3088,19 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3100,13 +3088,19 @@ PgstatCollectorMain(int argc, char *argv[])
if (need_exit) if (need_exit)
break; break;
/*
* Inner loop iterates as long as we keep getting messages, or until
* need_exit becomes set.
*/
while (!need_exit)
{
/* /*
* Reload configuration if we got SIGHUP from the postmaster. * Reload configuration if we got SIGHUP from the postmaster.
*/ */
if (got_SIGHUP) if (got_SIGHUP)
{ {
ProcessConfigFile(PGC_SIGHUP);
got_SIGHUP = false; got_SIGHUP = false;
ProcessConfigFile(PGC_SIGHUP);
} }
/* /*
...@@ -3117,71 +3111,15 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3117,71 +3111,15 @@ PgstatCollectorMain(int argc, char *argv[])
pgstat_write_statsfile(false); pgstat_write_statsfile(false);
/* /*
* Wait for a message to arrive; but not for more than * Try to receive and process a message. This will not block,
* PGSTAT_SELECT_TIMEOUT seconds. (This determines how quickly we will * since the socket is set to non-blocking mode.
* shut down after an ungraceful postmaster termination; so it needn't
* be very fast. However, on some systems SIGQUIT won't interrupt the
* poll/select call, so this also limits speed of response to SIGQUIT,
* which is more important.)
*
* We use poll(2) if available, otherwise select(2). Win32 has its own
* implementation.
*/ */
#ifndef WIN32
#ifdef HAVE_POLL
input_fd.fd = pgStatSock;
input_fd.events = POLLIN | POLLERR;
input_fd.revents = 0;
if (poll(&input_fd, 1, PGSTAT_SELECT_TIMEOUT * 1000) < 0)
{
if (errno == EINTR)
continue;
ereport(ERROR,
(errcode_for_socket_access(),
errmsg("poll() failed in statistics collector: %m")));
}
got_data = (input_fd.revents != 0);
#else /* !HAVE_POLL */
FD_SET(pgStatSock, &rfds);
/*
* timeout struct is modified by select() on some operating systems,
* so re-fill it each time.
*/
sel_timeout.tv_sec = PGSTAT_SELECT_TIMEOUT;
sel_timeout.tv_usec = 0;
if (select(pgStatSock + 1, &rfds, NULL, NULL, &sel_timeout) < 0)
{
if (errno == EINTR)
continue;
ereport(ERROR,
(errcode_for_socket_access(),
errmsg("select() failed in statistics collector: %m")));
}
got_data = FD_ISSET(pgStatSock, &rfds);
#endif /* HAVE_POLL */
#else /* WIN32 */
got_data = pgwin32_waitforsinglesocket(pgStatSock, FD_READ,
PGSTAT_SELECT_TIMEOUT * 1000);
#endif
/*
* If there is a message on the socket, read it and check for
* validity.
*/
if (got_data)
{
len = recv(pgStatSock, (char *) &msg, len = recv(pgStatSock, (char *) &msg,
sizeof(PgStat_Msg), 0); sizeof(PgStat_Msg), 0);
if (len < 0) if (len < 0)
{ {
if (errno == EINTR) if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)
continue; break; /* out of inner loop */
ereport(ERROR, ereport(ERROR,
(errcode_for_socket_access(), (errcode_for_socket_access(),
errmsg("could not read statistics message: %m"))); errmsg("could not read statistics message: %m")));
...@@ -3279,17 +3217,18 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3279,17 +3217,18 @@ PgstatCollectorMain(int argc, char *argv[])
default: default:
break; break;
} }
} } /* end of inner message-processing loop */
else
{ /* Sleep until there's something to do */
/* wr = WaitLatchOrSocket(&pgStatLatch,
* We can only get here if the select/poll timeout elapsed. Check WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_SOCKET_READABLE,
* for postmaster death. pgStatSock,
*/ -1L);
if (!PostmasterIsAlive())
/* Check for postmaster death */
if (wr & WL_POSTMASTER_DEATH)
break; break;
} } /* end of outer loop */
} /* end of message-processing loop */
/* /*
* Save the final stats to reuse at next startup. * Save the final stats to reuse at next startup.
...@@ -3304,14 +3243,24 @@ PgstatCollectorMain(int argc, char *argv[]) ...@@ -3304,14 +3243,24 @@ PgstatCollectorMain(int argc, char *argv[])
static void static void
pgstat_exit(SIGNAL_ARGS) pgstat_exit(SIGNAL_ARGS)
{ {
int save_errno = errno;
need_exit = true; need_exit = true;
SetLatch(&pgStatLatch);
errno = save_errno;
} }
/* SIGHUP handler for collector process */ /* SIGHUP handler for collector process */
static void static void
pgstat_sighup_handler(SIGNAL_ARGS) pgstat_sighup_handler(SIGNAL_ARGS)
{ {
int save_errno = errno;
got_SIGHUP = true; got_SIGHUP = true;
SetLatch(&pgStatLatch);
errno = save_errno;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment