Commit d8735b8b authored by Fujii Masao's avatar Fujii Masao

Fix issues in pg_stat_wal.

1) Previously there were both pgstat_send_wal() and pgstat_report_wal()
   in order to send WAL activity to the stats collector. With the former being
   used by wal writer, the latter by most other processes. They were a bit
   redundant and so this commit merges them into pgstat_send_wal() to
   simplify the code.

2) Previously WAL global statistics counters were calculated and then
   compared with zero-filled buffer in order to determine whether any WAL
   activity has happened since the last submission. These calculation and
   comparison were not cheap. This was regularly exercised even in read-only
   workloads. This commit fixes the issue by making some WAL activity
   counters directly be checked to determine if there's WAL activity stats
   to send.

3) Previously pgstat_report_stat() did not check if there's WAL activity
   stats to send as part of the "Don't expend a clock check if nothing to do"
   check at the top. It's probably rare to have pending WAL stats without
   also passing one of the other conditions, but for safely this commit
   changes pgstat_report_stats() so that it checks also some WAL activity
   counters at the top.

This commit also adds the comments about the design of WAL stats.

Reported-by: Andres Freund
Author: Masahiro Ikeda
Reviewed-by: Kyotaro Horiguchi, Atsushi Torikoshi, Andres Freund, Fujii Masao
Discussion: https://postgr.es/m/20210324232224.vrfiij2rxxwqqjjb@alap3.anarazel.de
parent 694da198
......@@ -505,7 +505,7 @@ CheckpointerMain(void)
pgstat_send_bgwriter();
/* Send WAL statistics to the stats collector. */
pgstat_report_wal();
pgstat_send_wal(true);
/*
* If any checkpoint flags have been set, redo the loop to handle the
......@@ -583,7 +583,7 @@ HandleCheckpointerInterrupts(void)
BgWriterStats.m_requested_checkpoints++;
ShutdownXLOG(0, 0);
pgstat_send_bgwriter();
pgstat_report_wal();
pgstat_send_wal(true);
/* Normal exit from the checkpointer is here */
proc_exit(0); /* done */
......
......@@ -133,8 +133,8 @@ PgStat_MsgWal WalStats;
/*
* WAL usage counters saved from pgWALUsage at the previous call to
* pgstat_report_wal(). This is used to calculate how much WAL usage
* happens between pgstat_report_wal() calls, by substracting
* pgstat_send_wal(). This is used to calculate how much WAL usage
* happens between pgstat_send_wal() calls, by substracting
* the previous counters from the current ones.
*/
static WalUsage prevWalUsage;
......@@ -852,9 +852,19 @@ pgstat_report_stat(bool disconnect)
TabStatusArray *tsa;
int i;
/* Don't expend a clock check if nothing to do */
/*
* Don't expend a clock check if nothing to do.
*
* To determine whether any WAL activity has occurred since last time, not
* only the number of generated WAL records but also the numbers of WAL
* writes and syncs need to be checked. Because even transaction that
* generates no WAL records can write or sync WAL data when flushing the
* data pages.
*/
if ((pgStatTabList == NULL || pgStatTabList->tsa_used == 0) &&
pgStatXactCommit == 0 && pgStatXactRollback == 0 &&
pgWalUsage.wal_records == prevWalUsage.wal_records &&
WalStats.m_wal_write == 0 && WalStats.m_wal_sync == 0 &&
!have_function_stats && !disconnect)
return;
......@@ -948,7 +958,7 @@ pgstat_report_stat(bool disconnect)
pgstat_send_funcstats();
/* Send WAL statistics */
pgstat_report_wal();
pgstat_send_wal(true);
/* Finally send SLRU statistics */
pgstat_send_slru();
......@@ -2918,7 +2928,7 @@ void
pgstat_initialize(void)
{
/*
* Initialize prevWalUsage with pgWalUsage so that pgstat_report_wal() can
* Initialize prevWalUsage with pgWalUsage so that pgstat_send_wal() can
* calculate how much pgWalUsage counters are increased by substracting
* prevWalUsage from pgWalUsage.
*/
......@@ -3030,44 +3040,6 @@ pgstat_send_bgwriter(void)
MemSet(&BgWriterStats, 0, sizeof(BgWriterStats));
}
/* ----------
* pgstat_report_wal() -
*
* Calculate how much WAL usage counters are increased and send
* WAL statistics to the collector.
*
* Must be called by processes that generate WAL.
* ----------
*/
void
pgstat_report_wal(void)
{
WalUsage walusage;
/*
* Calculate how much WAL usage counters are increased by substracting the
* previous counters from the current ones. Fill the results in WAL stats
* message.
*/
MemSet(&walusage, 0, sizeof(WalUsage));
WalUsageAccumDiff(&walusage, &pgWalUsage, &prevWalUsage);
WalStats.m_wal_records = walusage.wal_records;
WalStats.m_wal_fpi = walusage.wal_fpi;
WalStats.m_wal_bytes = walusage.wal_bytes;
/*
* Send WAL stats message to the collector.
*/
if (!pgstat_send_wal(true))
return;
/*
* Save the current counters for the subsequent calculation of WAL usage.
*/
prevWalUsage = pgWalUsage;
}
/* ----------
* pgstat_send_wal() -
*
......@@ -3075,24 +3047,39 @@ pgstat_report_wal(void)
*
* If 'force' is not set, WAL stats message is only sent if enough time has
* passed since last one was sent to reach PGSTAT_STAT_INTERVAL.
*
* Return true if the message is sent, and false otherwise.
* ----------
*/
bool
void
pgstat_send_wal(bool force)
{
/* We assume this initializes to zeroes */
static const PgStat_MsgWal all_zeroes;
static TimestampTz sendTime = 0;
/*
* This function can be called even if nothing at all has happened. In
* this case, avoid sending a completely empty message to the stats
* collector.
*
* Check wal_records counter to determine whether any WAL activity has
* happened since last time. Note that other WalUsage counters don't need
* to be checked because they are incremented always together with
* wal_records counter.
*
* m_wal_buffers_full also doesn't need to be checked because it's
* incremented only when at least one WAL record is generated (i.e.,
* wal_records counter is incremented). But for safely, we assert that
* m_wal_buffers_full is always zero when no WAL record is generated
*
* This function can be called by a process like walwriter that normally
* generates no WAL records. To determine whether any WAL activity has
* happened at that process since the last time, the numbers of WAL writes
* and syncs are also checked.
*/
if (memcmp(&WalStats, &all_zeroes, sizeof(PgStat_MsgWal)) == 0)
return false;
if (pgWalUsage.wal_records == prevWalUsage.wal_records &&
WalStats.m_wal_write == 0 && WalStats.m_wal_sync == 0)
{
Assert(WalStats.m_wal_buffers_full == 0);
return;
}
if (!force)
{
......@@ -3100,13 +3087,41 @@ pgstat_send_wal(bool force)
/*
* Don't send a message unless it's been at least PGSTAT_STAT_INTERVAL
* msec since we last sent one.
* msec since we last sent one to avoid overloading the stats
* collector.
*/
if (!TimestampDifferenceExceeds(sendTime, now, PGSTAT_STAT_INTERVAL))
return false;
return;
sendTime = now;
}
/*
* Set the counters related to generated WAL data if the counters were
* updated.
*/
if (pgWalUsage.wal_records != prevWalUsage.wal_records)
{
WalUsage walusage;
/*
* Calculate how much WAL usage counters were increased by
* substracting the previous counters from the current ones. Fill the
* results in WAL stats message.
*/
MemSet(&walusage, 0, sizeof(WalUsage));
WalUsageAccumDiff(&walusage, &pgWalUsage, &prevWalUsage);
WalStats.m_wal_records = walusage.wal_records;
WalStats.m_wal_fpi = walusage.wal_fpi;
WalStats.m_wal_bytes = walusage.wal_bytes;
/*
* Save the current counters for the subsequent calculation of WAL
* usage.
*/
prevWalUsage = pgWalUsage;
}
/*
* Prepare and send the message
*/
......@@ -3117,8 +3132,6 @@ pgstat_send_wal(bool force)
* Clear out the statistics buffer, so it can be re-used.
*/
MemSet(&WalStats, 0, sizeof(WalStats));
return true;
}
/* ----------
......
......@@ -16,6 +16,11 @@
#include "portability/instr_time.h"
/*
* BufferUsage and WalUsage counters keep being incremented infinitely,
* i.e., must never be reset to zero, so that we can calculate how much
* the counters are incremented in an arbitrary period.
*/
typedef struct BufferUsage
{
int64 shared_blks_hit; /* # of shared buffer hits */
......@@ -32,6 +37,13 @@ typedef struct BufferUsage
instr_time blk_write_time; /* time spent writing */
} BufferUsage;
/*
* WalUsage tracks only WAL activity like WAL records generation that
* can be measured per query and is displayed by EXPLAIN command,
* pg_stat_statements extension, etc. It does not track other WAL activity
* like WAL writes that it's not worth measuring per query. That's tracked
* by WAL global statistics counters in WalStats, instead.
*/
typedef struct WalUsage
{
int64 wal_records; /* # of WAL records produced */
......
......@@ -1091,8 +1091,7 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
extern void pgstat_send_archiver(const char *xlog, bool failed);
extern void pgstat_send_bgwriter(void);
extern void pgstat_report_wal(void);
extern bool pgstat_send_wal(bool force);
extern void pgstat_send_wal(bool force);
/* ----------
* Support functions for the SQL-callable functions to
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment