Commit 28cac71b authored by Tomas Vondra's avatar Tomas Vondra

Collect statistics about SLRU caches

There are a number of SLRU caches used to access important data like clog,
commit timestamps, multixact, asynchronous notifications, etc. Until now
we had no easy way to monitor these shared caches, compute hit ratios,
number of reads/writes etc.

This commit extends the statistics collector to track this information
for a predefined list of SLRUs, and also introduces a new system view
pg_stat_slru displaying the data.

The list of built-in SLRUs is fixed, but additional SLRUs may be defined
in extensions. Unfortunately, there's no suitable registry of SLRUs, so
this patch simply defines a fixed list of SLRUs with entries for the
built-in ones and one entry for all additional SLRUs. Extensions adding
their own SLRU are fairly rare, so this seems acceptable.

This patch only allows monitoring of SLRUs, not tuning. The SLRU sizes
are still fixed (hard-coded in the code) and it's not entirely clear
which of the SLRUs might need a GUC to tune size. In a way, allowing us
to determine that is one of the goals of this patch.

Bump catversion as the patch introduces new functions and a new system view.

Author: Tomas Vondra
Reviewed-by: Alvaro Herrera
Discussion: https://www.postgresql.org/message-id/flat/20200119143707.gyinppnigokesjok@development
parent 17ca0679
......@@ -575,6 +575,13 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
yet included in <structname>pg_stat_user_functions</structname>).</entry>
</row>
<row>
<entry><structname>pg_stat_slru</structname><indexterm><primary>pg_stat_slru</primary></indexterm></entry>
<entry>One row per SLRU, showing statistics of operations. See
<xref linkend="pg-stat-slru-view"/> for details.
</entry>
</row>
</tbody>
</tgroup>
</table>
......@@ -3259,6 +3266,76 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
</tgroup>
</table>
<para>
The <structname>pg_stat_slru</structname> view will contain
one row for each tracked SLRU cache, showing statistics about access
to cached pages.
</para>
<table id="pg-stat-slru-view" xreflabel="pg_stat_slru">
<title><structname>pg_stat_slru</structname> View</title>
<tgroup cols="3">
<thead>
<row>
<entry>Column</entry>
<entry>Type</entry>
<entry>Description</entry>
</row>
</thead>
<tbody>
<row>
<entry><structfield>name</structfield></entry>
<entry><type>text</type></entry>
<entry>Name of the SLRU</entry>
</row>
<row>
<entry><structfield>blks_zeroed</structfield></entry>
<entry><type>bigint</type></entry>
<entry>Number of blocks zeroed during initializations</entry>
</row>
<row>
<entry><structfield>blks_hit</structfield></entry>
<entry><type>bigint</type></entry>
<entry>Number of times disk blocks were found already in the SLRU,
so that a read was not necessary (this only includes hits in the
SLRU, not the operating system's file system cache)
</entry>
</row>
<row>
<entry><structfield>blks_read</structfield></entry>
<entry><type>bigint</type></entry>
<entry>Number of disk blocks read for this SLRU</entry>
</row>
<row>
<entry><structfield>blks_written</structfield></entry>
<entry><type>bigint</type></entry>
<entry>Number of disk blocks written for this SLRU</entry>
</row>
<row>
<entry><structfield>blks_exists</structfield></entry>
<entry><type>bigint</type></entry>
<entry>Number of blocks checked for existence for this SLRU</entry>
</row>
<row>
<entry><structfield>flushes</structfield></entry>
<entry><type>bigint</type></entry>
<entry>Number of flushes of dirty data for this SLRU</entry>
</row>
<row>
<entry><structfield>truncates</structfield></entry>
<entry><type>bigint</type></entry>
<entry>Number of truncates for this SLRU</entry>
</row>
<row>
<entry><structfield>stats_reset</structfield></entry>
<entry><type>timestamp with time zone</type></entry>
<entry>Time at which these statistics were last reset</entry>
</row>
</tbody>
</tgroup>
</table>
<para>
The <structname>pg_stat_user_functions</structname> view will contain
one row for each tracked function, showing statistics about executions of
......@@ -3383,6 +3460,26 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
function can be granted to others)
</entry>
</row>
<row>
<entry><literal><function>pg_stat_reset_slru</function>(text)</literal><indexterm><primary>pg_stat_reset_slru</primary></indexterm></entry>
<entry><type>void</type></entry>
<entry>
Reset statistics either for a single SLRU or all SLRUs in the cluster
to zero (requires superuser privileges by default, but EXECUTE for this
function can be granted to others).
Calling <literal>pg_stat_reset_slru(NULL)</literal> will zero all the
counters shown in the <structname>pg_stat_slru</structname> view for
all SLRU caches.
Calling <literal>pg_stat_reset_slru(name)</literal> with names from a
predefined list (<literal>async</literal>, <literal>clog</literal>,
<literal>commit_timestamp</literal>, <literal>multixact_offset</literal>,
<literal>multixact_member</literal>, <literal>oldserxid</literal>,
<literal>pg_xact</literal>, <literal>subtrans</literal> and
<literal>other</literal>) resets counters for only that entry.
Names not included in this list are treated as <literal>other</literal>.
</entry>
</row>
</tbody>
</tgroup>
</table>
......
......@@ -286,6 +286,9 @@ SimpleLruZeroPage(SlruCtl ctl, int pageno)
/* Assume this page is now the latest active page */
shared->latest_page_number = pageno;
/* update the stats counter of zeroed pages */
pgstat_count_slru_page_zeroed(ctl);
return slotno;
}
......@@ -403,6 +406,10 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
}
/* Otherwise, it's ready to use */
SlruRecentlyUsed(shared, slotno);
/* update the stats counter of pages found in the SLRU */
pgstat_count_slru_page_hit(ctl);
return slotno;
}
......@@ -444,6 +451,10 @@ SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
SlruReportIOError(ctl, pageno, xid);
SlruRecentlyUsed(shared, slotno);
/* update the stats counter of pages not found in SLRU */
pgstat_count_slru_page_read(ctl);
return slotno;
}
}
......@@ -596,6 +607,9 @@ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
bool result;
off_t endpos;
/* update the stats counter of checked pages */
pgstat_count_slru_page_exists(ctl);
SlruFileName(ctl, path, segno);
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
......@@ -730,6 +744,9 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruFlush fdata)
char path[MAXPGPATH];
int fd = -1;
/* update the stats counter of written pages */
pgstat_count_slru_page_written(ctl);
/*
* Honor the write-WAL-before-data rule, if appropriate, so that we do not
* write out data before associated WAL records. This is the same action
......@@ -1125,6 +1142,9 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
int i;
bool ok;
/* update the stats counter of flushes */
pgstat_count_slru_flush(ctl);
/*
* Find and write dirty pages
*/
......@@ -1186,6 +1206,9 @@ SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
SlruShared shared = ctl->shared;
int slotno;
/* update the stats counter of truncates */
pgstat_count_slru_truncate(ctl);
/*
* The cutoff point is the start of the segment containing cutoffPage.
*/
......
......@@ -793,6 +793,19 @@ CREATE VIEW pg_stat_replication AS
JOIN pg_stat_get_wal_senders() AS W ON (S.pid = W.pid)
LEFT JOIN pg_authid AS U ON (S.usesysid = U.oid);
-- One row per tracked SLRU cache, exposing the counters returned by
-- pg_stat_get_slru() under the same column names.
CREATE VIEW pg_stat_slru AS
    SELECT
        s.name,
        s.blks_zeroed,
        s.blks_hit,
        s.blks_read,
        s.blks_written,
        s.blks_exists,
        s.flushes,
        s.truncates,
        s.stats_reset
    FROM pg_stat_get_slru() AS s;
CREATE VIEW pg_stat_wal_receiver AS
SELECT
s.pid,
......@@ -1410,6 +1423,7 @@ REVOKE EXECUTE ON FUNCTION pg_promote(boolean, integer) FROM public;
REVOKE EXECUTE ON FUNCTION pg_stat_reset() FROM public;
REVOKE EXECUTE ON FUNCTION pg_stat_reset_shared(text) FROM public;
REVOKE EXECUTE ON FUNCTION pg_stat_reset_slru(text) FROM public;
REVOKE EXECUTE ON FUNCTION pg_stat_reset_single_table_counters(oid) FROM public;
REVOKE EXECUTE ON FUNCTION pg_stat_reset_single_function_counters(oid) FROM public;
......
This diff is collapsed.
......@@ -1690,6 +1690,83 @@ pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
PG_RETURN_INT64(pgstat_fetch_global()->buf_alloc);
}
/*
 * Returns statistics of SLRU caches.
 *
 * Set-returning function that materializes its whole result into a
 * tuplestore: one row per SLRU entry, with PG_STAT_GET_SLRU_COLS columns
 * (name, the seven counters, and the reset timestamp).
 *
 * Errors out if the caller cannot accept a materialized set, or if the
 * declared result type is not a row type.
 */
Datum
pg_stat_get_slru(PG_FUNCTION_ARGS)
{
#define PG_STAT_GET_SLRU_COLS	9
	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
	TupleDesc	tupdesc;
	Tuplestorestate *tupstore;
	MemoryContext per_query_ctx;
	MemoryContext oldcontext;
	int			i;
	PgStat_SLRUStats *stats;

	/* check to see if caller supports us returning a tuplestore */
	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("set-valued function called in context that cannot accept a set")));
	if (!(rsinfo->allowedModes & SFRM_Materialize))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("materialize mode required, but it is not allowed in this context")));

	/* Build a tuple descriptor for our result type */
	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
		elog(ERROR, "return type must be a row type");

	/* The tuplestore must be created in the per-query memory context */
	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
	oldcontext = MemoryContextSwitchTo(per_query_ctx);

	tupstore = tuplestore_begin_heap(true, false, work_mem);
	rsinfo->returnMode = SFRM_Materialize;
	rsinfo->setResult = tupstore;
	rsinfo->setDesc = tupdesc;

	MemoryContextSwitchTo(oldcontext);

	/* request SLRU stats from the stat collector */
	stats = pgstat_fetch_slru();

	for (i = 0;; i++)
	{
		/* for each row */
		Datum		values[PG_STAT_GET_SLRU_COLS];
		bool		nulls[PG_STAT_GET_SLRU_COLS];
		PgStat_SLRUStats stat;
		char	   *name;

		/* A NULL name marks the end of the list of SLRU entries */
		name = pgstat_slru_name(i);

		if (!name)
			break;

		/*
		 * Only touch stats[i] once the name lookup has confirmed that i is a
		 * valid entry; copying it before the check would read one element
		 * past the last named SLRU on the final iteration.
		 */
		stat = stats[i];

		MemSet(values, 0, sizeof(values));
		MemSet(nulls, 0, sizeof(nulls));

		values[0] = PointerGetDatum(cstring_to_text(name));
		values[1] = Int64GetDatum(stat.blocks_zeroed);
		values[2] = Int64GetDatum(stat.blocks_hit);
		values[3] = Int64GetDatum(stat.blocks_read);
		values[4] = Int64GetDatum(stat.blocks_written);
		values[5] = Int64GetDatum(stat.blocks_exists);
		values[6] = Int64GetDatum(stat.flush);
		values[7] = Int64GetDatum(stat.truncate);

		/*
		 * NOTE(review): this column is declared timestamptz; a
		 * TimestampTz-specific Datum conversion would state the intent more
		 * clearly than Int64GetDatum -- confirm one is in scope in this file.
		 */
		values[8] = Int64GetDatum(stat.stat_reset_timestamp);

		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
	}

	/* clean up and return the tuplestore */
	tuplestore_donestoring(tupstore);

	return (Datum) 0;
}
Datum
pg_stat_get_xact_numscans(PG_FUNCTION_ARGS)
{
......@@ -1935,6 +2012,20 @@ pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS)
PG_RETURN_VOID();
}
/*
 * Reset SLRU counters (a specific one or all of them).
 *
 * The single text argument names the SLRU to reset.  A SQL NULL argument is
 * forwarded to pgstat_reset_slru_counter() as a NULL pointer.
 */
Datum
pg_stat_reset_slru(PG_FUNCTION_ARGS)
{
	char	   *slru_name;

	/* The function is not strict, so the argument may legitimately be NULL */
	if (PG_ARGISNULL(0))
		slru_name = NULL;
	else
		slru_name = text_to_cstring(PG_GETARG_TEXT_PP(0));

	pgstat_reset_slru_counter(slru_name);

	PG_RETURN_VOID();
}
Datum
pg_stat_get_archiver(PG_FUNCTION_ARGS)
{
......
......@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202004011
#define CATALOG_VERSION_NO 202004021
#endif
......@@ -5436,6 +5436,16 @@
proname => 'pg_stat_get_buf_alloc', provolatile => 's', proparallel => 'r',
prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_buf_alloc' },
{ oid => '2306',
descr => 'statistics: information about SLRU caches',
proname => 'pg_stat_get_slru', prorows => '100', proisstrict => 'f',
proretset => 't', provolatile => 's', proparallel => 'r',
prorettype => 'record', proargtypes => '',
proallargtypes => '{text,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
proargmodes => '{o,o,o,o,o,o,o,o,o}',
proargnames => '{name,blks_zeroed,blks_hit,blks_read,blks_written,blks_exists,flushes,truncates,stats_reset}',
prosrc => 'pg_stat_get_slru' },
{ oid => '2978', descr => 'statistics: number of function calls',
proname => 'pg_stat_get_function_calls', provolatile => 's',
proparallel => 'r', prorettype => 'int8', proargtypes => 'oid',
......@@ -5540,6 +5550,11 @@
proname => 'pg_stat_reset_single_function_counters', provolatile => 'v',
prorettype => 'void', proargtypes => 'oid',
prosrc => 'pg_stat_reset_single_function_counters' },
{ oid => '2307',
descr => 'statistics: reset collected statistics for a single SLRU',
proname => 'pg_stat_reset_slru', provolatile => 'v', proisstrict => 'f',
prorettype => 'void', proargtypes => 'text',
prosrc => 'pg_stat_reset_slru' },
{ oid => '3163', descr => 'current trigger depth',
proname => 'pg_trigger_depth', provolatile => 's', proparallel => 'r',
......
......@@ -11,6 +11,7 @@
#ifndef PGSTAT_H
#define PGSTAT_H
#include "access/slru.h"
#include "datatype/timestamp.h"
#include "libpq/pqcomm.h"
#include "miscadmin.h"
......@@ -55,11 +56,13 @@ typedef enum StatMsgType
PGSTAT_MTYPE_RESETCOUNTER,
PGSTAT_MTYPE_RESETSHAREDCOUNTER,
PGSTAT_MTYPE_RESETSINGLECOUNTER,
PGSTAT_MTYPE_RESETSLRUCOUNTER,
PGSTAT_MTYPE_AUTOVAC_START,
PGSTAT_MTYPE_VACUUM,
PGSTAT_MTYPE_ANALYZE,
PGSTAT_MTYPE_ARCHIVER,
PGSTAT_MTYPE_BGWRITER,
PGSTAT_MTYPE_SLRU,
PGSTAT_MTYPE_FUNCSTAT,
PGSTAT_MTYPE_FUNCPURGE,
PGSTAT_MTYPE_RECOVERYCONFLICT,
......@@ -343,6 +346,17 @@ typedef struct PgStat_MsgResetsinglecounter
Oid m_objectid;
} PgStat_MsgResetsinglecounter;
/* ----------
 * PgStat_MsgResetslrucounter	Sent by the backend to tell the collector
 *								to reset a SLRU counter
 *
 * Message type: PGSTAT_MTYPE_RESETSLRUCOUNTER.
 * ----------
 */
typedef struct PgStat_MsgResetslrucounter
{
PgStat_MsgHdr m_hdr;	/* common message header */
/*
 * Identifies which SLRU entry to reset.
 * NOTE(review): presumably a value from pgstat_slru_index(); how a
 * "reset all" request is encoded is not visible here -- confirm.
 */
int m_index;
} PgStat_MsgResetslrucounter;
/* ----------
* PgStat_MsgAutovacStart Sent by the autovacuum daemon to signal
* that a database is going to be processed
......@@ -423,6 +437,23 @@ typedef struct PgStat_MsgBgWriter
PgStat_Counter m_checkpoint_sync_time;
} PgStat_MsgBgWriter;
/* ----------
 * PgStat_MsgSLRU			Sent by the SLRU to update statistics.
 *
 * Message type: PGSTAT_MTYPE_SLRU.  The counter fields mirror those of
 * PgStat_SLRUStats, each prefixed with m_.
 * ----------
 */
typedef struct PgStat_MsgSLRU
{
PgStat_MsgHdr m_hdr;	/* common message header */
/*
 * Which SLRU entry these counters belong to.
 * NOTE(review): declared as PgStat_Counter here but as int in
 * PgStat_MsgResetslrucounter -- confirm the difference is intentional.
 */
PgStat_Counter m_index;
PgStat_Counter m_blocks_zeroed;		/* pages zeroed */
PgStat_Counter m_blocks_hit;		/* pages found in the SLRU */
PgStat_Counter m_blocks_read;		/* pages not found in the SLRU and read */
PgStat_Counter m_blocks_written;	/* pages written */
PgStat_Counter m_blocks_exists;		/* pages checked for existence */
PgStat_Counter m_flush;				/* flushes */
PgStat_Counter m_truncate;			/* truncates */
} PgStat_MsgSLRU;
/* ----------
* PgStat_MsgRecoveryConflict Sent by the backend upon recovery conflict
* ----------
......@@ -560,11 +591,13 @@ typedef union PgStat_Msg
PgStat_MsgResetcounter msg_resetcounter;
PgStat_MsgResetsharedcounter msg_resetsharedcounter;
PgStat_MsgResetsinglecounter msg_resetsinglecounter;
PgStat_MsgResetslrucounter msg_resetslrucounter;
PgStat_MsgAutovacStart msg_autovacuum_start;
PgStat_MsgVacuum msg_vacuum;
PgStat_MsgAnalyze msg_analyze;
PgStat_MsgArchiver msg_archiver;
PgStat_MsgBgWriter msg_bgwriter;
PgStat_MsgSLRU msg_slru;
PgStat_MsgFuncstat msg_funcstat;
PgStat_MsgFuncpurge msg_funcpurge;
PgStat_MsgRecoveryConflict msg_recoveryconflict;
......@@ -713,6 +746,21 @@ typedef struct PgStat_GlobalStats
TimestampTz stat_reset_timestamp;
} PgStat_GlobalStats;
/*
 * SLRU statistics kept in the stats collector
 *
 * pgstat_fetch_slru() returns an array of these, and pg_stat_get_slru()
 * turns each element into one row of the pg_stat_slru view; the view column
 * each field feeds is noted below.
 */
typedef struct PgStat_SLRUStats
{
PgStat_Counter blocks_zeroed;	/* blks_zeroed */
PgStat_Counter blocks_hit;		/* blks_hit */
PgStat_Counter blocks_read;		/* blks_read */
PgStat_Counter blocks_written;	/* blks_written */
PgStat_Counter blocks_exists;	/* blks_exists */
PgStat_Counter flush;			/* flushes */
PgStat_Counter truncate;		/* truncates */
TimestampTz stat_reset_timestamp;	/* stats_reset */
} PgStat_SLRUStats;
/* ----------
* Backend states
......@@ -1210,6 +1258,11 @@ extern char *pgstat_stat_filename;
*/
extern PgStat_MsgBgWriter BgWriterStats;
/*
* SLRU statistics counters are updated directly by slru.
*/
extern PgStat_MsgSLRU SlruStats[];
/*
* Updated by pgstat_count_buffer_*_time macros
*/
......@@ -1247,6 +1300,7 @@ extern void pgstat_clear_snapshot(void);
extern void pgstat_reset_counters(void);
extern void pgstat_reset_shared_counters(const char *);
extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type type);
extern void pgstat_reset_slru_counter(const char *);
extern void pgstat_report_autovac(Oid dboid);
extern void pgstat_report_vacuum(Oid tableoid, bool shared,
......@@ -1422,5 +1476,16 @@ extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid);
extern int pgstat_fetch_stat_numbackends(void);
extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void);
extern PgStat_GlobalStats *pgstat_fetch_global(void);
/* Returns the SLRU statistics array read by pg_stat_get_slru(). */
extern PgStat_SLRUStats *pgstat_fetch_slru(void);
/*
 * Per-event SLRU counters, called from slru.c with the SlruCtl of the
 * affected SLRU: page zeroed, page found in the SLRU, page read, page
 * written, page existence check, flush, and truncate respectively.
 */
extern void pgstat_count_slru_page_zeroed(SlruCtl ctl);
extern void pgstat_count_slru_page_hit(SlruCtl ctl);
extern void pgstat_count_slru_page_read(SlruCtl ctl);
extern void pgstat_count_slru_page_written(SlruCtl ctl);
extern void pgstat_count_slru_page_exists(SlruCtl ctl);
extern void pgstat_count_slru_flush(SlruCtl ctl);
extern void pgstat_count_slru_truncate(SlruCtl ctl);
/*
 * Name of the SLRU entry at position idx; pg_stat_get_slru() treats a NULL
 * result as the end of the list.
 */
extern char *pgstat_slru_name(int idx);
/*
 * NOTE(review): presumably the inverse mapping, from an SLRU name to its
 * entry index -- not exercised in the visible code, confirm.
 */
extern int pgstat_slru_index(const char *name);
#endif /* PGSTAT_H */
......@@ -2007,6 +2007,16 @@ pg_stat_replication| SELECT s.pid,
FROM ((pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, sslcompression, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid)
JOIN pg_stat_get_wal_senders() w(pid, state, sent_lsn, write_lsn, flush_lsn, replay_lsn, write_lag, flush_lag, replay_lag, sync_priority, sync_state, reply_time, spill_txns, spill_count, spill_bytes) ON ((s.pid = w.pid)))
LEFT JOIN pg_authid u ON ((s.usesysid = u.oid)));
pg_stat_slru| SELECT s.name,
s.blks_zeroed,
s.blks_hit,
s.blks_read,
s.blks_written,
s.blks_exists,
s.flushes,
s.truncates,
s.stats_reset
FROM pg_stat_get_slru() s(name, blks_zeroed, blks_hit, blks_read, blks_written, blks_exists, flushes, truncates, stats_reset);
pg_stat_ssl| SELECT s.pid,
s.ssl,
s.sslversion AS version,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment