Commit 3fa17d37 authored by Amit Kapila's avatar Amit Kapila

Use HTAB for replication slot statistics.

Previously, we used an array of size max_replication_slots to
store stats for replication slots. But that had two problems in the cases
where a message for dropping a slot gets lost: 1) the stats for the new
slot are not recorded if the array is full and 2) we could write beyond the
end of the array if the user reduces max_replication_slots.

This commit uses HTAB for replication slot statistics, resolving both
problems. Now, pgstat_vacuum_stat() searches for all the dead replication
slots in the stats hashtable and tells the collector to remove them. To avoid
showing the stats for the already-dropped slots, the pg_stat_replication_slots
view searches slot stats by the slot name taken from pg_replication_slots.

Also, we send a message for creating a slot at slot creation, initializing
the stats. This reduces the possibility that the stats are accumulated
into the old slot stats when a message for dropping a slot gets lost.

Reported-by: Andres Freund
Author: Sawada Masahiko, test case by Vignesh C
Reviewed-by: Amit Kapila, Vignesh C, Dilip Kumar
Discussion: https://postgr.es/m/20210319185247.ldebgpdaxsowiflw@alap3.anarazel.de
parent e7eea52b
......@@ -2,9 +2,10 @@
# drop replication slot and restart.
use strict;
use warnings;
use File::Path qw(rmtree);
use PostgresNode;
use TestLib;
use Test::More tests => 1;
use Test::More tests => 2;
# Test set-up
my $node = get_new_node('test');
......@@ -12,9 +13,22 @@ $node->init(allows_streaming => 'logical');
$node->append_conf('postgresql.conf', 'synchronous_commit = on');
$node->start;
# Fetch, for every row of pg_stat_replication_slots (ordered by slot name),
# the slot name and whether total_txns and total_bytes are positive, then
# compare that output with the expected text under the given test name.
sub test_slot_stats
{
	my $node     = shift;
	my $expected = shift;
	my $msg      = shift;

	my $query = qq[
SELECT slot_name, total_txns > 0 AS total_txn,
total_bytes > 0 AS total_bytes
FROM pg_stat_replication_slots
ORDER BY slot_name];

	my $actual = $node->safe_psql('postgres', $query);

	return is($actual, $expected, $msg);
}
# Create table.
$node->safe_psql('postgres',
"CREATE TABLE test_repl_stat(col1 int)");
$node->safe_psql('postgres', "CREATE TABLE test_repl_stat(col1 int)");
# Create replication slots.
$node->safe_psql(
......@@ -26,7 +40,8 @@ $node->safe_psql(
]);
# Insert some data.
$node->safe_psql('postgres', "INSERT INTO test_repl_stat values(generate_series(1, 5));");
$node->safe_psql('postgres',
"INSERT INTO test_repl_stat values(generate_series(1, 5));");
$node->safe_psql(
'postgres', qq[
......@@ -50,27 +65,51 @@ $node->poll_query_until(
# Test to drop one of the replication slots and verify replication statistics data is
# fine after restart.
$node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot4')");
$node->safe_psql('postgres',
"SELECT pg_drop_replication_slot('regression_slot4')");
$node->stop;
$node->start;
# Verify statistics data present in pg_stat_replication_slots are sane after
# restart.
my $result = $node->safe_psql('postgres',
"SELECT slot_name, total_txns > 0 AS total_txn,
total_bytes > 0 AS total_bytes FROM pg_stat_replication_slots
ORDER BY slot_name"
);
is($result, qq(regression_slot1|t|t
test_slot_stats(
$node,
qq(regression_slot1|t|t
regression_slot2|t|t
regression_slot3|t|t), 'check replication statistics are updated');
regression_slot3|t|t),
'check replication statistics are updated');
# Test to remove one of the replication slots and adjust
# max_replication_slots according to the number of remaining slots. This leads
# to a mismatch between the number of slots present in the stats file and the
# number of stats present in the shared memory, simulating the scenario where
# the drop slot message is lost by the statistics collector process. We verify
# that replication statistics data is fine after restart.
$node->stop;
my $datadir = $node->data_dir;
my $slot3_replslotdir = "$datadir/pg_replslot/regression_slot3";
rmtree($slot3_replslotdir);
$node->append_conf('postgresql.conf', 'max_replication_slots = 2');
$node->start;
# Verify statistics data present in pg_stat_replication_slots are sane after
# restart.
test_slot_stats(
$node,
qq(regression_slot1|t|t
regression_slot2|t|t),
'check replication statistics after removing the slot file');
# cleanup
$node->safe_psql('postgres', "DROP TABLE test_repl_stat");
$node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot1')");
$node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot2')");
$node->safe_psql('postgres', "SELECT pg_drop_replication_slot('regression_slot3')");
$node->safe_psql('postgres',
"SELECT pg_drop_replication_slot('regression_slot1')");
$node->safe_psql('postgres',
"SELECT pg_drop_replication_slot('regression_slot2')");
# shutdown
$node->stop;
......@@ -866,20 +866,6 @@ CREATE VIEW pg_stat_replication AS
JOIN pg_stat_get_wal_senders() AS W ON (S.pid = W.pid)
LEFT JOIN pg_authid AS U ON (S.usesysid = U.oid);
CREATE VIEW pg_stat_replication_slots AS
SELECT
s.slot_name,
s.spill_txns,
s.spill_count,
s.spill_bytes,
s.stream_txns,
s.stream_count,
s.stream_bytes,
s.total_txns,
s.total_bytes,
s.stats_reset
FROM pg_stat_get_replication_slots() AS s;
CREATE VIEW pg_stat_slru AS
SELECT
s.name,
......@@ -984,6 +970,22 @@ CREATE VIEW pg_replication_slots AS
FROM pg_get_replication_slots() AS L
LEFT JOIN pg_database D ON (L.datoid = D.oid);
-- Statistics for replication slots, one row per slot listed in
-- pg_replication_slots that has a non-null datoid.  Slot names come from
-- pg_replication_slots and the stats are fetched per slot through
-- pg_stat_get_replication_slot(), so stats left behind for a slot that no
-- longer appears in pg_replication_slots are not shown.
CREATE VIEW pg_stat_replication_slots AS
SELECT
s.slot_name,
s.spill_txns,
s.spill_count,
s.spill_bytes,
s.stream_txns,
s.stream_count,
s.stream_bytes,
s.total_txns,
s.total_bytes,
s.stats_reset
FROM pg_replication_slots as r,
LATERAL pg_stat_get_replication_slot(slot_name) as s
WHERE r.datoid IS NOT NULL; -- excluding physical slots
CREATE VIEW pg_stat_database AS
SELECT
D.oid AS datid,
......
This diff is collapsed.
......@@ -1773,7 +1773,7 @@ void
UpdateDecodingStats(LogicalDecodingContext *ctx)
{
ReorderBuffer *rb = ctx->reorder;
PgStat_ReplSlotStats repSlotStat;
PgStat_StatReplSlotEntry repSlotStat;
/* Nothing to do if we don't have any replication stats to be sent. */
if (rb->spillBytes <= 0 && rb->streamBytes <= 0 && rb->totalBytes <= 0)
......
......@@ -328,12 +328,7 @@ ReplicationSlotCreate(const char *name, bool db_specific,
* ReplicationSlotAllocationLock.
*/
if (SlotIsLogical(slot))
{
PgStat_ReplSlotStats repSlotStat;
MemSet(&repSlotStat, 0, sizeof(PgStat_ReplSlotStats));
namestrcpy(&repSlotStat.slotname, NameStr(slot->data.name));
pgstat_report_replslot(&repSlotStat);
}
pgstat_report_replslot_create(NameStr(slot->data.name));
/*
* Now that the slot has been marked as in_use and active, it's safe to
......@@ -349,17 +344,15 @@ ReplicationSlotCreate(const char *name, bool db_specific,
* Search for the named replication slot.
*
* Return the replication slot if found, otherwise NULL.
*
* The caller must hold ReplicationSlotControlLock in shared mode.
*/
ReplicationSlot *
SearchNamedReplicationSlot(const char *name)
SearchNamedReplicationSlot(const char *name, bool need_lock)
{
int i;
ReplicationSlot *slot = NULL;
ReplicationSlot *slot = NULL;
Assert(LWLockHeldByMeInMode(ReplicationSlotControlLock,
LW_SHARED));
if (need_lock)
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
for (i = 0; i < max_replication_slots; i++)
{
......@@ -372,6 +365,9 @@ SearchNamedReplicationSlot(const char *name)
}
}
if (need_lock)
LWLockRelease(ReplicationSlotControlLock);
return slot;
}
......@@ -416,7 +412,7 @@ retry:
* Search for the slot with the specified name if the slot to acquire is
* not given. If the slot is not found, we either return -1 or error out.
*/
s = slot ? slot : SearchNamedReplicationSlot(name);
s = slot ? slot : SearchNamedReplicationSlot(name, false);
if (s == NULL || !s->in_use)
{
LWLockRelease(ReplicationSlotControlLock);
......@@ -713,6 +709,12 @@ ReplicationSlotDropPtr(ReplicationSlot *slot)
* reduce that possibility. If the messages reached in reverse, we would
* lose one statistics update message. But the next update message will
* create the statistics for the replication slot.
*
* XXX In case the messages for both the creation and the drop of a slot
* with the same name get lost, and the create happens before (auto)vacuum
* cleans up the dead slot, the stats will be accumulated into the old slot.
* One can imagine having OIDs for each slot to avoid the accumulation of
* stats, but that doesn't seem worth doing as in practice this won't happen
* frequently.
*/
if (SlotIsLogical(slot))
pgstat_report_replslot_drop(NameStr(slot->data.name));
......
......@@ -24,6 +24,7 @@
#include "pgstat.h"
#include "postmaster/bgworker_internals.h"
#include "postmaster/postmaster.h"
#include "replication/slot.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "utils/acl.h"
......@@ -2207,8 +2208,33 @@ pg_stat_reset_replication_slot(PG_FUNCTION_ARGS)
char *target = NULL;
if (!PG_ARGISNULL(0))
{
ReplicationSlot *slot;
target = text_to_cstring(PG_GETARG_TEXT_PP(0));
/*
* Check if the slot exists with the given name. It is possible that
* by the time this message is executed the slot is dropped but at
* least this check will ensure that the given name is for a valid
* slot.
*/
slot = SearchNamedReplicationSlot(target, true);
if (!slot)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("replication slot \"%s\" does not exist",
target)));
/*
* Nothing to do for physical slots as we collect stats only for
* logical slots.
*/
if (SlotIsPhysical(slot))
PG_RETURN_VOID();
}
pgstat_reset_replslot_counter(target);
PG_RETURN_VOID();
......@@ -2280,73 +2306,77 @@ pg_stat_get_archiver(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
}
/* Get the statistics for the replication slots */
/*
* Get the statistics for the replication slot. If the slot's statistics are
* not available, return all-zeroes stats.
*/
Datum
pg_stat_get_replication_slots(PG_FUNCTION_ARGS)
pg_stat_get_replication_slot(PG_FUNCTION_ARGS)
{
#define PG_STAT_GET_REPLICATION_SLOT_COLS 10
ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
text *slotname_text = PG_GETARG_TEXT_P(0);
NameData slotname;
TupleDesc tupdesc;
Tuplestorestate *tupstore;
MemoryContext per_query_ctx;
MemoryContext oldcontext;
PgStat_ReplSlotStats *slotstats;
int nstats;
int i;
Datum values[10];
bool nulls[10];
PgStat_StatReplSlotEntry *slotent;
PgStat_StatReplSlotEntry allzero;
/* check to see if caller supports us returning a tuplestore */
if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("set-valued function called in context that cannot accept a set")));
if (!(rsinfo->allowedModes & SFRM_Materialize))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("materialize mode required, but it is not allowed in this context")));
/* Build a tuple descriptor for our result type */
if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
elog(ERROR, "return type must be a row type");
per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
oldcontext = MemoryContextSwitchTo(per_query_ctx);
tupstore = tuplestore_begin_heap(true, false, work_mem);
rsinfo->returnMode = SFRM_Materialize;
rsinfo->setResult = tupstore;
rsinfo->setDesc = tupdesc;
/* Initialise values and NULL flags arrays */
MemSet(values, 0, sizeof(values));
MemSet(nulls, 0, sizeof(nulls));
MemoryContextSwitchTo(oldcontext);
/* Initialise attributes information in the tuple descriptor */
tupdesc = CreateTemplateTupleDesc(PG_STAT_GET_REPLICATION_SLOT_COLS);
TupleDescInitEntry(tupdesc, (AttrNumber) 1, "slot_name",
TEXTOID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 2, "spill_txns",
INT8OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 3, "spill_count",
INT8OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 4, "spill_bytes",
INT8OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 5, "stream_txns",
INT8OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 6, "stream_count",
INT8OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 7, "stream_bytes",
INT8OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 8, "total_txns",
INT8OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 9, "total_bytes",
INT8OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 10, "stats_reset",
TIMESTAMPTZOID, -1, 0);
BlessTupleDesc(tupdesc);
slotstats = pgstat_fetch_replslot(&nstats);
for (i = 0; i < nstats; i++)
namestrcpy(&slotname, text_to_cstring(slotname_text));
slotent = pgstat_fetch_replslot(slotname);
if (!slotent)
{
Datum values[PG_STAT_GET_REPLICATION_SLOT_COLS];
bool nulls[PG_STAT_GET_REPLICATION_SLOT_COLS];
PgStat_ReplSlotStats *s = &(slotstats[i]);
MemSet(values, 0, sizeof(values));
MemSet(nulls, 0, sizeof(nulls));
values[0] = CStringGetTextDatum(NameStr(s->slotname));
values[1] = Int64GetDatum(s->spill_txns);
values[2] = Int64GetDatum(s->spill_count);
values[3] = Int64GetDatum(s->spill_bytes);
values[4] = Int64GetDatum(s->stream_txns);
values[5] = Int64GetDatum(s->stream_count);
values[6] = Int64GetDatum(s->stream_bytes);
values[7] = Int64GetDatum(s->total_txns);
values[8] = Int64GetDatum(s->total_bytes);
if (s->stat_reset_timestamp == 0)
nulls[9] = true;
else
values[9] = TimestampTzGetDatum(s->stat_reset_timestamp);
tuplestore_putvalues(tupstore, tupdesc, values, nulls);
/*
* If the slot is not found, initialise its stats. This is possible if
* the create slot message is lost.
*/
memset(&allzero, 0, sizeof(PgStat_StatReplSlotEntry));
slotent = &allzero;
}
tuplestore_donestoring(tupstore);
values[0] = CStringGetTextDatum(NameStr(slotname));
values[1] = Int64GetDatum(slotent->spill_txns);
values[2] = Int64GetDatum(slotent->spill_count);
values[3] = Int64GetDatum(slotent->spill_bytes);
values[4] = Int64GetDatum(slotent->stream_txns);
values[5] = Int64GetDatum(slotent->stream_count);
values[6] = Int64GetDatum(slotent->stream_bytes);
values[7] = Int64GetDatum(slotent->total_txns);
values[8] = Int64GetDatum(slotent->total_bytes);
return (Datum) 0;
if (slotent->stat_reset_timestamp == 0)
nulls[9] = true;
else
values[9] = TimestampTzGetDatum(slotent->stat_reset_timestamp);
/* Returns the record as Datum */
PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
}
......@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202104231
#define CATALOG_VERSION_NO 202104271
#endif
......@@ -5308,14 +5308,14 @@
proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
proargnames => '{pid,status,receive_start_lsn,receive_start_tli,written_lsn,flushed_lsn,received_tli,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time,slot_name,sender_host,sender_port,conninfo}',
prosrc => 'pg_stat_get_wal_receiver' },
{ oid => '8595', descr => 'statistics: information about replication slots',
proname => 'pg_stat_get_replication_slots', prorows => '10',
{ oid => '8595', descr => 'statistics: information about replication slot',
proname => 'pg_stat_get_replication_slot', prorows => '1',
proisstrict => 'f', proretset => 't', provolatile => 's', proparallel => 'r',
prorettype => 'record', proargtypes => '',
proallargtypes => '{text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
proargmodes => '{o,o,o,o,o,o,o,o,o,o}',
proargnames => '{slot_name,spill_txns,spill_count,spill_bytes,stream_txns,stream_count,stream_bytes,total_txns,total_bytes,stats_reset}',
prosrc => 'pg_stat_get_replication_slots' },
prorettype => 'record', proargtypes => 'text',
proallargtypes => '{text,text,int8,int8,int8,int8,int8,int8,int8,int8,timestamptz}',
proargmodes => '{i,o,o,o,o,o,o,o,o,o,o}',
proargnames => '{slot_name,slot_name,spill_txns,spill_count,spill_bytes,stream_txns,stream_count,stream_bytes,total_txns,total_bytes,stats_reset}',
prosrc => 'pg_stat_get_replication_slot' },
{ oid => '6118', descr => 'statistics: information about subscription',
proname => 'pg_stat_get_subscription', prorows => '10', proisstrict => 'f',
proretset => 't', provolatile => 's', proparallel => 'r',
......
......@@ -541,6 +541,7 @@ typedef struct PgStat_MsgReplSlot
{
PgStat_MsgHdr m_hdr;
NameData m_slotname;
bool m_create;
bool m_drop;
PgStat_Counter m_spill_txns;
PgStat_Counter m_spill_count;
......@@ -917,7 +918,7 @@ typedef struct PgStat_SLRUStats
/*
* Replication slot statistics kept in the stats collector
*/
typedef struct PgStat_ReplSlotStats
typedef struct PgStat_StatReplSlotEntry
{
NameData slotname;
PgStat_Counter spill_txns;
......@@ -929,7 +930,7 @@ typedef struct PgStat_ReplSlotStats
PgStat_Counter total_txns;
PgStat_Counter total_bytes;
TimestampTz stat_reset_timestamp;
} PgStat_ReplSlotStats;
} PgStat_StatReplSlotEntry;
/*
......@@ -1031,7 +1032,8 @@ extern void pgstat_report_recovery_conflict(int reason);
extern void pgstat_report_deadlock(void);
extern void pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount);
extern void pgstat_report_checksum_failure(void);
extern void pgstat_report_replslot(const PgStat_ReplSlotStats *repSlotStat);
extern void pgstat_report_replslot(const PgStat_StatReplSlotEntry *repSlotStat);
extern void pgstat_report_replslot_create(const char *slotname);
extern void pgstat_report_replslot_drop(const char *slotname);
extern void pgstat_initialize(void);
......@@ -1129,7 +1131,7 @@ extern PgStat_ArchiverStats *pgstat_fetch_stat_archiver(void);
extern PgStat_GlobalStats *pgstat_fetch_global(void);
extern PgStat_WalStats *pgstat_fetch_stat_wal(void);
extern PgStat_SLRUStats *pgstat_fetch_slru(void);
extern PgStat_ReplSlotStats *pgstat_fetch_replslot(int *nslots_p);
extern PgStat_StatReplSlotEntry *pgstat_fetch_replslot(NameData slotname);
extern PgStat_RecoveryPrefetchStats *pgstat_fetch_recoveryprefetch(void);
extern void pgstat_count_slru_page_zeroed(int slru_idx);
......
......@@ -223,7 +223,7 @@ extern XLogRecPtr ReplicationSlotsComputeLogicalRestartLSN(void);
extern bool ReplicationSlotsCountDBSlots(Oid dboid, int *nslots, int *nactive);
extern void ReplicationSlotsDropDBSlots(Oid dboid);
extern void InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno);
extern ReplicationSlot *SearchNamedReplicationSlot(const char *name);
extern ReplicationSlot *SearchNamedReplicationSlot(const char *name, bool need_lock);
extern void ReplicationSlotNameForTablesync(Oid suboid, Oid relid, char *syncslotname, int szslot);
extern void ReplicationSlotDropAtPubNode(WalReceiverConn *wrconn, char *slotname, bool missing_ok);
......
......@@ -2071,7 +2071,9 @@ pg_stat_replication_slots| SELECT s.slot_name,
s.total_txns,
s.total_bytes,
s.stats_reset
FROM pg_stat_get_replication_slots() s(slot_name, spill_txns, spill_count, spill_bytes, stream_txns, stream_count, stream_bytes, total_txns, total_bytes, stats_reset);
FROM pg_replication_slots r,
LATERAL pg_stat_get_replication_slot((r.slot_name)::text) s(slot_name, spill_txns, spill_count, spill_bytes, stream_txns, stream_count, stream_bytes, total_txns, total_bytes, stats_reset)
WHERE (r.datoid IS NOT NULL);
pg_stat_slru| SELECT s.name,
s.blks_zeroed,
s.blks_hit,
......
......@@ -1870,12 +1870,12 @@ PgStat_MsgTabstat
PgStat_MsgTempFile
PgStat_MsgVacuum
PgStat_MsgWal
PgStat_ReplSlotStats
PgStat_SLRUStats
PgStat_Shared_Reset_Target
PgStat_Single_Reset_Type
PgStat_StatDBEntry
PgStat_StatFuncEntry
PgStat_StatReplSlotEntry
PgStat_StatTabEntry
PgStat_SubXactStatus
PgStat_TableCounts
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment