Commit 1816a1c6 authored by Alvaro Herrera's avatar Alvaro Herrera

Fix checkpoint signalling

Checkpointer uses its MyLatch to wake up when a checkpoint request is
received.  But before commit c6550776 the latch was not used for
anything else, so the code could just go to sleep after each loop
without rechecking the sleeping condition.  That commit added a separate
ResetLatch in its code path[1], which can cause a checkpoint to go
unnoticed for potentially a long time.

Fix by skipping sleep if any checkpoint flags are set.  Also add a test
to verify this; authored by Kyotaro Horiguchi.

[1] CreateCheckPoint -> InvalidateObsoleteReplicationSlots ->
ConditionVariableTimeSleep

Report and diagnosis by Kyotaro Horiguchi.
Co-authored-by: default avatarKyotaro Horiguchi <horikyota.ntt@gmail.com>
Co-authored-by: default avatarÁlvaro Herrera <alvherre@alvh.no-ip.org>
Discussion: https://postgr.es/m/20200408.141956.891237856186513376.horikyota.ntt@gmail.com
parent fef819ac
......@@ -494,6 +494,13 @@ CheckpointerMain(void)
*/
pgstat_send_bgwriter();
/*
* If any checkpoint flags have been set, redo the loop to handle the
* checkpoint without sleeping.
*/
if (((volatile CheckpointerShmemStruct *) CheckpointerShmem)->ckpt_flags)
continue;
/*
* Sleep until we are signaled or it's time for another checkpoint or
* xlog file switch.
......
......@@ -8,7 +8,7 @@ use TestLib;
use PostgresNode;
use File::Path qw(rmtree);
use Test::More tests => 13;
use Test::More tests => 14;
use Time::HiRes qw(usleep);
$ENV{PGDATABASE} = 'postgres';
......@@ -179,7 +179,47 @@ for (my $i = 0; $i < 10000; $i++)
}
ok($failed, 'check that replication has been broken');
$node_standby->stop;
$node_master->stop('immediate');
$node_standby->stop('immediate');
my $node_master2 = get_new_node('master2');
$node_master2->init(allows_streaming => 1);
$node_master2->append_conf(
'postgresql.conf', qq(
min_wal_size = 32MB
max_wal_size = 32MB
log_checkpoints = yes
));
$node_master2->start;
$node_master2->safe_psql('postgres',
"SELECT pg_create_physical_replication_slot('rep1')");
$backup_name = 'my_backup2';
$node_master2->backup($backup_name);
$node_master2->stop;
$node_master2->append_conf(
'postgresql.conf', qq(
max_slot_wal_keep_size = 0
));
$node_master2->start;
$node_standby = get_new_node('standby_2');
$node_standby->init_from_backup($node_master2, $backup_name,
has_streaming => 1);
$node_standby->append_conf('postgresql.conf', "primary_slot_name = 'rep1'");
$node_standby->start;
my @result =
split(
'\n',
$node_master2->safe_psql(
'postgres',
"CREATE TABLE tt();
DROP TABLE tt;
SELECT pg_switch_wal();
CHECKPOINT;
SELECT 'finished';",
timeout => '60'));
is($result[1], 'finished', 'check if checkpoint command is not blocked');
#####################################
# Advance WAL of $node by $n segments
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment