Commit b0afdcad authored by Michael Paquier's avatar Michael Paquier

Fix slot data persistency when advancing physical replication slots

Advancing a physical replication slot with pg_replication_slot_advance()
did not mark the slot as dirty if any advancing was done, preventing the
follow-up checkpoint to flush the slot data to disk.  This caused the
advancing to be lost even on clean restarts.  This does not happen for
logical slots as any advancing marked the slot as dirty.  Per
discussion, the original feature has been implemented so as in the event
of a crash the slot may move backwards to a past LSN.  This property is
kept and more documentation is added about that.

This commit adds some new TAP tests to check the persistency of physical
and logical slots after advancing across clean restarts.

Author: Alexey Kondratov, Michael Paquier
Reviewed-by: Andres Freund, Kyotaro Horiguchi, Craig Ringer
Discussion: https://postgr.es/m/059cc53a-8b14-653a-a24d-5f867503b0ee@postgrespro.ru
Backpatch-through: 11
parent 26a81bb8
...@@ -20470,8 +20470,11 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup()); ...@@ -20470,8 +20470,11 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
<entry> <entry>
Advances the current confirmed position of a replication slot named Advances the current confirmed position of a replication slot named
<parameter>slot_name</parameter>. The slot will not be moved backwards, <parameter>slot_name</parameter>. The slot will not be moved backwards,
and it will not be moved beyond the current insert location. Returns and it will not be moved beyond the current insert location. Returns
name of the slot and real position to which it was advanced to. the name of the slot and the real position to which it was advanced to.
The information of the updated slot is written out at the follow-up
checkpoint if any advancing is done. In the event of a crash, the
slot may return to an earlier position.
</entry> </entry>
</row> </row>
......
...@@ -370,6 +370,14 @@ pg_physical_replication_slot_advance(XLogRecPtr moveto) ...@@ -370,6 +370,14 @@ pg_physical_replication_slot_advance(XLogRecPtr moveto)
MyReplicationSlot->data.restart_lsn = moveto; MyReplicationSlot->data.restart_lsn = moveto;
SpinLockRelease(&MyReplicationSlot->mutex); SpinLockRelease(&MyReplicationSlot->mutex);
retlsn = moveto; retlsn = moveto;
/*
* Dirty the slot so as it is written out at the next checkpoint.
* Note that the LSN position advanced may still be lost in the
* event of a crash, but this makes the data consistent after a
* clean shutdown.
*/
ReplicationSlotMarkDirty();
} }
return retlsn; return retlsn;
...@@ -467,9 +475,9 @@ pg_logical_replication_slot_advance(XLogRecPtr moveto) ...@@ -467,9 +475,9 @@ pg_logical_replication_slot_advance(XLogRecPtr moveto)
* keep track of their progress, so we should make more of an * keep track of their progress, so we should make more of an
* effort to save it for them. * effort to save it for them.
* *
* Dirty the slot so it's written out at the next checkpoint. * Dirty the slot so it is written out at the next checkpoint.
* We'll still lose its position on crash, as documented, but it's * The LSN position advanced to may still be lost on a crash
* better than always losing the position even on clean restart. * but this makes the data consistent after a clean shutdown.
*/ */
ReplicationSlotMarkDirty(); ReplicationSlotMarkDirty();
} }
...@@ -566,15 +574,6 @@ pg_replication_slot_advance(PG_FUNCTION_ARGS) ...@@ -566,15 +574,6 @@ pg_replication_slot_advance(PG_FUNCTION_ARGS)
values[0] = NameGetDatum(&MyReplicationSlot->data.name); values[0] = NameGetDatum(&MyReplicationSlot->data.name);
nulls[0] = false; nulls[0] = false;
/* Update the on disk state when lsn was updated. */
if (XLogRecPtrIsInvalid(endlsn))
{
ReplicationSlotMarkDirty();
ReplicationSlotsComputeRequiredXmin(false);
ReplicationSlotsComputeRequiredLSN();
ReplicationSlotSave();
}
ReplicationSlotRelease(); ReplicationSlotRelease();
/* Return the reached position. */ /* Return the reached position. */
......
...@@ -3,7 +3,7 @@ use strict; ...@@ -3,7 +3,7 @@ use strict;
use warnings; use warnings;
use PostgresNode; use PostgresNode;
use TestLib; use TestLib;
use Test::More tests => 32; use Test::More tests => 34;
# Initialize master node # Initialize master node
my $node_master = get_new_node('master'); my $node_master = get_new_node('master');
...@@ -344,3 +344,28 @@ is($catalog_xmin, '', ...@@ -344,3 +344,28 @@ is($catalog_xmin, '',
is($xmin, '', 'xmin of cascaded slot null with hs feedback reset'); is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
is($catalog_xmin, '', is($catalog_xmin, '',
'catalog xmin of cascaded slot still null with hs_feedback reset'); 'catalog xmin of cascaded slot still null with hs_feedback reset');
# Test physical slot advancing and its durability. Create a new slot on
# the primary, not used by any of the standbys. This reserves WAL at creation.
my $phys_slot = 'phys_slot';
$node_master->safe_psql('postgres',
"SELECT pg_create_physical_replication_slot('$phys_slot', true);");
$node_master->psql('postgres', "
CREATE TABLE tab_phys_slot (a int);
INSERT INTO tab_phys_slot VALUES (generate_series(1,10));");
my $current_lsn = $node_master->safe_psql('postgres',
"SELECT pg_current_wal_lsn();");
chomp($current_lsn);
my $psql_rc = $node_master->psql('postgres',
"SELECT pg_replication_slot_advance('$phys_slot', '$current_lsn'::pg_lsn);");
is($psql_rc, '0', 'slot advancing with physical slot');
my $phys_restart_lsn_pre = $node_master->safe_psql('postgres',
"SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$phys_slot';");
chomp($phys_restart_lsn_pre);
# Slot advance should persist across clean restarts.
$node_master->restart;
my $phys_restart_lsn_post = $node_master->safe_psql('postgres',
"SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$phys_slot';");
chomp($phys_restart_lsn_post);
ok(($phys_restart_lsn_pre cmp $phys_restart_lsn_post) == 0,
"physical slot advance persists across restarts");
...@@ -7,7 +7,7 @@ use strict; ...@@ -7,7 +7,7 @@ use strict;
use warnings; use warnings;
use PostgresNode; use PostgresNode;
use TestLib; use TestLib;
use Test::More tests => 10; use Test::More tests => 12;
use Config; use Config;
# Initialize master node # Initialize master node
...@@ -135,5 +135,29 @@ is($node_master->psql('postgres', 'DROP DATABASE otherdb'), ...@@ -135,5 +135,29 @@ is($node_master->psql('postgres', 'DROP DATABASE otherdb'),
is($node_master->slot('otherdb_slot')->{'slot_name'}, is($node_master->slot('otherdb_slot')->{'slot_name'},
undef, 'logical slot was actually dropped with DB'); undef, 'logical slot was actually dropped with DB');
# Test logical slot advancing and its durability.
my $logical_slot = 'logical_slot';
$node_master->safe_psql('postgres',
"SELECT pg_create_logical_replication_slot('$logical_slot', 'test_decoding', false);");
$node_master->psql('postgres', "
CREATE TABLE tab_logical_slot (a int);
INSERT INTO tab_logical_slot VALUES (generate_series(1,10));");
my $current_lsn = $node_master->safe_psql('postgres',
"SELECT pg_current_wal_lsn();");
chomp($current_lsn);
my $psql_rc = $node_master->psql('postgres',
"SELECT pg_replication_slot_advance('$logical_slot', '$current_lsn'::pg_lsn);");
is($psql_rc, '0', 'slot advancing with logical slot');
my $logical_restart_lsn_pre = $node_master->safe_psql('postgres',
"SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$logical_slot';");
chomp($logical_restart_lsn_pre);
# Slot advance should persists across clean restarts.
$node_master->restart;
my $logical_restart_lsn_post = $node_master->safe_psql('postgres',
"SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$logical_slot';");
chomp($logical_restart_lsn_post);
ok(($logical_restart_lsn_pre cmp $logical_restart_lsn_post) == 0,
"logical slot advance persists across restarts");
# done with the node # done with the node
$node_master->stop; $node_master->stop;
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment