Commit 6d0eb385 authored by Amit Kapila

Fix deadlock for multiple replicating truncates of the same table.

While applying the truncate change, the logical apply worker acquires
RowExclusiveLock on the relation being truncated. This allowed two apply
workers to truncate the same relation at the same time, which led to a
deadlock. The reason was that one of the workers, after updating the
pg_class tuple, tries to acquire SHARE lock on the relation and starts to
wait for the second worker, which has acquired RowExclusiveLock on the
relation. And when the second worker tries to update the pg_class tuple,
it starts to wait for the first worker, which leads to a deadlock. Fix it
by acquiring AccessExclusiveLock on the relation before applying the
truncate change, as we do for a normal truncate operation.

Author: Peter Smith, test case by Haiying Tang
Reviewed-by: Dilip Kumar, Amit Kapila
Backpatch-through: 11
Discussion: https://postgr.es/m/CAHut+PsNm43p0jM+idTvWwiGZPcP0hGrHMPK9TOAkc+a4UpUqw@mail.gmail.com
parent f21fadaf
...@@ -1818,6 +1818,7 @@ apply_handle_truncate(StringInfo s) ...@@ -1818,6 +1818,7 @@ apply_handle_truncate(StringInfo s)
List *relids = NIL; List *relids = NIL;
List *relids_logged = NIL; List *relids_logged = NIL;
ListCell *lc; ListCell *lc;
LOCKMODE lockmode = AccessExclusiveLock;
if (handle_streamed_transaction(LOGICAL_REP_MSG_TRUNCATE, s)) if (handle_streamed_transaction(LOGICAL_REP_MSG_TRUNCATE, s))
return; return;
...@@ -1831,14 +1832,14 @@ apply_handle_truncate(StringInfo s) ...@@ -1831,14 +1832,14 @@ apply_handle_truncate(StringInfo s)
LogicalRepRelId relid = lfirst_oid(lc); LogicalRepRelId relid = lfirst_oid(lc);
LogicalRepRelMapEntry *rel; LogicalRepRelMapEntry *rel;
rel = logicalrep_rel_open(relid, RowExclusiveLock); rel = logicalrep_rel_open(relid, lockmode);
if (!should_apply_changes_for_rel(rel)) if (!should_apply_changes_for_rel(rel))
{ {
/* /*
* The relation can't become interesting in the middle of the * The relation can't become interesting in the middle of the
* transaction so it's safe to unlock it. * transaction so it's safe to unlock it.
*/ */
logicalrep_rel_close(rel, RowExclusiveLock); logicalrep_rel_close(rel, lockmode);
continue; continue;
} }
...@@ -1856,7 +1857,7 @@ apply_handle_truncate(StringInfo s) ...@@ -1856,7 +1857,7 @@ apply_handle_truncate(StringInfo s)
{ {
ListCell *child; ListCell *child;
List *children = find_all_inheritors(rel->localreloid, List *children = find_all_inheritors(rel->localreloid,
RowExclusiveLock, lockmode,
NULL); NULL);
foreach(child, children) foreach(child, children)
...@@ -1876,7 +1877,7 @@ apply_handle_truncate(StringInfo s) ...@@ -1876,7 +1877,7 @@ apply_handle_truncate(StringInfo s)
*/ */
if (RELATION_IS_OTHER_TEMP(childrel)) if (RELATION_IS_OTHER_TEMP(childrel))
{ {
table_close(childrel, RowExclusiveLock); table_close(childrel, lockmode);
continue; continue;
} }
......
...@@ -6,7 +6,7 @@ use strict; ...@@ -6,7 +6,7 @@ use strict;
use warnings; use warnings;
use PostgresNode; use PostgresNode;
use TestLib; use TestLib;
use Test::More tests => 11; use Test::More tests => 14;
# setup # setup
...@@ -16,6 +16,8 @@ $node_publisher->start; ...@@ -16,6 +16,8 @@ $node_publisher->start;
my $node_subscriber = get_new_node('subscriber'); my $node_subscriber = get_new_node('subscriber');
$node_subscriber->init(allows_streaming => 'logical'); $node_subscriber->init(allows_streaming => 'logical');
$node_subscriber->append_conf('postgresql.conf',
qq(max_logical_replication_workers = 6));
$node_subscriber->start; $node_subscriber->start;
my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres'; my $publisher_connstr = $node_publisher->connstr . ' dbname=postgres';
...@@ -187,3 +189,56 @@ $result = $node_subscriber->safe_psql('postgres', ...@@ -187,3 +189,56 @@ $result = $node_subscriber->safe_psql('postgres',
"SELECT count(*), min(a), max(a) FROM tab1"); "SELECT count(*), min(a), max(a) FROM tab1");
is($result, qq(0||), is($result, qq(0||),
'truncate replicated in synchronous logical replication'); 'truncate replicated in synchronous logical replication');
$node_publisher->safe_psql('postgres',
"ALTER SYSTEM RESET synchronous_standby_names");
$node_publisher->safe_psql('postgres', "SELECT pg_reload_conf()");
# test that truncate works for logical replication when there are multiple
# subscriptions for a single table
$node_publisher->safe_psql('postgres',
"CREATE TABLE tab5 (a int)");
$node_subscriber->safe_psql('postgres',
"CREATE TABLE tab5 (a int)");
$node_publisher->safe_psql('postgres',
"CREATE PUBLICATION pub5 FOR TABLE tab5");
$node_subscriber->safe_psql('postgres',
"CREATE SUBSCRIPTION sub5_1 CONNECTION '$publisher_connstr' PUBLICATION pub5"
);
$node_subscriber->safe_psql('postgres',
"CREATE SUBSCRIPTION sub5_2 CONNECTION '$publisher_connstr' PUBLICATION pub5"
);
# wait for initial data sync
$node_subscriber->poll_query_until('postgres', $synced_query)
or die "Timed out while waiting for subscriber to synchronize data";
# insert data to truncate
$node_publisher->safe_psql('postgres',
"INSERT INTO tab5 VALUES (1), (2), (3)");
$node_publisher->wait_for_catchup('sub5_1');
$node_publisher->wait_for_catchup('sub5_2');
$result = $node_subscriber->safe_psql('postgres',
"SELECT count(*), min(a), max(a) FROM tab5");
is($result, qq(6|1|3), 'insert replicated for multiple subscriptions');
$node_publisher->safe_psql('postgres', "TRUNCATE tab5");
$node_publisher->wait_for_catchup('sub5_1');
$node_publisher->wait_for_catchup('sub5_2');
$result = $node_subscriber->safe_psql('postgres',
"SELECT count(*), min(a), max(a) FROM tab5");
is($result, qq(0||),
'truncate replicated for multiple subscriptions');
# check deadlocks
$result = $node_subscriber->safe_psql('postgres',
"SELECT deadlocks FROM pg_stat_database WHERE datname='postgres'");
is($result, qq(0), 'no deadlocks detected');
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment