Commit 258cef12 authored by Robert Haas's avatar Robert Haas

Fix possibile deadlock when dropping partitions.

heap_drop_with_catalog and RangeVarCallbackForDropRelation should
lock the parent before locking the target relation.

Amit Langote

Discussion: http://postgr.es/m/29588799-a8ce-b0a2-3dae-f39ff6d35922@lab.ntt.co.jp
parent feffa0e0
...@@ -1759,29 +1759,33 @@ void ...@@ -1759,29 +1759,33 @@ void
heap_drop_with_catalog(Oid relid) heap_drop_with_catalog(Oid relid)
{ {
Relation rel; Relation rel;
HeapTuple tuple;
Oid parentOid; Oid parentOid;
Relation parent = NULL; Relation parent = NULL;
/* /*
* Open and lock the relation. * To drop a partition safely, we must grab exclusive lock on its parent,
* because another backend might be about to execute a query on the parent
* table. If it relies on previously cached partition descriptor, then
* it could attempt to access the just-dropped relation as its partition.
* We must therefore take a table lock strong enough to prevent all
* queries on the table from proceeding until we commit and send out a
* shared-cache-inval notice that will make them update their index lists.
*/ */
rel = relation_open(relid, AccessExclusiveLock); tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
if (((Form_pg_class) GETSTRUCT(tuple))->relispartition)
/*
* If the relation is a partition, we must grab exclusive lock on its
* parent because we need to update its partition descriptor. We must take
* a table lock strong enough to prevent all queries on the parent from
* proceeding until we commit and send out a shared-cache-inval notice
* that will make them update their partition descriptor. Sometimes, doing
* this is cycles spent uselessly, especially if the parent will be
* dropped as part of the same command anyway.
*/
if (rel->rd_rel->relispartition)
{ {
parentOid = get_partition_parent(relid); parentOid = get_partition_parent(relid);
parent = heap_open(parentOid, AccessExclusiveLock); parent = heap_open(parentOid, AccessExclusiveLock);
} }
ReleaseSysCache(tuple);
/*
* Open and lock the relation.
*/
rel = relation_open(relid, AccessExclusiveLock);
/* /*
* There can no longer be anyone *else* touching the relation, but we * There can no longer be anyone *else* touching the relation, but we
* might still have open queries or cursors, or pending trigger events, in * might still have open queries or cursors, or pending trigger events, in
......
...@@ -271,6 +271,7 @@ struct DropRelationCallbackState ...@@ -271,6 +271,7 @@ struct DropRelationCallbackState
{ {
char relkind; char relkind;
Oid heapOid; Oid heapOid;
Oid partParentOid;
bool concurrent; bool concurrent;
}; };
...@@ -1049,6 +1050,7 @@ RemoveRelations(DropStmt *drop) ...@@ -1049,6 +1050,7 @@ RemoveRelations(DropStmt *drop)
/* Look up the appropriate relation using namespace search. */ /* Look up the appropriate relation using namespace search. */
state.relkind = relkind; state.relkind = relkind;
state.heapOid = InvalidOid; state.heapOid = InvalidOid;
state.partParentOid = InvalidOid;
state.concurrent = drop->concurrent; state.concurrent = drop->concurrent;
relOid = RangeVarGetRelidExtended(rel, lockmode, true, relOid = RangeVarGetRelidExtended(rel, lockmode, true,
false, false,
...@@ -1078,6 +1080,8 @@ RemoveRelations(DropStmt *drop) ...@@ -1078,6 +1080,8 @@ RemoveRelations(DropStmt *drop)
/* /*
* Before acquiring a table lock, check whether we have sufficient rights. * Before acquiring a table lock, check whether we have sufficient rights.
* In the case of DROP INDEX, also try to lock the table before the index. * In the case of DROP INDEX, also try to lock the table before the index.
* Also, if the table to be dropped is a partition, we try to lock the parent
* first.
*/ */
static void static void
RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
...@@ -1087,6 +1091,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, ...@@ -1087,6 +1091,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
struct DropRelationCallbackState *state; struct DropRelationCallbackState *state;
char relkind; char relkind;
char expected_relkind; char expected_relkind;
bool is_partition;
Form_pg_class classform; Form_pg_class classform;
LOCKMODE heap_lockmode; LOCKMODE heap_lockmode;
...@@ -1106,6 +1111,17 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, ...@@ -1106,6 +1111,17 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
state->heapOid = InvalidOid; state->heapOid = InvalidOid;
} }
/*
* Similarly, if we previously locked some other partition's heap, and
* the name we're looking up no longer refers to that relation, release
* the now-useless lock.
*/
if (relOid != oldRelOid && OidIsValid(state->partParentOid))
{
UnlockRelationOid(state->partParentOid, AccessExclusiveLock);
state->partParentOid = InvalidOid;
}
/* Didn't find a relation, so no need for locking or permission checks. */ /* Didn't find a relation, so no need for locking or permission checks. */
if (!OidIsValid(relOid)) if (!OidIsValid(relOid))
return; return;
...@@ -1114,6 +1130,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, ...@@ -1114,6 +1130,7 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
if (!HeapTupleIsValid(tuple)) if (!HeapTupleIsValid(tuple))
return; /* concurrently dropped, so nothing to do */ return; /* concurrently dropped, so nothing to do */
classform = (Form_pg_class) GETSTRUCT(tuple); classform = (Form_pg_class) GETSTRUCT(tuple);
is_partition = classform->relispartition;
/* /*
* Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE, * Both RELKIND_RELATION and RELKIND_PARTITIONED_TABLE are OBJECT_TABLE,
...@@ -1157,6 +1174,19 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid, ...@@ -1157,6 +1174,19 @@ RangeVarCallbackForDropRelation(const RangeVar *rel, Oid relOid, Oid oldRelOid,
if (OidIsValid(state->heapOid)) if (OidIsValid(state->heapOid))
LockRelationOid(state->heapOid, heap_lockmode); LockRelationOid(state->heapOid, heap_lockmode);
} }
/*
* Similarly, if the relation is a partition, we must acquire lock on its
* parent before locking the partition. That's because queries lock the
* parent before its partitions, so we risk deadlock it we do it the other
* way around.
*/
if (is_partition && relOid != oldRelOid)
{
state->partParentOid = get_partition_parent(relOid);
if (OidIsValid(state->partParentOid))
LockRelationOid(state->partParentOid, AccessExclusiveLock);
}
} }
/* /*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment