Commit 1ddc2703 authored by Tom Lane's avatar Tom Lane

Work around deadlock problems with VACUUM FULL/CLUSTER on system catalogs,

as per my recent proposal.

First, teach IndexBuildHeapScan to not wait for INSERT_IN_PROGRESS or
DELETE_IN_PROGRESS tuples to commit unless the index build is checking
uniqueness/exclusion constraints.  If it isn't, there's no harm in just
indexing the in-doubt tuple.

Second, modify VACUUM FULL/CLUSTER to suppress reverifying
uniqueness/exclusion constraint properties while rebuilding indexes of
the target relation.  This is reasonable because these commands aren't
meant to deal with corrupted-data situations.  Constraint properties
will still be rechecked when an index is rebuilt by a REINDEX command.

This gets us out of the problem that new-style VACUUM FULL would often
wait for other transactions while holding exclusive lock on a system
catalog, leading to probable deadlock because those other transactions
need to look at the catalogs too.  Although the real ultimate cause of
the problem is a debatable choice to release locks early after modifying
system catalogs, changing that choice would require pretty serious
analysis and is not something to be undertaken lightly or on a tight
schedule.  The present patch fixes the problem in a fairly reasonable
way and should also improve the speed of VACUUM FULL/CLUSTER a little bit.
parent 1c05b0b4
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.333 2010/02/07 20:48:09 tgl Exp $ * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.334 2010/02/07 22:40:33 tgl Exp $
* *
* *
* INTERFACE ROUTINES * INTERFACE ROUTINES
...@@ -1541,6 +1541,8 @@ IndexBuildHeapScan(Relation heapRelation, ...@@ -1541,6 +1541,8 @@ IndexBuildHeapScan(Relation heapRelation,
IndexBuildCallback callback, IndexBuildCallback callback,
void *callback_state) void *callback_state)
{ {
bool is_system_catalog;
bool checking_uniqueness;
HeapScanDesc scan; HeapScanDesc scan;
HeapTuple heapTuple; HeapTuple heapTuple;
Datum values[INDEX_MAX_KEYS]; Datum values[INDEX_MAX_KEYS];
...@@ -1560,6 +1562,13 @@ IndexBuildHeapScan(Relation heapRelation, ...@@ -1560,6 +1562,13 @@ IndexBuildHeapScan(Relation heapRelation,
*/ */
Assert(OidIsValid(indexRelation->rd_rel->relam)); Assert(OidIsValid(indexRelation->rd_rel->relam));
/* Remember if it's a system catalog */
is_system_catalog = IsSystemRelation(heapRelation);
/* See whether we're verifying uniqueness/exclusion properties */
checking_uniqueness = (indexInfo->ii_Unique ||
indexInfo->ii_ExclusionOps != NULL);
/* /*
* Need an EState for evaluation of index expressions and partial-index * Need an EState for evaluation of index expressions and partial-index
* predicates. Also a slot to hold the current tuple. * predicates. Also a slot to hold the current tuple.
...@@ -1652,6 +1661,7 @@ IndexBuildHeapScan(Relation heapRelation, ...@@ -1652,6 +1661,7 @@ IndexBuildHeapScan(Relation heapRelation,
{ {
/* do our own time qual check */ /* do our own time qual check */
bool indexIt; bool indexIt;
TransactionId xwait;
recheck: recheck:
...@@ -1710,29 +1720,31 @@ IndexBuildHeapScan(Relation heapRelation, ...@@ -1710,29 +1720,31 @@ IndexBuildHeapScan(Relation heapRelation,
case HEAPTUPLE_INSERT_IN_PROGRESS: case HEAPTUPLE_INSERT_IN_PROGRESS:
/* /*
* Since caller should hold ShareLock or better, we should * Since caller should hold ShareLock or better, normally
* not see any tuples inserted by open transactions --- * the only way to see this is if it was inserted earlier
* unless it's our own transaction. (Consider INSERT * in our own transaction. However, it can happen in
* followed by CREATE INDEX within a transaction.) An * system catalogs, since we tend to release write lock
* exception occurs when reindexing a system catalog, * before commit there. Give a warning if neither case
* because we often release lock on system catalogs before * applies.
* committing. In that case we wait for the inserting
* transaction to finish and check again. (We could do
* that on user tables too, but since the case is not
* expected it seems better to throw an error.)
*/ */
if (!TransactionIdIsCurrentTransactionId( xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
HeapTupleHeaderGetXmin(heapTuple->t_data))) if (!TransactionIdIsCurrentTransactionId(xwait))
{ {
if (!IsSystemRelation(heapRelation)) if (!is_system_catalog)
elog(ERROR, "concurrent insert in progress"); elog(WARNING, "concurrent insert in progress within table \"%s\"",
else RelationGetRelationName(heapRelation));
/*
* If we are performing uniqueness checks, indexing
* such a tuple could lead to a bogus uniqueness
* failure. In that case we wait for the inserting
* transaction to finish and check again.
*/
if (checking_uniqueness)
{ {
/* /*
* Must drop the lock on the buffer before we wait * Must drop the lock on the buffer before we wait
*/ */
TransactionId xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
XactLockTableWait(xwait); XactLockTableWait(xwait);
goto recheck; goto recheck;
...@@ -1749,30 +1761,27 @@ IndexBuildHeapScan(Relation heapRelation, ...@@ -1749,30 +1761,27 @@ IndexBuildHeapScan(Relation heapRelation,
case HEAPTUPLE_DELETE_IN_PROGRESS: case HEAPTUPLE_DELETE_IN_PROGRESS:
/* /*
* Since caller should hold ShareLock or better, we should * Similar situation to INSERT_IN_PROGRESS case.
* not see any tuples deleted by open transactions ---
* unless it's our own transaction. (Consider DELETE
* followed by CREATE INDEX within a transaction.) An
* exception occurs when reindexing a system catalog,
* because we often release lock on system catalogs before
* committing. In that case we wait for the deleting
* transaction to finish and check again. (We could do
* that on user tables too, but since the case is not
* expected it seems better to throw an error.)
*/ */
Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); Assert(!(heapTuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
if (!TransactionIdIsCurrentTransactionId( xwait = HeapTupleHeaderGetXmax(heapTuple->t_data);
HeapTupleHeaderGetXmax(heapTuple->t_data))) if (!TransactionIdIsCurrentTransactionId(xwait))
{ {
if (!IsSystemRelation(heapRelation)) if (!is_system_catalog)
elog(ERROR, "concurrent delete in progress"); elog(WARNING, "concurrent delete in progress within table \"%s\"",
else RelationGetRelationName(heapRelation));
/*
* If we are performing uniqueness checks, assuming
* the tuple is dead could lead to missing a uniqueness
* violation. In that case we wait for the deleting
* transaction to finish and check again.
*/
if (checking_uniqueness)
{ {
/* /*
* Must drop the lock on the buffer before we wait * Must drop the lock on the buffer before we wait
*/ */
TransactionId xwait = HeapTupleHeaderGetXmax(heapTuple->t_data);
LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
XactLockTableWait(xwait); XactLockTableWait(xwait);
goto recheck; goto recheck;
...@@ -2402,7 +2411,7 @@ IndexGetRelation(Oid indexId) ...@@ -2402,7 +2411,7 @@ IndexGetRelation(Oid indexId)
* reindex_index - This routine is used to recreate a single index * reindex_index - This routine is used to recreate a single index
*/ */
void void
reindex_index(Oid indexId) reindex_index(Oid indexId, bool skip_constraint_checks)
{ {
Relation iRel, Relation iRel,
heapRelation, heapRelation,
...@@ -2411,6 +2420,7 @@ reindex_index(Oid indexId) ...@@ -2411,6 +2420,7 @@ reindex_index(Oid indexId)
IndexInfo *indexInfo; IndexInfo *indexInfo;
HeapTuple indexTuple; HeapTuple indexTuple;
Form_pg_index indexForm; Form_pg_index indexForm;
volatile bool skipped_constraint = false;
/* /*
* Open and lock the parent heap relation. ShareLock is sufficient since * Open and lock the parent heap relation. ShareLock is sufficient since
...@@ -2448,6 +2458,17 @@ reindex_index(Oid indexId) ...@@ -2448,6 +2458,17 @@ reindex_index(Oid indexId)
/* Fetch info needed for index_build */ /* Fetch info needed for index_build */
indexInfo = BuildIndexInfo(iRel); indexInfo = BuildIndexInfo(iRel);
/* If requested, skip checking uniqueness/exclusion constraints */
if (skip_constraint_checks)
{
if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
skipped_constraint = true;
indexInfo->ii_Unique = false;
indexInfo->ii_ExclusionOps = NULL;
indexInfo->ii_ExclusionProcs = NULL;
indexInfo->ii_ExclusionStrats = NULL;
}
/* We'll build a new physical relation for the index */ /* We'll build a new physical relation for the index */
RelationSetNewRelfilenode(iRel, InvalidTransactionId); RelationSetNewRelfilenode(iRel, InvalidTransactionId);
...@@ -2466,13 +2487,16 @@ reindex_index(Oid indexId) ...@@ -2466,13 +2487,16 @@ reindex_index(Oid indexId)
/* /*
* If the index is marked invalid or not ready (ie, it's from a failed * If the index is marked invalid or not ready (ie, it's from a failed
* CREATE INDEX CONCURRENTLY), we can now mark it valid. This allows * CREATE INDEX CONCURRENTLY), and we didn't skip a uniqueness check,
* REINDEX to be used to clean up in such cases. * we can now mark it valid. This allows REINDEX to be used to clean up
* in such cases.
* *
* We can also reset indcheckxmin, because we have now done a * We can also reset indcheckxmin, because we have now done a
* non-concurrent index build, *except* in the case where index_build * non-concurrent index build, *except* in the case where index_build
* found some still-broken HOT chains. * found some still-broken HOT chains.
*/ */
if (!skipped_constraint)
{
pg_index = heap_open(IndexRelationId, RowExclusiveLock); pg_index = heap_open(IndexRelationId, RowExclusiveLock);
indexTuple = SearchSysCacheCopy(INDEXRELID, indexTuple = SearchSysCacheCopy(INDEXRELID,
...@@ -2492,7 +2516,9 @@ reindex_index(Oid indexId) ...@@ -2492,7 +2516,9 @@ reindex_index(Oid indexId)
simple_heap_update(pg_index, &indexTuple->t_self, indexTuple); simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
CatalogUpdateIndexes(pg_index, indexTuple); CatalogUpdateIndexes(pg_index, indexTuple);
} }
heap_close(pg_index, RowExclusiveLock); heap_close(pg_index, RowExclusiveLock);
}
/* Close rels, but keep locks */ /* Close rels, but keep locks */
index_close(iRel, NoLock); index_close(iRel, NoLock);
...@@ -2513,6 +2539,11 @@ reindex_index(Oid indexId) ...@@ -2513,6 +2539,11 @@ reindex_index(Oid indexId)
* do CCI after having collected the index list. (This way we can still use * do CCI after having collected the index list. (This way we can still use
* catalog indexes while collecting the list.) * catalog indexes while collecting the list.)
* *
* We also skip rechecking uniqueness/exclusion constraint properties if
* heap_rebuilt is true. This avoids likely deadlock conditions when doing
* VACUUM FULL or CLUSTER on system catalogs. REINDEX should be used to
* rebuild an index if constraint inconsistency is suspected.
*
* Returns true if any indexes were rebuilt. Note that a * Returns true if any indexes were rebuilt. Note that a
* CommandCounterIncrement will occur after each index rebuild. * CommandCounterIncrement will occur after each index rebuild.
*/ */
...@@ -2594,7 +2625,7 @@ reindex_relation(Oid relid, bool toast_too, bool heap_rebuilt) ...@@ -2594,7 +2625,7 @@ reindex_relation(Oid relid, bool toast_too, bool heap_rebuilt)
if (is_pg_class) if (is_pg_class)
RelationSetIndexList(rel, doneIndexes, InvalidOid); RelationSetIndexList(rel, doneIndexes, InvalidOid);
reindex_index(indexOid); reindex_index(indexOid, heap_rebuilt);
CommandCounterIncrement(); CommandCounterIncrement();
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.198 2010/02/07 20:48:10 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.199 2010/02/07 22:40:33 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -948,7 +948,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, ...@@ -948,7 +948,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
*/ */
if (!is_system_catalog && if (!is_system_catalog &&
!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data))) !TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple->t_data)))
elog(WARNING, "concurrent insert in progress within table \"%s\"", elog(WARNING, "concurrent insert in progress within table \"%s\"",
RelationGetRelationName(OldHeap)); RelationGetRelationName(OldHeap));
/* treat as live */ /* treat as live */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.191 2010/02/07 20:48:10 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/indexcmds.c,v 1.192 2010/02/07 22:40:33 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1588,7 +1588,7 @@ ReindexIndex(RangeVar *indexRelation) ...@@ -1588,7 +1588,7 @@ ReindexIndex(RangeVar *indexRelation)
ReleaseSysCache(tuple); ReleaseSysCache(tuple);
reindex_index(indOid); reindex_index(indOid, false);
} }
/* /*
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/catalog/index.h,v 1.82 2010/02/07 20:48:11 tgl Exp $ * $PostgreSQL: pgsql/src/include/catalog/index.h,v 1.83 2010/02/07 22:40:33 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -70,7 +70,7 @@ extern double IndexBuildHeapScan(Relation heapRelation, ...@@ -70,7 +70,7 @@ extern double IndexBuildHeapScan(Relation heapRelation,
extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot); extern void validate_index(Oid heapId, Oid indexId, Snapshot snapshot);
extern void reindex_index(Oid indexId); extern void reindex_index(Oid indexId, bool skip_constraint_checks);
extern bool reindex_relation(Oid relid, bool toast_too, bool heap_rebuilt); extern bool reindex_relation(Oid relid, bool toast_too, bool heap_rebuilt);
extern bool ReindexIsProcessingHeap(Oid heapOid); extern bool ReindexIsProcessingHeap(Oid heapOid);
......
# ---------- # ----------
# $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.59 2010/02/07 20:48:13 tgl Exp $ # $PostgreSQL: pgsql/src/test/regress/parallel_schedule,v 1.60 2010/02/07 22:40:33 tgl Exp $
# #
# By convention, we put no more than twenty tests in any one parallel group; # By convention, we put no more than twenty tests in any one parallel group;
# this limits the number of connections needed to run the tests. # this limits the number of connections needed to run the tests.
...@@ -52,10 +52,7 @@ test: copy copyselect ...@@ -52,10 +52,7 @@ test: copy copyselect
# ---------- # ----------
# Another group of parallel tests # Another group of parallel tests
# ---------- # ----------
test: constraints triggers create_misc create_aggregate create_operator inherit typed_table drop_if_exists create_cast test: constraints triggers create_misc create_aggregate create_operator inherit typed_table vacuum drop_if_exists create_cast
# XXX temporarily run this by itself
test: vacuum
# Depends on the above # Depends on the above
test: create_index create_view test: create_index create_view
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment