Commit bf9a60ee authored by Kevin Grittner's avatar Kevin Grittner

Fix interaction between CREATE INDEX and "snapshot too old".

Since indexes are created without valid LSNs, an index created
while a snapshot older than old_snapshot_threshold existed could
cause queries to return incorrect results when those old snapshots
were used, if any relevant rows had been subject to early pruning
before the index was built.  Prevent usage of a newly created index
until all such snapshots are released, for relations where this can
happen.

Questions about the interaction of "snapshot too old" with index
creation were initially raised by Andres Freund.

Reviewed by Robert Haas.
parent cae1c788
...@@ -2040,18 +2040,24 @@ index_build(Relation heapRelation, ...@@ -2040,18 +2040,24 @@ index_build(Relation heapRelation,
/* /*
* If we found any potentially broken HOT chains, mark the index as not * If we found any potentially broken HOT chains, mark the index as not
* being usable until the current transaction is below the event horizon. * being usable until the current transaction is below the event horizon.
* See src/backend/access/heap/README.HOT for discussion. * See src/backend/access/heap/README.HOT for discussion. Also set this
* if early pruning/vacuuming is enabled for the heap relation. While it
* might become safe to use the index earlier based on actual cleanup
* activity and other active transactions, the test for that would be much
* more complex and would require some form of blocking, so keep it simple
* and fast by just using the current transaction.
* *
* However, when reindexing an existing index, we should do nothing here. * However, when reindexing an existing index, we should do nothing here.
* Any HOT chains that are broken with respect to the index must predate * Any HOT chains that are broken with respect to the index must predate
* the index's original creation, so there is no need to change the * the index's original creation, so there is no need to change the
* index's usability horizon. Moreover, we *must not* try to change the * index's usability horizon. Moreover, we *must not* try to change the
* index's pg_index entry while reindexing pg_index itself, and this * index's pg_index entry while reindexing pg_index itself, and this
* optimization nicely prevents that. * optimization nicely prevents that. The more complex rules needed for a
* reindex are handled separately after this function returns.
* *
* We also need not set indcheckxmin during a concurrent index build, * We also need not set indcheckxmin during a concurrent index build,
* because we won't set indisvalid true until all transactions that care * because we won't set indisvalid true until all transactions that care
* about the broken HOT chains are gone. * about the broken HOT chains or early pruning/vacuuming are gone.
* *
* Therefore, this code path can only be taken during non-concurrent * Therefore, this code path can only be taken during non-concurrent
* CREATE INDEX. Thus the fact that heap_update will set the pg_index * CREATE INDEX. Thus the fact that heap_update will set the pg_index
...@@ -2060,7 +2066,8 @@ index_build(Relation heapRelation, ...@@ -2060,7 +2066,8 @@ index_build(Relation heapRelation,
* about any concurrent readers of the tuple; no other transaction can see * about any concurrent readers of the tuple; no other transaction can see
* it yet. * it yet.
*/ */
if (indexInfo->ii_BrokenHotChain && !isreindex && if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
!isreindex &&
!indexInfo->ii_Concurrent) !indexInfo->ii_Concurrent)
{ {
Oid indexId = RelationGetRelid(indexRelation); Oid indexId = RelationGetRelid(indexRelation);
...@@ -3389,6 +3396,11 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, ...@@ -3389,6 +3396,11 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
* reindexing pg_index itself, we must not try to update tuples in it. * reindexing pg_index itself, we must not try to update tuples in it.
* pg_index's indexes should always have these flags in their clean state, * pg_index's indexes should always have these flags in their clean state,
* so that won't happen. * so that won't happen.
*
* If early pruning/vacuuming is enabled for the heap relation, the
* usability horizon must be advanced to the current transaction on every
* build or rebuild. pg_index is OK in this regard because catalog tables
* are not subject to early cleanup.
*/ */
if (!skipped_constraint) if (!skipped_constraint)
{ {
...@@ -3396,6 +3408,7 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, ...@@ -3396,6 +3408,7 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
HeapTuple indexTuple; HeapTuple indexTuple;
Form_pg_index indexForm; Form_pg_index indexForm;
bool index_bad; bool index_bad;
bool early_vacuum_enabled = EarlyPruningEnabled(heapRelation);
pg_index = heap_open(IndexRelationId, RowExclusiveLock); pg_index = heap_open(IndexRelationId, RowExclusiveLock);
...@@ -3409,11 +3422,12 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence, ...@@ -3409,11 +3422,12 @@ reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
!indexForm->indisready || !indexForm->indisready ||
!indexForm->indislive); !indexForm->indislive);
if (index_bad || if (index_bad ||
(indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain)) (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
early_vacuum_enabled)
{ {
if (!indexInfo->ii_BrokenHotChain) if (!indexInfo->ii_BrokenHotChain && !early_vacuum_enabled)
indexForm->indcheckxmin = false; indexForm->indcheckxmin = false;
else if (index_bad) else if (index_bad || early_vacuum_enabled)
indexForm->indcheckxmin = true; indexForm->indcheckxmin = true;
indexForm->indisvalid = true; indexForm->indisvalid = true;
indexForm->indisready = true; indexForm->indisready = true;
......
...@@ -4290,8 +4290,7 @@ IssuePendingWritebacks(WritebackContext *context) ...@@ -4290,8 +4290,7 @@ IssuePendingWritebacks(WritebackContext *context)
void void
TestForOldSnapshot_impl(Snapshot snapshot, Relation relation) TestForOldSnapshot_impl(Snapshot snapshot, Relation relation)
{ {
if (!IsCatalogRelation(relation) if (RelationAllowsEarlyPruning(relation)
&& !RelationIsAccessibleInLogicalDecoding(relation)
&& (snapshot)->whenTaken < GetOldSnapshotThresholdTimestamp()) && (snapshot)->whenTaken < GetOldSnapshotThresholdTimestamp())
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SNAPSHOT_TOO_OLD), (errcode(ERRCODE_SNAPSHOT_TOO_OLD),
......
...@@ -1596,10 +1596,7 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin, ...@@ -1596,10 +1596,7 @@ TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
{ {
if (TransactionIdIsNormal(recentXmin) if (TransactionIdIsNormal(recentXmin)
&& old_snapshot_threshold >= 0 && old_snapshot_threshold >= 0
&& RelationNeedsWAL(relation) && RelationAllowsEarlyPruning(relation))
&& !IsCatalogRelation(relation)
&& !RelationIsAccessibleInLogicalDecoding(relation)
&& !RelationHasUnloggedIndex(relation))
{ {
int64 ts = GetSnapshotCurrentTimestamp(); int64 ts = GetSnapshotCurrentTimestamp();
TransactionId xlimit = recentXmin; TransactionId xlimit = recentXmin;
......
...@@ -31,6 +31,19 @@ ...@@ -31,6 +31,19 @@
#define OLD_SNAPSHOT_PADDING_ENTRIES 10 #define OLD_SNAPSHOT_PADDING_ENTRIES 10
#define OLD_SNAPSHOT_TIME_MAP_ENTRIES (old_snapshot_threshold + OLD_SNAPSHOT_PADDING_ENTRIES) #define OLD_SNAPSHOT_TIME_MAP_ENTRIES (old_snapshot_threshold + OLD_SNAPSHOT_PADDING_ENTRIES)
/*
* Common definition of relation properties that allow early pruning/vacuuming
* when old_snapshot_threshold >= 0.
*/
#define RelationAllowsEarlyPruning(rel) \
( \
RelationNeedsWAL(rel) \
&& !IsCatalogRelation(rel) \
&& !RelationIsAccessibleInLogicalDecoding(rel) \
&& !RelationHasUnloggedIndex(rel) \
)
#define EarlyPruningEnabled(rel) (old_snapshot_threshold >= 0 && RelationAllowsEarlyPruning(rel))
/* GUC variables */ /* GUC variables */
extern PGDLLIMPORT int old_snapshot_threshold; extern PGDLLIMPORT int old_snapshot_threshold;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment