Commit a5736bf7 authored by Alvaro Herrera's avatar Alvaro Herrera

Fix traversal of half-frozen update chains

When some tuple versions in an update chain are frozen due to them being
older than freeze_min_age, the xmax/xmin trail can become broken.  This
breaks HOT (and probably other things).  A subsequent VACUUM can break
things in more serious ways, such as leaving orphan heap-only tuples
whose root HOT redirect items were removed.  This can be seen because
index creation (or REINDEX) complain like
  ERROR:  XX000: failed to find parent tuple for heap-only tuple at (0,7) in table "t"

Because of relfrozenxid contraints, we cannot avoid the freezing of the
early tuples, so we must cope with the results: whenever we see an Xmin
of FrozenTransactionId, consider it a match for whatever the previous
Xmax value was.

This problem seems to have appeared in 9.3 with multixact changes,
though strictly speaking it seems unrelated.

Since 9.4 we have commit 37484ad2 "Change the way we mark tuples as
frozen", so the fix is simple: just compare the raw Xmin (still stored
in the tuple header, since freezing merely set an infomask bit) to the
Xmax.  But in 9.3 we rewrite the Xmin value to FrozenTransactionId, so
the original value is lost and we have nothing to compare the Xmax with.
To cope with that case we need to compare the Xmin with FrozenXid,
assume it's a match, and hope for the best.  Sadly, since you can
pg_upgrade a 9.3 instance containing half-frozen pages to newer
releases, we need to keep the old check in newer versions too, which
seems a bit brittle; I hope we can somehow get rid of that.

I didn't optimize the new function for performance.  The new coding is
probably a bit slower than before, since there is a function call rather
than a straight comparison, but I'd rather have it work correctly than
be fast but wrong.

This is a followup after 20b65522 fixed a few related problems.
Apparently, in 9.6 and up there are more ways to get into trouble, but
in 9.3 - 9.5 I cannot reproduce a problem anymore with this patch, so
there must be a separate bug.

Reported-by: Peter Geoghegan
Diagnosed-by: Peter Geoghegan, Michael Paquier, Daniel Wood,
	Yi Wen Wong, Álvaro
Discussion: https://postgr.es/m/CAH2-Wznm4rCrhFAiwKPWTpEw2bXDtgROZK7jWWGucXeH3D1fmA@mail.gmail.com
parent f49842d1
...@@ -2074,8 +2074,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer, ...@@ -2074,8 +2074,7 @@ heap_hot_search_buffer(ItemPointer tid, Relation relation, Buffer buffer,
* broken. * broken.
*/ */
if (TransactionIdIsValid(prev_xmax) && if (TransactionIdIsValid(prev_xmax) &&
!TransactionIdEquals(prev_xmax, !HeapTupleUpdateXmaxMatchesXmin(prev_xmax, heapTuple->t_data))
HeapTupleHeaderGetXmin(heapTuple->t_data)))
break; break;
/* /*
...@@ -2261,7 +2260,7 @@ heap_get_latest_tid(Relation relation, ...@@ -2261,7 +2260,7 @@ heap_get_latest_tid(Relation relation,
* tuple. Check for XMIN match. * tuple. Check for XMIN match.
*/ */
if (TransactionIdIsValid(priorXmax) && if (TransactionIdIsValid(priorXmax) &&
!TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(tp.t_data))) !HeapTupleUpdateXmaxMatchesXmin(priorXmax, tp.t_data))
{ {
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
break; break;
...@@ -2293,6 +2292,50 @@ heap_get_latest_tid(Relation relation, ...@@ -2293,6 +2292,50 @@ heap_get_latest_tid(Relation relation,
} /* end of loop */ } /* end of loop */
} }
/*
* HeapTupleUpdateXmaxMatchesXmin - verify update chain xmax/xmin lineage
*
* Given the new version of a tuple after some update, verify whether the
* given Xmax (corresponding to the previous version) matches the tuple's
* Xmin, taking into account that the Xmin might have been frozen after the
* update.
*/
bool
HeapTupleUpdateXmaxMatchesXmin(TransactionId xmax, HeapTupleHeader htup)
{
TransactionId xmin = HeapTupleHeaderGetXmin(htup);
/*
* If the xmax of the old tuple is identical to the xmin of the new one,
* it's a match.
*/
if (TransactionIdEquals(xmax, xmin))
return true;
/*
* If the Xmin that was in effect prior to a freeze matches the Xmax,
* it's good too.
*/
if (HeapTupleHeaderXminFrozen(htup) &&
TransactionIdEquals(HeapTupleHeaderGetRawXmin(htup), xmax))
return true;
/*
* When a tuple is frozen, the original Xmin is lost, but we know it's a
* committed transaction. So unless the Xmax is InvalidXid, we don't know
* for certain that there is a match, but there may be one; and we must
* return true so that a HOT chain that is half-frozen can be walked
* correctly.
*
* We no longer freeze tuples this way, but we must keep this in order to
* interpret pre-pg_upgrade pages correctly.
*/
if (TransactionIdEquals(xmin, FrozenTransactionId) &&
TransactionIdIsValid(xmax))
return true;
return false;
}
/* /*
* UpdateXmaxHintBits - update tuple hint bits after xmax transaction ends * UpdateXmaxHintBits - update tuple hint bits after xmax transaction ends
...@@ -5712,8 +5755,7 @@ l4: ...@@ -5712,8 +5755,7 @@ l4:
* end of the chain, we're done, so return success. * end of the chain, we're done, so return success.
*/ */
if (TransactionIdIsValid(priorXmax) && if (TransactionIdIsValid(priorXmax) &&
!TransactionIdEquals(HeapTupleHeaderGetXmin(mytup.t_data), !HeapTupleUpdateXmaxMatchesXmin(priorXmax, mytup.t_data))
priorXmax))
{ {
result = HeapTupleMayBeUpdated; result = HeapTupleMayBeUpdated;
goto out_locked; goto out_locked;
......
...@@ -473,7 +473,7 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum, ...@@ -473,7 +473,7 @@ heap_prune_chain(Relation relation, Buffer buffer, OffsetNumber rootoffnum,
* Check the tuple XMIN against prior XMAX, if any * Check the tuple XMIN against prior XMAX, if any
*/ */
if (TransactionIdIsValid(priorXmax) && if (TransactionIdIsValid(priorXmax) &&
!TransactionIdEquals(HeapTupleHeaderGetXmin(htup), priorXmax)) !HeapTupleUpdateXmaxMatchesXmin(priorXmax, htup))
break; break;
/* /*
...@@ -813,7 +813,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets) ...@@ -813,7 +813,7 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
htup = (HeapTupleHeader) PageGetItem(page, lp); htup = (HeapTupleHeader) PageGetItem(page, lp);
if (TransactionIdIsValid(priorXmax) && if (TransactionIdIsValid(priorXmax) &&
!TransactionIdEquals(priorXmax, HeapTupleHeaderGetXmin(htup))) !HeapTupleUpdateXmaxMatchesXmin(priorXmax, htup))
break; break;
/* Remember the root line pointer for this item */ /* Remember the root line pointer for this item */
......
...@@ -2594,8 +2594,7 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, ...@@ -2594,8 +2594,7 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
* atomic, and Xmin never changes in an existing tuple, except to * atomic, and Xmin never changes in an existing tuple, except to
* invalid or frozen, and neither of those can match priorXmax.) * invalid or frozen, and neither of those can match priorXmax.)
*/ */
if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data), if (!HeapTupleUpdateXmaxMatchesXmin(priorXmax, tuple.t_data))
priorXmax))
{ {
ReleaseBuffer(buffer); ReleaseBuffer(buffer);
return NULL; return NULL;
...@@ -2742,8 +2741,7 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode, ...@@ -2742,8 +2741,7 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
/* /*
* As above, if xmin isn't what we're expecting, do nothing. * As above, if xmin isn't what we're expecting, do nothing.
*/ */
if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data), if (!HeapTupleUpdateXmaxMatchesXmin(priorXmax, tuple.t_data))
priorXmax))
{ {
ReleaseBuffer(buffer); ReleaseBuffer(buffer);
return NULL; return NULL;
......
...@@ -146,6 +146,9 @@ extern void heap_get_latest_tid(Relation relation, Snapshot snapshot, ...@@ -146,6 +146,9 @@ extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
ItemPointer tid); ItemPointer tid);
extern void setLastTid(const ItemPointer tid); extern void setLastTid(const ItemPointer tid);
extern bool HeapTupleUpdateXmaxMatchesXmin(TransactionId xmax,
HeapTupleHeader htup);
extern BulkInsertState GetBulkInsertState(void); extern BulkInsertState GetBulkInsertState(void);
extern void FreeBulkInsertState(BulkInsertState); extern void FreeBulkInsertState(BulkInsertState);
extern void ReleaseBulkInsertStatePin(BulkInsertState bistate); extern void ReleaseBulkInsertStatePin(BulkInsertState bistate);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment