Commit f57e3f4c authored by Tom Lane

Repair problems with VACUUM destroying t_ctid chains too soon, and with
insufficient paranoia in code that follows t_ctid links.  (We must do both
because even with VACUUM doing it properly, the intermediate state with
a dangling t_ctid link is visible concurrently during lazy VACUUM, and
could be seen afterwards if either type of VACUUM crashes partway through.)
Also try to improve documentation about what's going on.  Patch is a bit
bulky because passing the XMAX information around required changing the
APIs of some low-level heapam.c routines, but it's not conceptually very
complicated.  Per trouble report from Teodor and subsequent analysis.
This needs to be back-patched, but I'll do that after 8.1 beta is out.
parent 97bb6e89
This diff is collapsed.
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/async.c,v 1.123 2005/06/17 22:32:43 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/async.c,v 1.124 2005/08/20 00:39:53 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -550,8 +550,9 @@ AtCommit_Notify(void) ...@@ -550,8 +550,9 @@ AtCommit_Notify(void)
} }
else if (listener->notification == 0) else if (listener->notification == 0)
{ {
ItemPointerData ctid;
HTSU_Result result; HTSU_Result result;
ItemPointerData update_ctid;
TransactionId update_xmax;
rTuple = heap_modifytuple(lTuple, tdesc, rTuple = heap_modifytuple(lTuple, tdesc,
value, nulls, repl); value, nulls, repl);
...@@ -573,7 +574,7 @@ AtCommit_Notify(void) ...@@ -573,7 +574,7 @@ AtCommit_Notify(void)
* heap_update calls. * heap_update calls.
*/ */
result = heap_update(lRel, &lTuple->t_self, rTuple, result = heap_update(lRel, &lTuple->t_self, rTuple,
&ctid, &update_ctid, &update_xmax,
GetCurrentCommandId(), InvalidSnapshot, GetCurrentCommandId(), InvalidSnapshot,
false /* no wait for commit */ ); false /* no wait for commit */ );
switch (result) switch (result)
...@@ -585,7 +586,6 @@ AtCommit_Notify(void) ...@@ -585,7 +586,6 @@ AtCommit_Notify(void)
case HeapTupleMayBeUpdated: case HeapTupleMayBeUpdated:
/* done successfully */ /* done successfully */
#ifdef NOT_USED /* currently there are no indexes */ #ifdef NOT_USED /* currently there are no indexes */
CatalogUpdateIndexes(lRel, rTuple); CatalogUpdateIndexes(lRel, rTuple);
#endif #endif
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.191 2005/08/12 01:35:57 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.192 2005/08/20 00:39:54 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1592,14 +1592,18 @@ GetTupleForTrigger(EState *estate, ResultRelInfo *relinfo, ...@@ -1592,14 +1592,18 @@ GetTupleForTrigger(EState *estate, ResultRelInfo *relinfo,
if (newSlot != NULL) if (newSlot != NULL)
{ {
HTSU_Result test; HTSU_Result test;
ItemPointerData update_ctid;
TransactionId update_xmax;
*newSlot = NULL;
/* /*
* lock tuple for update * lock tuple for update
*/ */
*newSlot = NULL;
tuple.t_self = *tid;
ltrmark:; ltrmark:;
test = heap_lock_tuple(relation, &tuple, &buffer, cid, tuple.t_self = *tid;
test = heap_lock_tuple(relation, &tuple, &buffer,
&update_ctid, &update_xmax, cid,
LockTupleExclusive, false); LockTupleExclusive, false);
switch (test) switch (test)
{ {
...@@ -1617,15 +1621,18 @@ ltrmark:; ...@@ -1617,15 +1621,18 @@ ltrmark:;
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update"))); errmsg("could not serialize access due to concurrent update")));
else if (!(ItemPointerEquals(&(tuple.t_self), tid))) else if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
{ {
TupleTableSlot *epqslot = EvalPlanQual(estate, /* it was updated, so look at the updated version */
relinfo->ri_RangeTableIndex, TupleTableSlot *epqslot;
&(tuple.t_self));
epqslot = EvalPlanQual(estate,
if (!(TupIsNull(epqslot))) relinfo->ri_RangeTableIndex,
&update_ctid,
update_xmax);
if (!TupIsNull(epqslot))
{ {
*tid = tuple.t_self; *tid = update_ctid;
*newSlot = epqslot; *newSlot = epqslot;
goto ltrmark; goto ltrmark;
} }
...@@ -1639,7 +1646,7 @@ ltrmark:; ...@@ -1639,7 +1646,7 @@ ltrmark:;
default: default:
ReleaseBuffer(buffer); ReleaseBuffer(buffer);
elog(ERROR, "invalid heap_lock_tuple status: %d", test); elog(ERROR, "unrecognized heap_lock_tuple status: %u", test);
return NULL; /* keep compiler quiet */ return NULL; /* keep compiler quiet */
} }
} }
...@@ -1659,6 +1666,7 @@ ltrmark:; ...@@ -1659,6 +1666,7 @@ ltrmark:;
tuple.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp); tuple.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
tuple.t_len = ItemIdGetLength(lp); tuple.t_len = ItemIdGetLength(lp);
tuple.t_self = *tid; tuple.t_self = *tid;
tuple.t_tableOid = RelationGetRelid(relation);
} }
result = heap_copytuple(&tuple); result = heap_copytuple(&tuple);
......
This diff is collapsed.
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.253 2005/08/18 21:34:20 tgl Exp $ * $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.254 2005/08/20 00:39:55 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1165,8 +1165,10 @@ lnext: ; ...@@ -1165,8 +1165,10 @@ lnext: ;
foreach(l, estate->es_rowMarks) foreach(l, estate->es_rowMarks)
{ {
execRowMark *erm = lfirst(l); execRowMark *erm = lfirst(l);
Buffer buffer;
HeapTupleData tuple; HeapTupleData tuple;
Buffer buffer;
ItemPointerData update_ctid;
TransactionId update_xmax;
TupleTableSlot *newSlot; TupleTableSlot *newSlot;
LockTupleMode lockmode; LockTupleMode lockmode;
HTSU_Result test; HTSU_Result test;
...@@ -1183,15 +1185,17 @@ lnext: ; ...@@ -1183,15 +1185,17 @@ lnext: ;
if (isNull) if (isNull)
elog(ERROR, "\"%s\" is NULL", erm->resname); elog(ERROR, "\"%s\" is NULL", erm->resname);
tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
if (estate->es_forUpdate) if (estate->es_forUpdate)
lockmode = LockTupleExclusive; lockmode = LockTupleExclusive;
else else
lockmode = LockTupleShared; lockmode = LockTupleShared;
tuple.t_self = *((ItemPointer) DatumGetPointer(datum));
test = heap_lock_tuple(erm->relation, &tuple, &buffer, test = heap_lock_tuple(erm->relation, &tuple, &buffer,
estate->es_snapshot->curcid, &update_ctid, &update_xmax,
lockmode, estate->es_rowNoWait); estate->es_snapshot->curcid,
lockmode, estate->es_rowNoWait);
ReleaseBuffer(buffer); ReleaseBuffer(buffer);
switch (test) switch (test)
{ {
...@@ -1207,11 +1211,15 @@ lnext: ; ...@@ -1207,11 +1211,15 @@ lnext: ;
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update"))); errmsg("could not serialize access due to concurrent update")));
if (!(ItemPointerEquals(&(tuple.t_self), if (!ItemPointerEquals(&update_ctid,
(ItemPointer) DatumGetPointer(datum)))) &tuple.t_self))
{ {
newSlot = EvalPlanQual(estate, erm->rti, &(tuple.t_self)); /* updated, so look at updated version */
if (!(TupIsNull(newSlot))) newSlot = EvalPlanQual(estate,
erm->rti,
&update_ctid,
update_xmax);
if (!TupIsNull(newSlot))
{ {
slot = newSlot; slot = newSlot;
estate->es_useEvalPlan = true; estate->es_useEvalPlan = true;
...@@ -1454,8 +1462,9 @@ ExecDelete(TupleTableSlot *slot, ...@@ -1454,8 +1462,9 @@ ExecDelete(TupleTableSlot *slot,
{ {
ResultRelInfo *resultRelInfo; ResultRelInfo *resultRelInfo;
Relation resultRelationDesc; Relation resultRelationDesc;
ItemPointerData ctid;
HTSU_Result result; HTSU_Result result;
ItemPointerData update_ctid;
TransactionId update_xmax;
/* /*
* get information on the (current) result relation * get information on the (current) result relation
...@@ -1486,7 +1495,7 @@ ExecDelete(TupleTableSlot *slot, ...@@ -1486,7 +1495,7 @@ ExecDelete(TupleTableSlot *slot,
*/ */
ldelete:; ldelete:;
result = heap_delete(resultRelationDesc, tupleid, result = heap_delete(resultRelationDesc, tupleid,
&ctid, &update_ctid, &update_xmax,
estate->es_snapshot->curcid, estate->es_snapshot->curcid,
estate->es_crosscheck_snapshot, estate->es_crosscheck_snapshot,
true /* wait for commit */ ); true /* wait for commit */ );
...@@ -1504,14 +1513,17 @@ ldelete:; ...@@ -1504,14 +1513,17 @@ ldelete:;
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update"))); errmsg("could not serialize access due to concurrent update")));
else if (!(ItemPointerEquals(tupleid, &ctid))) else if (!ItemPointerEquals(tupleid, &update_ctid))
{ {
TupleTableSlot *epqslot = EvalPlanQual(estate, TupleTableSlot *epqslot;
resultRelInfo->ri_RangeTableIndex, &ctid);
epqslot = EvalPlanQual(estate,
resultRelInfo->ri_RangeTableIndex,
&update_ctid,
update_xmax);
if (!TupIsNull(epqslot)) if (!TupIsNull(epqslot))
{ {
*tupleid = ctid; *tupleid = update_ctid;
goto ldelete; goto ldelete;
} }
} }
...@@ -1558,8 +1570,9 @@ ExecUpdate(TupleTableSlot *slot, ...@@ -1558,8 +1570,9 @@ ExecUpdate(TupleTableSlot *slot,
HeapTuple tuple; HeapTuple tuple;
ResultRelInfo *resultRelInfo; ResultRelInfo *resultRelInfo;
Relation resultRelationDesc; Relation resultRelationDesc;
ItemPointerData ctid;
HTSU_Result result; HTSU_Result result;
ItemPointerData update_ctid;
TransactionId update_xmax;
/* /*
* abort the operation if not running transactions * abort the operation if not running transactions
...@@ -1627,7 +1640,7 @@ lreplace:; ...@@ -1627,7 +1640,7 @@ lreplace:;
* referential integrity updates in serializable transactions. * referential integrity updates in serializable transactions.
*/ */
result = heap_update(resultRelationDesc, tupleid, tuple, result = heap_update(resultRelationDesc, tupleid, tuple,
&ctid, &update_ctid, &update_xmax,
estate->es_snapshot->curcid, estate->es_snapshot->curcid,
estate->es_crosscheck_snapshot, estate->es_crosscheck_snapshot,
true /* wait for commit */ ); true /* wait for commit */ );
...@@ -1645,14 +1658,17 @@ lreplace:; ...@@ -1645,14 +1658,17 @@ lreplace:;
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update"))); errmsg("could not serialize access due to concurrent update")));
else if (!(ItemPointerEquals(tupleid, &ctid))) else if (!ItemPointerEquals(tupleid, &update_ctid))
{ {
TupleTableSlot *epqslot = EvalPlanQual(estate, TupleTableSlot *epqslot;
resultRelInfo->ri_RangeTableIndex, &ctid);
epqslot = EvalPlanQual(estate,
resultRelInfo->ri_RangeTableIndex,
&update_ctid,
update_xmax);
if (!TupIsNull(epqslot)) if (!TupIsNull(epqslot))
{ {
*tupleid = ctid; *tupleid = update_ctid;
slot = ExecFilterJunk(estate->es_junkFilter, epqslot); slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
tuple = ExecMaterializeSlot(slot); tuple = ExecMaterializeSlot(slot);
goto lreplace; goto lreplace;
...@@ -1791,9 +1807,21 @@ ExecConstraints(ResultRelInfo *resultRelInfo, ...@@ -1791,9 +1807,21 @@ ExecConstraints(ResultRelInfo *resultRelInfo,
* under READ COMMITTED rules. * under READ COMMITTED rules.
* *
* See backend/executor/README for some info about how this works. * See backend/executor/README for some info about how this works.
*
* estate - executor state data
* rti - rangetable index of table containing tuple
* *tid - t_ctid from the outdated tuple (ie, next updated version)
* priorXmax - t_xmax from the outdated tuple
*
* *tid is also an output parameter: it's modified to hold the TID of the
* latest version of the tuple (note this may be changed even on failure)
*
* Returns a slot containing the new candidate update/delete tuple, or
* NULL if we determine we shouldn't process the row.
*/ */
TupleTableSlot * TupleTableSlot *
EvalPlanQual(EState *estate, Index rti, ItemPointer tid) EvalPlanQual(EState *estate, Index rti,
ItemPointer tid, TransactionId priorXmax)
{ {
evalPlanQual *epq; evalPlanQual *epq;
EState *epqstate; EState *epqstate;
...@@ -1837,11 +1865,24 @@ EvalPlanQual(EState *estate, Index rti, ItemPointer tid) ...@@ -1837,11 +1865,24 @@ EvalPlanQual(EState *estate, Index rti, ItemPointer tid)
{ {
Buffer buffer; Buffer buffer;
if (heap_fetch(relation, SnapshotDirty, &tuple, &buffer, false, NULL)) if (heap_fetch(relation, SnapshotDirty, &tuple, &buffer, true, NULL))
{ {
TransactionId xwait = SnapshotDirty->xmax; /*
* If xmin isn't what we're expecting, the slot must have been
* recycled and reused for an unrelated tuple. This implies
* that the latest version of the row was deleted, so we need
* do nothing. (Should be safe to examine xmin without getting
* buffer's content lock, since xmin never changes in an existing
* tuple.)
*/
if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
priorXmax))
{
ReleaseBuffer(buffer);
return NULL;
}
/* xmin should not be dirty... */ /* otherwise xmin should not be dirty... */
if (TransactionIdIsValid(SnapshotDirty->xmin)) if (TransactionIdIsValid(SnapshotDirty->xmin))
elog(ERROR, "t_xmin is uncommitted in tuple to be updated"); elog(ERROR, "t_xmin is uncommitted in tuple to be updated");
...@@ -1849,11 +1890,11 @@ EvalPlanQual(EState *estate, Index rti, ItemPointer tid) ...@@ -1849,11 +1890,11 @@ EvalPlanQual(EState *estate, Index rti, ItemPointer tid)
* If tuple is being updated by other transaction then we have * If tuple is being updated by other transaction then we have
* to wait for its commit/abort. * to wait for its commit/abort.
*/ */
if (TransactionIdIsValid(xwait)) if (TransactionIdIsValid(SnapshotDirty->xmax))
{ {
ReleaseBuffer(buffer); ReleaseBuffer(buffer);
XactLockTableWait(xwait); XactLockTableWait(SnapshotDirty->xmax);
continue; continue; /* loop back to repeat heap_fetch */
} }
/* /*
...@@ -1865,22 +1906,50 @@ EvalPlanQual(EState *estate, Index rti, ItemPointer tid) ...@@ -1865,22 +1906,50 @@ EvalPlanQual(EState *estate, Index rti, ItemPointer tid)
} }
/* /*
* Oops! Invalid tuple. Have to check is it updated or deleted. * If the referenced slot was actually empty, the latest version
* Note that it's possible to get invalid SnapshotDirty->tid if * of the row must have been deleted, so we need do nothing.
* tuple updated by this transaction. Have we to check this ?
*/ */
if (ItemPointerIsValid(&(SnapshotDirty->tid)) && if (tuple.t_data == NULL)
!(ItemPointerEquals(&(tuple.t_self), &(SnapshotDirty->tid))))
{ {
/* updated, so look at the updated copy */ ReleaseBuffer(buffer);
tuple.t_self = SnapshotDirty->tid; return NULL;
continue;
} }
/* /*
* Deleted or updated by this transaction; forget it. * As above, if xmin isn't what we're expecting, do nothing.
*/ */
return NULL; if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
priorXmax))
{
ReleaseBuffer(buffer);
return NULL;
}
/*
* If we get here, the tuple was found but failed SnapshotDirty.
* Assuming the xmin is either a committed xact or our own xact
* (as it certainly should be if we're trying to modify the tuple),
* this must mean that the row was updated or deleted by either
* a committed xact or our own xact. If it was deleted, we can
* ignore it; if it was updated then chain up to the next version
* and repeat the whole test.
*
* As above, it should be safe to examine xmax and t_ctid without
* the buffer content lock, because they can't be changing.
*/
if (ItemPointerEquals(&tuple.t_self, &tuple.t_data->t_ctid))
{
/* deleted, so forget about it */
ReleaseBuffer(buffer);
return NULL;
}
/* updated, so look at the updated row */
tuple.t_self = tuple.t_data->t_ctid;
/* updated row should have xmin matching this xmax */
priorXmax = HeapTupleHeaderGetXmax(tuple.t_data);
ReleaseBuffer(buffer);
/* loop back to fetch next in chain */
} }
/* /*
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/time/tqual.c,v 1.89 2005/05/19 21:35:47 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/time/tqual.c,v 1.90 2005/08/20 00:39:57 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -677,14 +677,15 @@ HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid, ...@@ -677,14 +677,15 @@ HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid,
* However, we also include the effects of other xacts still in progress. * However, we also include the effects of other xacts still in progress.
* *
* Returns extra information in the global variable SnapshotDirty, namely * Returns extra information in the global variable SnapshotDirty, namely
* xids of concurrent xacts that affected the tuple. Also, the tuple's * xids of concurrent xacts that affected the tuple. SnapshotDirty->xmin
* t_ctid (forward link) is returned if it's being updated. * is set to InvalidTransactionId if xmin is either committed good or
* committed dead; or to xmin if that transaction is still in progress.
* Similarly for SnapshotDirty->xmax.
*/ */
bool bool
HeapTupleSatisfiesDirty(HeapTupleHeader tuple, Buffer buffer) HeapTupleSatisfiesDirty(HeapTupleHeader tuple, Buffer buffer)
{ {
SnapshotDirty->xmin = SnapshotDirty->xmax = InvalidTransactionId; SnapshotDirty->xmin = SnapshotDirty->xmax = InvalidTransactionId;
ItemPointerSetInvalid(&(SnapshotDirty->tid));
if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED)) if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
{ {
...@@ -781,7 +782,6 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple, Buffer buffer) ...@@ -781,7 +782,6 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple, Buffer buffer)
{ {
if (tuple->t_infomask & HEAP_IS_LOCKED) if (tuple->t_infomask & HEAP_IS_LOCKED)
return true; return true;
SnapshotDirty->tid = tuple->t_ctid;
return false; /* updated by other */ return false; /* updated by other */
} }
...@@ -824,7 +824,6 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple, Buffer buffer) ...@@ -824,7 +824,6 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple, Buffer buffer)
tuple->t_infomask |= HEAP_XMAX_COMMITTED; tuple->t_infomask |= HEAP_XMAX_COMMITTED;
SetBufferCommitInfoNeedsSave(buffer); SetBufferCommitInfoNeedsSave(buffer);
SnapshotDirty->tid = tuple->t_ctid;
return false; /* updated by other */ return false; /* updated by other */
} }
...@@ -1224,10 +1223,13 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin, ...@@ -1224,10 +1223,13 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId OldestXmin,
HeapTupleHeaderGetXmax(tuple))) HeapTupleHeaderGetXmax(tuple)))
{ {
/* /*
* inserter also deleted it, so it was never visible to anyone * Inserter also deleted it, so it was never visible to anyone
* else * else. However, we can only remove it early if it's not an
* updated tuple; else its parent tuple is linking to it via t_ctid,
* and this tuple mustn't go away before the parent does.
*/ */
return HEAPTUPLE_DEAD; if (!(tuple->t_infomask & HEAP_UPDATED))
return HEAPTUPLE_DEAD;
} }
if (!TransactionIdPrecedes(HeapTupleHeaderGetXmax(tuple), OldestXmin)) if (!TransactionIdPrecedes(HeapTupleHeaderGetXmax(tuple), OldestXmin))
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.103 2005/08/01 20:31:13 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.104 2005/08/20 00:39:59 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -152,19 +152,23 @@ extern bool heap_release_fetch(Relation relation, Snapshot snapshot, ...@@ -152,19 +152,23 @@ extern bool heap_release_fetch(Relation relation, Snapshot snapshot,
HeapTuple tuple, Buffer *userbuf, bool keep_buf, HeapTuple tuple, Buffer *userbuf, bool keep_buf,
PgStat_Info *pgstat_info); PgStat_Info *pgstat_info);
extern ItemPointer heap_get_latest_tid(Relation relation, Snapshot snapshot, extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
ItemPointer tid); ItemPointer tid);
extern void setLastTid(const ItemPointer tid); extern void setLastTid(const ItemPointer tid);
extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid, extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid,
bool use_wal, bool use_fsm); bool use_wal, bool use_fsm);
extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid, extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
CommandId cid, Snapshot crosscheck, bool wait); ItemPointer ctid, TransactionId *update_xmax,
extern HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple tup, CommandId cid, Snapshot crosscheck, bool wait);
ItemPointer ctid, CommandId cid, Snapshot crosscheck, bool wait); extern HTSU_Result heap_update(Relation relation, ItemPointer otid,
extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tup, HeapTuple newtup,
Buffer *userbuf, CommandId cid, ItemPointer ctid, TransactionId *update_xmax,
LockTupleMode mode, bool nowait); CommandId cid, Snapshot crosscheck, bool wait);
extern HTSU_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
Buffer *buffer, ItemPointer ctid,
TransactionId *update_xmax, CommandId cid,
LockTupleMode mode, bool nowait);
extern Oid simple_heap_insert(Relation relation, HeapTuple tup); extern Oid simple_heap_insert(Relation relation, HeapTuple tup);
extern void simple_heap_delete(Relation relation, ItemPointer tid); extern void simple_heap_delete(Relation relation, ItemPointer tid);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/htup.h,v 1.75 2005/06/08 15:50:27 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/htup.h,v 1.76 2005/08/20 00:39:59 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -80,6 +80,21 @@ ...@@ -80,6 +80,21 @@
* However, with the advent of subtransactions, a tuple may need both Xmax * However, with the advent of subtransactions, a tuple may need both Xmax
* and Cmin simultaneously, so this is no longer possible. * and Cmin simultaneously, so this is no longer possible.
* *
* A word about t_ctid: whenever a new tuple is stored on disk, its t_ctid
* is initialized with its own TID (location). If the tuple is ever updated,
* its t_ctid is changed to point to the replacement version of the tuple.
* Thus, a tuple is the latest version of its row iff XMAX is invalid or
* t_ctid points to itself (in which case, if XMAX is valid, the tuple is
* either locked or deleted). One can follow the chain of t_ctid links
* to find the newest version of the row. Beware however that VACUUM might
* erase the pointed-to (newer) tuple before erasing the pointing (older)
* tuple. Hence, when following a t_ctid link, it is necessary to check
* to see if the referenced slot is empty or contains an unrelated tuple.
* Check that the referenced tuple has XMIN equal to the referencing tuple's
* XMAX to verify that it is actually the descendant version and not an
* unrelated tuple stored into a slot recently freed by VACUUM. If either
* check fails, one may assume that there is no live descendant version.
*
* Following the fixed header fields, the nulls bitmap is stored (beginning * Following the fixed header fields, the nulls bitmap is stored (beginning
* at t_bits). The bitmap is *not* stored if t_infomask shows that there * at t_bits). The bitmap is *not* stored if t_infomask shows that there
* are no nulls in the tuple. If an OID field is present (as indicated by * are no nulls in the tuple. If an OID field is present (as indicated by
...@@ -334,18 +349,29 @@ do { \ ...@@ -334,18 +349,29 @@ do { \
/* /*
* HeapTupleData is an in-memory data structure that points to a tuple. * HeapTupleData is an in-memory data structure that points to a tuple.
* *
* This new HeapTuple for version >= 6.5 and this is why it was changed: * There are several ways in which this data structure is used:
*
* * Pointer to a tuple in a disk buffer: t_data points directly into the
* buffer (which the code had better be holding a pin on, but this is not
* reflected in HeapTupleData itself). t_datamcxt must be NULL.
*
* * Pointer to nothing: t_data and t_datamcxt are NULL. This is used as
* a failure indication in some functions.
*
* * Part of a palloc'd tuple: the HeapTupleData itself and the tuple
* form a single palloc'd chunk. t_data points to the memory location
* immediately following the HeapTupleData struct (at offset HEAPTUPLESIZE),
* and t_datamcxt is the containing context. This is used as the output
* format of heap_form_tuple and related routines.
* *
* 1. t_len moved off on-disk tuple data - ItemIdData is used to get len; * * Separately allocated tuple: t_data points to a palloc'd chunk that
* 2. t_ctid above is not self tuple TID now - it may point to * is not adjacent to the HeapTupleData, and t_datamcxt is the context
* updated version of tuple (required by MVCC); * containing that chunk.
* 3. someday someone let tuple to cross block boundaries -
* he have to add something below...
* *
* Change for 7.0: * t_len should always be valid, except in the pointer-to-nothing case.
* Up to now t_data could be NULL, the memory location directly following * t_self and t_tableOid should be valid if the HeapTupleData points to
* HeapTupleData, or pointing into a buffer. Now, it could also point to * a disk buffer, or if it represents a copy of a tuple on disk. They
* a separate allocation that was done in the t_datamcxt memory context. * should be explicitly set invalid in manufactured tuples.
*/ */
typedef struct HeapTupleData typedef struct HeapTupleData
{ {
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/executor/executor.h,v 1.118 2005/04/16 20:07:35 tgl Exp $ * $PostgreSQL: pgsql/src/include/executor/executor.h,v 1.119 2005/08/20 00:40:13 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -98,7 +98,7 @@ extern bool ExecContextForcesOids(PlanState *planstate, bool *hasoids); ...@@ -98,7 +98,7 @@ extern bool ExecContextForcesOids(PlanState *planstate, bool *hasoids);
extern void ExecConstraints(ResultRelInfo *resultRelInfo, extern void ExecConstraints(ResultRelInfo *resultRelInfo,
TupleTableSlot *slot, EState *estate); TupleTableSlot *slot, EState *estate);
extern TupleTableSlot *EvalPlanQual(EState *estate, Index rti, extern TupleTableSlot *EvalPlanQual(EState *estate, Index rti,
ItemPointer tid); ItemPointer tid, TransactionId priorXmax);
/* /*
* prototypes from functions in execProcnode.c * prototypes from functions in execProcnode.c
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/utils/tqual.h,v 1.57 2005/05/19 21:35:48 tgl Exp $ * $PostgreSQL: pgsql/src/include/utils/tqual.h,v 1.58 2005/08/20 00:40:32 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -42,7 +42,6 @@ typedef struct SnapshotData ...@@ -42,7 +42,6 @@ typedef struct SnapshotData
TransactionId *xip; /* array of xact IDs in progress */ TransactionId *xip; /* array of xact IDs in progress */
/* note: all ids in xip[] satisfy xmin <= xip[i] < xmax */ /* note: all ids in xip[] satisfy xmin <= xip[i] < xmax */
CommandId curcid; /* in my xact, CID < curcid are visible */ CommandId curcid; /* in my xact, CID < curcid are visible */
ItemPointerData tid; /* required for Dirty snapshot -:( */
} SnapshotData; } SnapshotData;
typedef SnapshotData *Snapshot; typedef SnapshotData *Snapshot;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment