Commit ed5c4e4a authored by Tom Lane's avatar Tom Lane

Improve documentation about reasoning behind the order of operations

in GetSnapshotData, GetNewTransactionId, CommitTransaction, AbortTransaction,
etc.  Correct race condition in transaction status testing in
HeapTupleSatisfiesVacuum --- this wasn't important for old VACUUM with
exclusive lock on its table, but it sure is important now.  All per
pghackers discussion 7/11/01 and 7/12/01.
parent ffbd97c8
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Copyright (c) 2000, PostgreSQL Global Development Group * Copyright (c) 2000, PostgreSQL Global Development Group
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.41 2001/07/12 04:11:13 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/access/transam/varsup.c,v 1.42 2001/07/16 22:43:33 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -60,8 +60,16 @@ GetNewTransactionId(TransactionId *xid) ...@@ -60,8 +60,16 @@ GetNewTransactionId(TransactionId *xid)
* XXX by storing xid into MyProc without acquiring SInvalLock, we are * XXX by storing xid into MyProc without acquiring SInvalLock, we are
* relying on fetch/store of an xid to be atomic, else other backends * relying on fetch/store of an xid to be atomic, else other backends
* might see a partially-set xid here. But holding both locks at once * might see a partially-set xid here. But holding both locks at once
* would be a nasty concurrency hit (and at this writing, could cause a * would be a nasty concurrency hit (and in fact could cause a deadlock
* deadlock against GetSnapshotData). So for now, assume atomicity. * against GetSnapshotData). So for now, assume atomicity. Note that
* readers of PROC xid field should be careful to fetch the value only
* once, rather than assume they can read it multiple times and get the
* same answer each time.
*
* A solution to the atomic-store problem would be to give each PROC
* its own spinlock used only for fetching/storing that PROC's xid.
* (SInvalLock would then mean primarily that PROCs couldn't be added/
* removed while holding the lock.)
*/ */
if (MyProc != (PROC *) NULL) if (MyProc != (PROC *) NULL)
MyProc->xid = *xid; MyProc->xid = *xid;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.107 2001/07/15 22:48:16 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.108 2001/07/16 22:43:33 tgl Exp $
* *
* NOTES * NOTES
* Transaction aborts can now occur two ways: * Transaction aborts can now occur two ways:
...@@ -1018,16 +1018,21 @@ CommitTransaction(void) ...@@ -1018,16 +1018,21 @@ CommitTransaction(void)
CloseSequences(); CloseSequences();
AtEOXact_portals(); AtEOXact_portals();
/* Here is where we really truly commit. */
RecordTransactionCommit(); RecordTransactionCommit();
/* /*
* Let others know about no transaction in progress by me. Note that * Let others know about no transaction in progress by me. Note that
* this must be done _before_ releasing locks we hold and * this must be done _before_ releasing locks we hold and _after_
* RecordTransactionCommit.
*
* SpinAcquire(SInvalLock) is required: UPDATE with xid 0 is blocked * SpinAcquire(SInvalLock) is required: UPDATE with xid 0 is blocked
* by xid 1' UPDATE, xid 1 is doing commit while xid 2 gets snapshot - * by xid 1' UPDATE, xid 1 is doing commit while xid 2 gets snapshot -
* if xid 2' GetSnapshotData sees xid 1 as running then it must see * if xid 2' GetSnapshotData sees xid 1 as running then it must see
* xid 0 as running as well or it will see two tuple versions - one * xid 0 as running as well or it will see two tuple versions - one
* deleted by xid 1 and one inserted by xid 0. * deleted by xid 1 and one inserted by xid 0. See notes in
* GetSnapshotData.
*/ */
if (MyProc != (PROC *) NULL) if (MyProc != (PROC *) NULL)
{ {
...@@ -1038,6 +1043,12 @@ CommitTransaction(void) ...@@ -1038,6 +1043,12 @@ CommitTransaction(void)
SpinRelease(SInvalLock); SpinRelease(SInvalLock);
} }
/*
* This is all post-commit cleanup. Note that if an error is raised
* here, it's too late to abort the transaction. This should be just
* noncritical resource releasing.
*/
RelationPurgeLocalRelation(true); RelationPurgeLocalRelation(true);
AtEOXact_temp_relations(true); AtEOXact_temp_relations(true);
smgrDoPendingDeletes(true); smgrDoPendingDeletes(true);
...@@ -1080,23 +1091,6 @@ AbortTransaction(void) ...@@ -1080,23 +1091,6 @@ AbortTransaction(void)
/* Prevent cancel/die interrupt while cleaning up */ /* Prevent cancel/die interrupt while cleaning up */
HOLD_INTERRUPTS(); HOLD_INTERRUPTS();
/*
* Let others to know about no transaction in progress - vadim
* 11/26/96
*
* XXX it'd be nice to acquire SInvalLock for this, but too much risk of
* lockup: what if we were holding SInvalLock when we elog'd? Net effect
* is that we are relying on fetch/store of an xid to be atomic, else
* other backends might see a partially-zeroed xid here. Would it be
* safe to release spins before we reset xid/xmin? But see also
* GetNewTransactionId, which does the same thing.
*/
if (MyProc != (PROC *) NULL)
{
MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
}
/* /*
* Release any spinlocks or buffer context locks we might be holding * Release any spinlocks or buffer context locks we might be holding
* as quickly as possible. (Real locks, however, must be held till we * as quickly as possible. (Real locks, however, must be held till we
...@@ -1143,10 +1137,23 @@ AbortTransaction(void) ...@@ -1143,10 +1137,23 @@ AbortTransaction(void)
AtAbort_Notify(); AtAbort_Notify();
CloseSequences(); CloseSequences();
AtEOXact_portals(); AtEOXact_portals();
/* Advertise the fact that we aborted in pg_log. */
RecordTransactionAbort(); RecordTransactionAbort();
/* Count transaction abort in statistics collector */ /*
pgstat_count_xact_rollback(); * Let others know about no transaction in progress by me. Note that
* this must be done _before_ releasing locks we hold and _after_
* RecordTransactionAbort.
*/
if (MyProc != (PROC *) NULL)
{
/* Lock SInvalLock because that's what GetSnapshotData uses. */
SpinAcquire(SInvalLock);
MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
SpinRelease(SInvalLock);
}
RelationPurgeLocalRelation(false); RelationPurgeLocalRelation(false);
AtEOXact_temp_relations(false); AtEOXact_temp_relations(false);
...@@ -1165,6 +1172,9 @@ AbortTransaction(void) ...@@ -1165,6 +1172,9 @@ AbortTransaction(void)
SharedBufferChanged = false;/* safest place to do it */ SharedBufferChanged = false;/* safest place to do it */
/* Count transaction abort in statistics collector */
pgstat_count_xact_rollback();
/* /*
* State remains TRANS_ABORT until CleanupTransaction(). * State remains TRANS_ABORT until CleanupTransaction().
*/ */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.36 2001/07/12 04:11:13 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/sinval.c,v 1.37 2001/07/16 22:43:34 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -193,8 +193,10 @@ TransactionIdIsInProgress(TransactionId xid) ...@@ -193,8 +193,10 @@ TransactionIdIsInProgress(TransactionId xid)
if (pOffset != INVALID_OFFSET) if (pOffset != INVALID_OFFSET)
{ {
PROC *proc = (PROC *) MAKE_PTR(pOffset); PROC *proc = (PROC *) MAKE_PTR(pOffset);
/* Fetch xid just once - see GetNewTransactionId */
TransactionId pxid = proc->xid;
if (TransactionIdEquals(proc->xid, xid)) if (TransactionIdEquals(pxid, xid))
{ {
result = true; result = true;
break; break;
...@@ -236,9 +238,9 @@ GetXmaxRecent(TransactionId *XmaxRecent) ...@@ -236,9 +238,9 @@ GetXmaxRecent(TransactionId *XmaxRecent)
if (pOffset != INVALID_OFFSET) if (pOffset != INVALID_OFFSET)
{ {
PROC *proc = (PROC *) MAKE_PTR(pOffset); PROC *proc = (PROC *) MAKE_PTR(pOffset);
TransactionId xid; /* Fetch xid just once - see GetNewTransactionId */
TransactionId xid = proc->xid;
xid = proc->xid;
if (! TransactionIdIsSpecial(xid)) if (! TransactionIdIsSpecial(xid))
{ {
if (TransactionIdPrecedes(xid, result)) if (TransactionIdPrecedes(xid, result))
...@@ -256,8 +258,19 @@ GetXmaxRecent(TransactionId *XmaxRecent) ...@@ -256,8 +258,19 @@ GetXmaxRecent(TransactionId *XmaxRecent)
*XmaxRecent = result; *XmaxRecent = result;
} }
/* /*----------
* GetSnapshotData -- returns information about running transactions. * GetSnapshotData -- returns information about running transactions.
*
* The returned snapshot includes xmin (lowest still-running xact ID),
* xmax (next xact ID to be assigned), and a list of running xact IDs
* in the range xmin <= xid < xmax. It is used as follows:
* All xact IDs < xmin are considered finished.
* All xact IDs >= xmax are considered still running.
* For an xact ID xmin <= xid < xmax, consult list to see whether
* it is considered running or not.
* This ensures that the set of transactions seen as "running" by the
* current xact will not change after it takes the snapshot.
*----------
*/ */
Snapshot Snapshot
GetSnapshotData(bool serializable) GetSnapshotData(bool serializable)
...@@ -287,12 +300,33 @@ GetSnapshotData(bool serializable) ...@@ -287,12 +300,33 @@ GetSnapshotData(bool serializable)
elog(ERROR, "Memory exhausted in GetSnapshotData"); elog(ERROR, "Memory exhausted in GetSnapshotData");
} }
/* /*--------------------
* Unfortunately, we have to call ReadNewTransactionId() after * Unfortunately, we have to call ReadNewTransactionId() after acquiring
* acquiring SInvalLock above. It's not good because * SInvalLock above. It's not good because ReadNewTransactionId() does
* ReadNewTransactionId() does SpinAcquire(XidGenLockId) but * SpinAcquire(XidGenLockId), but *necessary*. We need to be sure that
* _necessary_. * no transactions exit the set of currently-running transactions
* between the time we fetch xmax and the time we finish building our
* snapshot. Otherwise we could have a situation like this:
*
* 1. Tx Old is running (in Read Committed mode).
* 2. Tx S reads new transaction ID into xmax, then
* is swapped out before acquiring SInvalLock.
* 3. Tx New gets new transaction ID (>= S' xmax),
* makes changes and commits.
* 4. Tx Old changes some row R changed by Tx New and commits.
* 5. Tx S finishes getting its snapshot data. It sees Tx Old as
* done, but sees Tx New as still running (since New >= xmax).
*
* Now S will see R changed by both Tx Old and Tx New, *but* does not
* see other changes made by Tx New. If S is supposed to be in
* Serializable mode, this is wrong.
*
* By locking SInvalLock before we read xmax, we ensure that TX Old
* cannot exit the set of running transactions seen by Tx S. Therefore
* both Old and New will be seen as still running => no inconsistency.
*--------------------
*/ */
ReadNewTransactionId(&(snapshot->xmax)); ReadNewTransactionId(&(snapshot->xmax));
for (index = 0; index < segP->lastBackend; index++) for (index = 0; index < segP->lastBackend; index++)
...@@ -302,6 +336,7 @@ GetSnapshotData(bool serializable) ...@@ -302,6 +336,7 @@ GetSnapshotData(bool serializable)
if (pOffset != INVALID_OFFSET) if (pOffset != INVALID_OFFSET)
{ {
PROC *proc = (PROC *) MAKE_PTR(pOffset); PROC *proc = (PROC *) MAKE_PTR(pOffset);
/* Fetch xid just once - see GetNewTransactionId */
TransactionId xid = proc->xid; TransactionId xid = proc->xid;
/* /*
...@@ -325,11 +360,12 @@ GetSnapshotData(bool serializable) ...@@ -325,11 +360,12 @@ GetSnapshotData(bool serializable)
if (serializable) if (serializable)
MyProc->xmin = snapshot->xmin; MyProc->xmin = snapshot->xmin;
/* Serializable snapshot must be computed before any other... */
Assert(MyProc->xmin != InvalidTransactionId);
SpinRelease(SInvalLock); SpinRelease(SInvalLock);
/* Serializable snapshot must be computed before any other... */
Assert(MyProc->xmin != InvalidTransactionId);
snapshot->xcnt = count; snapshot->xcnt = count;
return snapshot; return snapshot;
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.38 2001/07/12 04:11:13 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.39 2001/07/16 22:43:34 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -610,6 +610,13 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent) ...@@ -610,6 +610,13 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent)
* *
* If the inserting transaction aborted, then the tuple was never visible * If the inserting transaction aborted, then the tuple was never visible
* to any other transaction, so we can delete it immediately. * to any other transaction, so we can delete it immediately.
*
* NOTE: must check TransactionIdIsInProgress (which looks in shared mem)
* before TransactionIdDidCommit/TransactionIdDidAbort (which look in
* pg_log). Otherwise we have a race condition where we might decide
* that a just-committed transaction crashed, because none of the tests
* succeed. xact.c is careful to record commit/abort in pg_log before
* it unsets MyProc->xid in shared memory.
*/ */
if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED)) if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
{ {
...@@ -636,19 +643,19 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent) ...@@ -636,19 +643,19 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent)
} }
tuple->t_infomask |= HEAP_XMIN_COMMITTED; tuple->t_infomask |= HEAP_XMIN_COMMITTED;
} }
else if (TransactionIdIsInProgress(tuple->t_xmin))
return HEAPTUPLE_INSERT_IN_PROGRESS;
else if (TransactionIdDidCommit(tuple->t_xmin))
tuple->t_infomask |= HEAP_XMIN_COMMITTED;
else if (TransactionIdDidAbort(tuple->t_xmin)) else if (TransactionIdDidAbort(tuple->t_xmin))
{ {
tuple->t_infomask |= HEAP_XMIN_INVALID; tuple->t_infomask |= HEAP_XMIN_INVALID;
return HEAPTUPLE_DEAD; return HEAPTUPLE_DEAD;
} }
else if (TransactionIdDidCommit(tuple->t_xmin))
tuple->t_infomask |= HEAP_XMIN_COMMITTED;
else if (TransactionIdIsInProgress(tuple->t_xmin))
return HEAPTUPLE_INSERT_IN_PROGRESS;
else else
{ {
/* /*
* Not Aborted, Not Committed, Not in Progress - * Not in Progress, Not Committed, Not Aborted -
* so it's from crashed process. - vadim 11/26/96 * so it's from crashed process. - vadim 11/26/96
*/ */
tuple->t_infomask |= HEAP_XMIN_INVALID; tuple->t_infomask |= HEAP_XMIN_INVALID;
...@@ -667,19 +674,19 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent) ...@@ -667,19 +674,19 @@ HeapTupleSatisfiesVacuum(HeapTupleHeader tuple, TransactionId XmaxRecent)
if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED)) if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
{ {
if (TransactionIdDidAbort(tuple->t_xmax)) if (TransactionIdIsInProgress(tuple->t_xmax))
return HEAPTUPLE_DELETE_IN_PROGRESS;
else if (TransactionIdDidCommit(tuple->t_xmax))
tuple->t_infomask |= HEAP_XMAX_COMMITTED;
else if (TransactionIdDidAbort(tuple->t_xmax))
{ {
tuple->t_infomask |= HEAP_XMAX_INVALID; tuple->t_infomask |= HEAP_XMAX_INVALID;
return HEAPTUPLE_LIVE; return HEAPTUPLE_LIVE;
} }
else if (TransactionIdDidCommit(tuple->t_xmax))
tuple->t_infomask |= HEAP_XMAX_COMMITTED;
else if (TransactionIdIsInProgress(tuple->t_xmax))
return HEAPTUPLE_DELETE_IN_PROGRESS;
else else
{ {
/* /*
* Not Aborted, Not Committed, Not in Progress - * Not in Progress, Not Committed, Not Aborted -
* so it's from crashed process. - vadim 06/02/97 * so it's from crashed process. - vadim 06/02/97
*/ */
tuple->t_infomask |= HEAP_XMAX_INVALID; tuple->t_infomask |= HEAP_XMAX_INVALID;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment