Commit 8431e296 authored by Simon Riggs

Clean up initialization of Hot Standby. Clarify workings with reanalysis
of requirements and documentation on LogStandbySnapshot(). Fixes
two minor bugs reported by Tom Lane that would lead to an incorrect
snapshot after transaction wraparound. Also fix two other problems
discovered that would give incorrect snapshots in certain cases.
ProcArrayApplyRecoveryInfo() substantially rewritten. Some minor
refactoring of xact_redo_apply() and ExpireTreeKnownAssignedTransactionIds().
parent c2e7f78a
......@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.289 2010/02/26 02:00:34 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.290 2010/05/13 11:15:38 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -4378,7 +4378,7 @@ xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid, XLogRecPtr lsn)
LWLockRelease(XidGenLock);
}
if (!InHotStandby)
if (standbyState == STANDBY_DISABLED)
{
/*
* Mark the transaction committed in pg_clog.
......@@ -4412,12 +4412,12 @@ xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid, XLogRecPtr lsn)
/*
* We must mark clog before we update the ProcArray.
*/
ExpireTreeKnownAssignedTransactionIds(xid, xlrec->nsubxacts, sub_xids);
ExpireTreeKnownAssignedTransactionIds(xid, xlrec->nsubxacts, sub_xids, max_xid);
/*
* Send any cache invalidations attached to the commit. We must
* maintain the same order of invalidation then release locks as
* occurs in .
* occurs in CommitTransaction().
*/
ProcessCommittedInvalidationMessages(inval_msgs, xlrec->nmsgs,
XactCompletionRelcacheInitFileInval(xlrec),
......@@ -4499,7 +4499,12 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
LWLockRelease(XidGenLock);
}
if (InHotStandby)
if (standbyState == STANDBY_DISABLED)
{
/* Mark the transaction aborted in pg_clog, no need for async stuff */
TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids);
}
else
{
/*
* If a transaction completion record arrives that has as-yet
......@@ -4511,17 +4516,14 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
* already. Leave it in.
*/
RecordKnownAssignedTransactionIds(max_xid);
}
/* Mark the transaction aborted in pg_clog, no need for async stuff */
TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids);
/* Mark the transaction aborted in pg_clog, no need for async stuff */
TransactionIdAbortTree(xid, xlrec->nsubxacts, sub_xids);
if (InHotStandby)
{
/*
* We must mark clog before we update the ProcArray.
* We must update the ProcArray after we have marked clog.
*/
ExpireTreeKnownAssignedTransactionIds(xid, xlrec->nsubxacts, sub_xids);
ExpireTreeKnownAssignedTransactionIds(xid, xlrec->nsubxacts, sub_xids, max_xid);
/*
* There are no flat files that need updating, nor invalidation
......@@ -4596,7 +4598,7 @@ xact_redo(XLogRecPtr lsn, XLogRecord *record)
{
xl_xact_assignment *xlrec = (xl_xact_assignment *) XLogRecGetData(record);
if (InHotStandby)
if (standbyState >= STANDBY_INITIALIZED)
ProcArrayApplyXidAssignment(xlrec->xtop,
xlrec->nsubxacts, xlrec->xsub);
}
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.409 2010/05/03 11:17:52 heikki Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.410 2010/05/13 11:15:38 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -5995,6 +5995,7 @@ StartupXLOG(void)
if (wasShutdown)
{
RunningTransactionsData running;
TransactionId latestCompletedXid;
/*
* Construct a RunningTransactions snapshot representing a shut
......@@ -6006,6 +6007,9 @@ StartupXLOG(void)
running.subxid_overflow = false;
running.nextXid = checkPoint.nextXid;
running.oldestRunningXid = oldestActiveXID;
latestCompletedXid = checkPoint.nextXid;
TransactionIdRetreat(latestCompletedXid);
running.latestCompletedXid = latestCompletedXid;
running.xids = xids;
ProcArrayApplyRecoveryInfo(&running);
......@@ -6154,8 +6158,9 @@ StartupXLOG(void)
xlogctl->recoveryLastXTime = recoveryLastXTime;
SpinLockRelease(&xlogctl->info_lck);
/* In Hot Standby mode, keep track of XIDs we've seen */
if (InHotStandby && TransactionIdIsValid(record->xl_xid))
/* If we are attempting to enter Hot Standby mode, process XIDs we see */
if (standbyState >= STANDBY_INITIALIZED &&
TransactionIdIsValid(record->xl_xid))
RecordKnownAssignedTransactionIds(record->xl_xid);
RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
......@@ -7803,6 +7808,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
TransactionId *xids;
int nxids;
TransactionId oldestActiveXID;
TransactionId latestCompletedXid;
RunningTransactionsData running;
oldestActiveXID = PrescanPreparedTransactions(&xids, &nxids);
......@@ -7817,6 +7823,9 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
running.subxid_overflow = false;
running.nextXid = checkPoint.nextXid;
running.oldestRunningXid = oldestActiveXID;
latestCompletedXid = checkPoint.nextXid;
TransactionIdRetreat(latestCompletedXid);
running.latestCompletedXid = latestCompletedXid;
running.xids = xids;
ProcArrayApplyRecoveryInfo(&running);
......
This diff is collapsed.
......@@ -11,7 +11,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.21 2010/05/02 02:10:33 tgl Exp $
* $PostgreSQL: pgsql/src/backend/storage/ipc/standby.c,v 1.22 2010/05/13 11:15:38 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -776,6 +776,51 @@ standby_desc(StringInfo buf, uint8 xl_info, char *rec)
/*
* Log details of the current snapshot to WAL. This allows the snapshot state
* to be reconstructed on the standby.
*
* We can move directly to STANDBY_SNAPSHOT_READY at startup if we
* start from a shutdown checkpoint because we know nothing was running
* at that time and our recovery snapshot is known empty. In the more
* typical case of an online checkpoint we need to jump through a few
* hoops to get a correct recovery snapshot and this requires a two- or
* sometimes a three-stage process.
*
* The initial snapshot must contain all running xids and all current
* AccessExclusiveLocks at a point in time on the standby. Assembling
* that information while the server is running requires many and
* various LWLocks, so we choose to derive that information piece by
* piece and then re-assemble that info on the standby. When that
* information is fully assembled we move to STANDBY_SNAPSHOT_READY.
*
* Since locking on the primary when we derive the information is not
* strict, we note that there is a time window between the derivation and
* writing to WAL of the derived information. That allows race conditions
* that we must resolve, since xids and locks may enter or leave the
* snapshot during that window. This creates the issue that an xid or
* lock may start *after* the snapshot has been derived yet *before* the
* snapshot is logged in the running xacts WAL record. We resolve this by
* starting to accumulate changes at a point just prior to when we derive
* the snapshot on the primary, then ignoring duplicates when we later apply
* the snapshot from the running xacts record. This is implemented during
* CreateCheckpoint() where we use the logical checkpoint location as
* our starting point and then write the running xacts record immediately
* before writing the main checkpoint WAL record. Since we always start
* up from a checkpoint and are immediately at our starting point, we
* unconditionally move to STANDBY_INITIALIZED. After this point we
* must do 4 things:
* * move shared nextXid forwards as we see new xids
* * extend the clog and subtrans with each new xid
* * keep track of uncommitted known assigned xids
* * keep track of uncommitted AccessExclusiveLocks
*
* When we see a commit/abort we must remove known assigned xids and locks
* from the completing transaction. Attempted removals that cannot locate
* an entry are expected and must not cause an error when we are in state
* STANDBY_INITIALIZED. This is implemented in StandbyReleaseLocks() and
* KnownAssignedXidsRemove().
*
* Later, when we apply the running xact data we must be careful to ignore
* transactions already committed, since those commits raced ahead when
* making WAL entries.
*/
void
LogStandbySnapshot(TransactionId *oldestActiveXid, TransactionId *nextXid)
......@@ -788,6 +833,12 @@ LogStandbySnapshot(TransactionId *oldestActiveXid, TransactionId *nextXid)
/*
* Get details of any AccessExclusiveLocks being held at the moment.
*
* XXX GetRunningTransactionLocks() currently holds a lock on all partitions
* though it is possible to further optimise the locking. By reference
* counting locks and storing the value on the ProcArray entry for each backend
* we can easily tell if any locks need recording without trying to acquire
* the partition locks and scanning the lock table.
*/
locks = GetRunningTransactionLocks(&nlocks);
if (nlocks > 0)
......@@ -798,6 +849,11 @@ LogStandbySnapshot(TransactionId *oldestActiveXid, TransactionId *nextXid)
* record we write, because standby will open up when it sees this.
*/
running = GetRunningTransactionData();
/*
* The gap between GetRunningTransactionData() and LogCurrentRunningXacts()
* is what most of the fuss is about here, so artificially extending this
* interval is a great way to test the little-used parts of the code.
*/
LogCurrentRunningXacts(running);
*oldestActiveXid = running->oldestRunningXid;
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/procarray.h,v 1.31 2010/01/23 16:37:12 sriggs Exp $
* $PostgreSQL: pgsql/src/include/storage/procarray.h,v 1.32 2010/05/13 11:15:38 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -35,7 +35,8 @@ extern void ProcArrayApplyXidAssignment(TransactionId topxid,
extern void RecordKnownAssignedTransactionIds(TransactionId xid);
extern void ExpireTreeKnownAssignedTransactionIds(TransactionId xid,
int nsubxids, TransactionId *subxids);
int nsubxids, TransactionId *subxids,
TransactionId max_xid);
extern void ExpireAllKnownAssignedTransactionIds(void);
extern void ExpireOldKnownAssignedTransactionIds(TransactionId xid);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/standby.h,v 1.9 2010/02/26 02:01:28 momjian Exp $
* $PostgreSQL: pgsql/src/include/storage/standby.h,v 1.10 2010/05/13 11:15:38 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -68,6 +68,7 @@ typedef struct xl_running_xacts
bool subxid_overflow; /* snapshot overflowed, subxids missing */
TransactionId nextXid; /* copy of ShmemVariableCache->nextXid */
TransactionId oldestRunningXid; /* *not* oldestXmin */
TransactionId latestCompletedXid; /* so we can set xmax */
TransactionId xids[1]; /* VARIABLE LENGTH ARRAY */
} xl_running_xacts;
......@@ -97,6 +98,7 @@ typedef struct RunningTransactionsData
bool subxid_overflow; /* snapshot overflowed, subxids missing */
TransactionId nextXid; /* copy of ShmemVariableCache->nextXid */
TransactionId oldestRunningXid; /* *not* oldestXmin */
TransactionId latestCompletedXid; /* so we can set xmax */
TransactionId *xids; /* array of (sub)xids still running */
} RunningTransactionsData;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment