Commit 49e92815 authored by Simon Riggs

Rework handling of subtransactions in 2PC recovery

The bug fixed by 0874d4f3
caused us to question and rework the handling of
subtransactions in 2PC during and at end of recovery.
Patch adds checks and tests to ensure no further bugs.

This effectively removes the temporary measure put in place
by 546c13e1.

Author: Simon Riggs
Reviewed-by: Tom Lane, Michael Paquier
Discussion: http://postgr.es/m/CANP8+j+vvXmruL_i2buvdhMeVv5TQu0Hm2+C5N+kdVwHJuor8w@mail.gmail.com
parent 0352c15e
...@@ -68,11 +68,9 @@ static bool SubTransPagePrecedes(int page1, int page2); ...@@ -68,11 +68,9 @@ static bool SubTransPagePrecedes(int page1, int page2);
/* /*
* Record the parent of a subtransaction in the subtrans log. * Record the parent of a subtransaction in the subtrans log.
*
* In some cases we may need to overwrite an existing value.
*/ */
void void
SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK) SubTransSetParent(TransactionId xid, TransactionId parent)
{ {
int pageno = TransactionIdToPage(xid); int pageno = TransactionIdToPage(xid);
int entryno = TransactionIdToEntry(xid); int entryno = TransactionIdToEntry(xid);
...@@ -80,6 +78,7 @@ SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK) ...@@ -80,6 +78,7 @@ SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK)
TransactionId *ptr; TransactionId *ptr;
Assert(TransactionIdIsValid(parent)); Assert(TransactionIdIsValid(parent));
Assert(TransactionIdFollows(xid, parent));
LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE); LWLockAcquire(SubtransControlLock, LW_EXCLUSIVE);
...@@ -87,13 +86,17 @@ SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK) ...@@ -87,13 +86,17 @@ SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK)
ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno]; ptr = (TransactionId *) SubTransCtl->shared->page_buffer[slotno];
ptr += entryno; ptr += entryno;
/* Current state should be 0 */ /*
Assert(*ptr == InvalidTransactionId || * It's possible we'll try to set the parent xid multiple times
(*ptr == parent && overwriteOK)); * but we shouldn't ever be changing the xid from one valid xid
* to another valid xid, which would corrupt the data structure.
*ptr = parent; */
if (*ptr != parent)
SubTransCtl->shared->page_dirty[slotno] = true; {
Assert(*ptr == InvalidTransactionId);
*ptr = parent;
SubTransCtl->shared->page_dirty[slotno] = true;
}
LWLockRelease(SubtransControlLock); LWLockRelease(SubtransControlLock);
} }
...@@ -157,6 +160,15 @@ SubTransGetTopmostTransaction(TransactionId xid) ...@@ -157,6 +160,15 @@ SubTransGetTopmostTransaction(TransactionId xid)
if (TransactionIdPrecedes(parentXid, TransactionXmin)) if (TransactionIdPrecedes(parentXid, TransactionXmin))
break; break;
parentXid = SubTransGetParent(parentXid); parentXid = SubTransGetParent(parentXid);
/*
* By convention the parent xid gets allocated first, so should
* always precede the child xid. Anything else points to a corrupted
* data structure that could lead to an infinite loop, so exit.
*/
if (!TransactionIdPrecedes(parentXid, previousXid))
elog(ERROR, "pg_subtrans contains invalid entry: xid %u points to parent xid %u",
previousXid, parentXid);
} }
Assert(TransactionIdIsValid(previousXid)); Assert(TransactionIdIsValid(previousXid));
......
...@@ -221,8 +221,7 @@ static void RemoveGXact(GlobalTransaction gxact); ...@@ -221,8 +221,7 @@ static void RemoveGXact(GlobalTransaction gxact);
static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len); static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len);
static char *ProcessTwoPhaseBuffer(TransactionId xid, static char *ProcessTwoPhaseBuffer(TransactionId xid,
XLogRecPtr prepare_start_lsn, XLogRecPtr prepare_start_lsn,
bool fromdisk, bool overwriteOK, bool setParent, bool fromdisk, bool setParent, bool setNextXid);
bool setNextXid);
static void MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid, static void MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid,
const char *gid, TimestampTz prepared_at, Oid owner, const char *gid, TimestampTz prepared_at, Oid owner,
Oid databaseid); Oid databaseid);
...@@ -1743,8 +1742,7 @@ restoreTwoPhaseData(void) ...@@ -1743,8 +1742,7 @@ restoreTwoPhaseData(void)
xid = (TransactionId) strtoul(clde->d_name, NULL, 16); xid = (TransactionId) strtoul(clde->d_name, NULL, 16);
buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr, buf = ProcessTwoPhaseBuffer(xid, InvalidXLogRecPtr,
true, false, false, true, false, false);
false);
if (buf == NULL) if (buf == NULL)
continue; continue;
...@@ -1804,8 +1802,7 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) ...@@ -1804,8 +1802,7 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
buf = ProcessTwoPhaseBuffer(xid, buf = ProcessTwoPhaseBuffer(xid,
gxact->prepare_start_lsn, gxact->prepare_start_lsn,
gxact->ondisk, false, false, gxact->ondisk, false, true);
true);
if (buf == NULL) if (buf == NULL)
continue; continue;
...@@ -1858,12 +1855,12 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p) ...@@ -1858,12 +1855,12 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
* This is never called at the end of recovery - we use * This is never called at the end of recovery - we use
* RecoverPreparedTransactions() at that point. * RecoverPreparedTransactions() at that point.
* *
* Currently we simply call SubTransSetParent() for any subxids of prepared * The lack of calls to SubTransSetParent() here is by design;
* transactions. If overwriteOK is true, it's OK if some XIDs have already * those calls are made by RecoverPreparedTransactions() at the end of recovery
* been marked in pg_subtrans. * for those xacts that need this.
*/ */
void void
StandbyRecoverPreparedTransactions(bool overwriteOK) StandbyRecoverPreparedTransactions(void)
{ {
int i; int i;
...@@ -1880,8 +1877,7 @@ StandbyRecoverPreparedTransactions(bool overwriteOK) ...@@ -1880,8 +1877,7 @@ StandbyRecoverPreparedTransactions(bool overwriteOK)
buf = ProcessTwoPhaseBuffer(xid, buf = ProcessTwoPhaseBuffer(xid,
gxact->prepare_start_lsn, gxact->prepare_start_lsn,
gxact->ondisk, overwriteOK, true, gxact->ondisk, false, false);
false);
if (buf != NULL) if (buf != NULL)
pfree(buf); pfree(buf);
} }
...@@ -1895,6 +1891,13 @@ StandbyRecoverPreparedTransactions(bool overwriteOK) ...@@ -1895,6 +1891,13 @@ StandbyRecoverPreparedTransactions(bool overwriteOK)
* each prepared transaction (reacquire locks, etc). * each prepared transaction (reacquire locks, etc).
* *
* This is run during database startup. * This is run during database startup.
*
* At the end of recovery the way we take snapshots will change. We now need
* to mark all running transactions with their full SubTransSetParent() info
* to allow normal snapshots to work correctly if snapshots overflow.
* We do this here because by definition prepared transactions are the only
* type of write transaction still running, so this is necessary and
* complete.
*/ */
void void
RecoverPreparedTransactions(void) RecoverPreparedTransactions(void)
...@@ -1913,15 +1916,21 @@ RecoverPreparedTransactions(void) ...@@ -1913,15 +1916,21 @@ RecoverPreparedTransactions(void)
TwoPhaseFileHeader *hdr; TwoPhaseFileHeader *hdr;
TransactionId *subxids; TransactionId *subxids;
const char *gid; const char *gid;
bool overwriteOK = false;
int i;
xid = gxact->xid; xid = gxact->xid;
/*
* Reconstruct subtrans state for the transaction --- needed
* because pg_subtrans is not preserved over a restart. Note that
* we are linking all the subtransactions directly to the
* top-level XID; there may originally have been a more complex
* hierarchy, but there's no need to restore that exactly.
* It's possible that SubTransSetParent has been set before, if
* the prepared transaction generated xid assignment records.
*/
buf = ProcessTwoPhaseBuffer(xid, buf = ProcessTwoPhaseBuffer(xid,
gxact->prepare_start_lsn, gxact->prepare_start_lsn,
gxact->ondisk, false, false, gxact->ondisk, true, false);
false);
if (buf == NULL) if (buf == NULL)
continue; continue;
...@@ -1939,25 +1948,6 @@ RecoverPreparedTransactions(void) ...@@ -1939,25 +1948,6 @@ RecoverPreparedTransactions(void)
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode)); bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
bufptr += MAXALIGN(hdr->ninvalmsgs * sizeof(SharedInvalidationMessage)); bufptr += MAXALIGN(hdr->ninvalmsgs * sizeof(SharedInvalidationMessage));
/*
* It's possible that SubTransSetParent has been set before, if
* the prepared transaction generated xid assignment records. Test
* here must match one used in AssignTransactionId().
*/
if (InHotStandby && (hdr->nsubxacts >= PGPROC_MAX_CACHED_SUBXIDS ||
XLogLogicalInfoActive()))
overwriteOK = true;
/*
* Reconstruct subtrans state for the transaction --- needed
* because pg_subtrans is not preserved over a restart. Note that
* we are linking all the subtransactions directly to the
* top-level XID; there may originally have been a more complex
* hierarchy, but there's no need to restore that exactly.
*/
for (i = 0; i < hdr->nsubxacts; i++)
SubTransSetParent(subxids[i], xid, true);
/* /*
* Recreate its GXACT and dummy PGPROC. But, check whether * Recreate its GXACT and dummy PGPROC. But, check whether
* it was added in redo and already has a shmem entry for * it was added in redo and already has a shmem entry for
...@@ -2006,8 +1996,7 @@ RecoverPreparedTransactions(void) ...@@ -2006,8 +1996,7 @@ RecoverPreparedTransactions(void)
* Given a transaction id, read it either from disk or read it directly * Given a transaction id, read it either from disk or read it directly
* via shmem xlog record pointer using the provided "prepare_start_lsn". * via shmem xlog record pointer using the provided "prepare_start_lsn".
* *
* If setParent is true, then use the overwriteOK parameter to set up * If setParent is true, set up subtransaction parent linkages.
* subtransaction parent linkages.
* *
* If setNextXid is true, set ShmemVariableCache->nextXid to the newest * If setNextXid is true, set ShmemVariableCache->nextXid to the newest
* value scanned. * value scanned.
...@@ -2015,7 +2004,7 @@ RecoverPreparedTransactions(void) ...@@ -2015,7 +2004,7 @@ RecoverPreparedTransactions(void)
static char * static char *
ProcessTwoPhaseBuffer(TransactionId xid, ProcessTwoPhaseBuffer(TransactionId xid,
XLogRecPtr prepare_start_lsn, XLogRecPtr prepare_start_lsn,
bool fromdisk, bool overwriteOK, bool fromdisk,
bool setParent, bool setNextXid) bool setParent, bool setNextXid)
{ {
TransactionId origNextXid = ShmemVariableCache->nextXid; TransactionId origNextXid = ShmemVariableCache->nextXid;
...@@ -2142,7 +2131,7 @@ ProcessTwoPhaseBuffer(TransactionId xid, ...@@ -2142,7 +2131,7 @@ ProcessTwoPhaseBuffer(TransactionId xid,
} }
if (setParent) if (setParent)
SubTransSetParent(subxid, xid, overwriteOK); SubTransSetParent(subxid, xid);
} }
return buf; return buf;
......
...@@ -559,7 +559,7 @@ AssignTransactionId(TransactionState s) ...@@ -559,7 +559,7 @@ AssignTransactionId(TransactionState s)
XactTopTransactionId = s->transactionId; XactTopTransactionId = s->transactionId;
if (isSubXact) if (isSubXact)
SubTransSetParent(s->transactionId, s->parent->transactionId, false); SubTransSetParent(s->transactionId, s->parent->transactionId);
/* /*
* If it's a top-level transaction, the predicate locking system needs to * If it's a top-level transaction, the predicate locking system needs to
......
...@@ -6930,7 +6930,7 @@ StartupXLOG(void) ...@@ -6930,7 +6930,7 @@ StartupXLOG(void)
ProcArrayApplyRecoveryInfo(&running); ProcArrayApplyRecoveryInfo(&running);
StandbyRecoverPreparedTransactions(false); StandbyRecoverPreparedTransactions();
} }
} }
...@@ -9692,7 +9692,7 @@ xlog_redo(XLogReaderState *record) ...@@ -9692,7 +9692,7 @@ xlog_redo(XLogReaderState *record)
ProcArrayApplyRecoveryInfo(&running); ProcArrayApplyRecoveryInfo(&running);
StandbyRecoverPreparedTransactions(true); StandbyRecoverPreparedTransactions();
} }
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */ /* ControlFile->checkPointCopy always tracks the latest ckpt XID */
......
...@@ -943,7 +943,7 @@ ProcArrayApplyXidAssignment(TransactionId topxid, ...@@ -943,7 +943,7 @@ ProcArrayApplyXidAssignment(TransactionId topxid,
* have attempted to SubTransSetParent(). * have attempted to SubTransSetParent().
*/ */
for (i = 0; i < nsubxids; i++) for (i = 0; i < nsubxids; i++)
SubTransSetParent(subxids[i], topxid, false); SubTransSetParent(subxids[i], topxid);
/* KnownAssignedXids isn't maintained yet, so we're done for now */ /* KnownAssignedXids isn't maintained yet, so we're done for now */
if (standbyState == STANDBY_INITIALIZED) if (standbyState == STANDBY_INITIALIZED)
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
/* Number of SLRU buffers to use for subtrans */ /* Number of SLRU buffers to use for subtrans */
#define NUM_SUBTRANS_BUFFERS 32 #define NUM_SUBTRANS_BUFFERS 32
extern void SubTransSetParent(TransactionId xid, TransactionId parent, bool overwriteOK); extern void SubTransSetParent(TransactionId xid, TransactionId parent);
extern TransactionId SubTransGetParent(TransactionId xid); extern TransactionId SubTransGetParent(TransactionId xid);
extern TransactionId SubTransGetTopmostTransaction(TransactionId xid); extern TransactionId SubTransGetTopmostTransaction(TransactionId xid);
......
...@@ -46,7 +46,7 @@ extern bool StandbyTransactionIdIsPrepared(TransactionId xid); ...@@ -46,7 +46,7 @@ extern bool StandbyTransactionIdIsPrepared(TransactionId xid);
extern TransactionId PrescanPreparedTransactions(TransactionId **xids_p, extern TransactionId PrescanPreparedTransactions(TransactionId **xids_p,
int *nxids_p); int *nxids_p);
extern void StandbyRecoverPreparedTransactions(bool overwriteOK); extern void StandbyRecoverPreparedTransactions(void);
extern void RecoverPreparedTransactions(void); extern void RecoverPreparedTransactions(void);
extern void CheckPointTwoPhase(XLogRecPtr redo_horizon); extern void CheckPointTwoPhase(XLogRecPtr redo_horizon);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment