Commit 970fb12d authored by Heikki Linnakangas's avatar Heikki Linnakangas

Consistency check should compare last record replayed, not last record read.

EndRecPtr is the last record that we've read, but not necessarily yet
replayed. CheckRecoveryConsistency should compare minRecoveryPoint with the
last replayed record instead. This caused recovery to think it's reached
consistency too early.

Now that we do the check in CheckRecoveryConsistency correctly, we have to
move the call of that function to after redoing a record. The current place,
after reading a record but before replaying it, is wrong. In particular, if
there are no more records after the one ending at minRecoveryPoint, we don't
enter hot standby until one extra record is generated and read by the
standby, and CheckRecoveryConsistency is called. These two bugs conspired
to make the code appear to work correctly, except for the small window
between reading the last record that reaches minRecoveryPoint, and
replaying it.

In the passing, rename recoveryLastRecPtr, which is the last record
replayed, to lastReplayedEndRecPtr. This makes it slightly less confusing
with replayEndRecPtr, which is the last record read that we're about to
replay.

Original report from Kyotaro HORIGUCHI, further diagnosis by Fujii Masao.
Backpatch to 9.0, where Hot Standby subtly changed the test from
"minRecoveryPoint < EndRecPtr" to "minRecoveryPoint <= EndRecPtr". The
former works because where the test is performed, we have always read one
more record than we've replayed.
parent ad69bd05
...@@ -445,11 +445,15 @@ typedef struct XLogCtlData ...@@ -445,11 +445,15 @@ typedef struct XLogCtlData
XLogRecPtr lastCheckPointRecPtr; XLogRecPtr lastCheckPointRecPtr;
CheckPoint lastCheckPoint; CheckPoint lastCheckPoint;
/* end+1 of the last record replayed (or being replayed) */ /*
* lastReplayedEndRecPtr points to end+1 of the last record successfully
* replayed. When we're currently replaying a record, ie. in a redo
* function, replayEndRecPtr points to the end+1 of the record being
* replayed, otherwise it's equal to lastReplayedEndRecPtr.
*/
XLogRecPtr lastReplayedEndRecPtr;
XLogRecPtr replayEndRecPtr; XLogRecPtr replayEndRecPtr;
TimeLineID replayEndTLI; TimeLineID replayEndTLI;
/* end+1 of the last record replayed */
XLogRecPtr recoveryLastRecPtr;
/* timestamp of last COMMIT/ABORT record replayed (or being replayed) */ /* timestamp of last COMMIT/ABORT record replayed (or being replayed) */
TimestampTz recoveryLastXTime; TimestampTz recoveryLastXTime;
/* current effective recovery target timeline */ /* current effective recovery target timeline */
...@@ -5745,7 +5749,7 @@ StartupXLOG(void) ...@@ -5745,7 +5749,7 @@ StartupXLOG(void)
} }
/* /*
* Initialize shared replayEndRecPtr, recoveryLastRecPtr, and * Initialize shared replayEndRecPtr, lastReplayedEndRecPtr, and
* recoveryLastXTime. * recoveryLastXTime.
* *
* This is slightly confusing if we're starting from an online * This is slightly confusing if we're starting from an online
...@@ -5759,7 +5763,7 @@ StartupXLOG(void) ...@@ -5759,7 +5763,7 @@ StartupXLOG(void)
SpinLockAcquire(&xlogctl->info_lck); SpinLockAcquire(&xlogctl->info_lck);
xlogctl->replayEndRecPtr = ReadRecPtr; xlogctl->replayEndRecPtr = ReadRecPtr;
xlogctl->replayEndTLI = ThisTimeLineID; xlogctl->replayEndTLI = ThisTimeLineID;
xlogctl->recoveryLastRecPtr = EndRecPtr; xlogctl->lastReplayedEndRecPtr = EndRecPtr;
xlogctl->recoveryLastXTime = 0; xlogctl->recoveryLastXTime = 0;
xlogctl->currentChunkStartTime = 0; xlogctl->currentChunkStartTime = 0;
xlogctl->recoveryPause = false; xlogctl->recoveryPause = false;
...@@ -5851,9 +5855,6 @@ StartupXLOG(void) ...@@ -5851,9 +5855,6 @@ StartupXLOG(void)
/* Handle interrupt signals of startup process */ /* Handle interrupt signals of startup process */
HandleStartupProcInterrupts(); HandleStartupProcInterrupts();
/* Allow read-only connections if we're consistent now */
CheckRecoveryConsistency();
/* /*
* Pause WAL replay, if requested by a hot-standby session via * Pause WAL replay, if requested by a hot-standby session via
* SetRecoveryPause(). * SetRecoveryPause().
...@@ -5983,16 +5984,19 @@ StartupXLOG(void) ...@@ -5983,16 +5984,19 @@ StartupXLOG(void)
} }
/* /*
* Update shared recoveryLastRecPtr after this record has been * Update lastReplayedEndRecPtr after this record has been
* replayed. * successfully replayed.
*/ */
SpinLockAcquire(&xlogctl->info_lck); SpinLockAcquire(&xlogctl->info_lck);
xlogctl->recoveryLastRecPtr = EndRecPtr; xlogctl->lastReplayedEndRecPtr = EndRecPtr;
SpinLockRelease(&xlogctl->info_lck); SpinLockRelease(&xlogctl->info_lck);
/* Remember this record as the last-applied one */ /* Remember this record as the last-applied one */
LastRec = ReadRecPtr; LastRec = ReadRecPtr;
/* Allow read-only connections if we're consistent now */
CheckRecoveryConsistency();
/* Exit loop if we reached inclusive recovery target */ /* Exit loop if we reached inclusive recovery target */
if (!recoveryContinue) if (!recoveryContinue)
break; break;
...@@ -6383,10 +6387,11 @@ CheckRecoveryConsistency(void) ...@@ -6383,10 +6387,11 @@ CheckRecoveryConsistency(void)
* Have we passed our safe starting point? Note that minRecoveryPoint * Have we passed our safe starting point? Note that minRecoveryPoint
* is known to be incorrectly set if ControlFile->backupEndRequired, * is known to be incorrectly set if ControlFile->backupEndRequired,
* until the XLOG_BACKUP_RECORD arrives to advise us of the correct * until the XLOG_BACKUP_RECORD arrives to advise us of the correct
* minRecoveryPoint. All we prior to that is its not consistent yet. * minRecoveryPoint. All we know prior to that is that we're not
* consistent yet.
*/ */
if (!reachedConsistency && !ControlFile->backupEndRequired && if (!reachedConsistency && !ControlFile->backupEndRequired &&
XLByteLE(minRecoveryPoint, EndRecPtr) && XLByteLE(minRecoveryPoint, XLogCtl->lastReplayedEndRecPtr) &&
XLogRecPtrIsInvalid(ControlFile->backupStartPoint)) XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
{ {
/* /*
...@@ -6398,7 +6403,8 @@ CheckRecoveryConsistency(void) ...@@ -6398,7 +6403,8 @@ CheckRecoveryConsistency(void)
reachedConsistency = true; reachedConsistency = true;
ereport(LOG, ereport(LOG,
(errmsg("consistent recovery state reached at %X/%X", (errmsg("consistent recovery state reached at %X/%X",
(uint32) (EndRecPtr >> 32), (uint32) EndRecPtr))); (uint32) (XLogCtl->lastReplayedEndRecPtr >> 32),
(uint32) XLogCtl->lastReplayedEndRecPtr)));
} }
/* /*
...@@ -9094,7 +9100,7 @@ GetXLogReplayRecPtr(TimeLineID *targetTLI) ...@@ -9094,7 +9100,7 @@ GetXLogReplayRecPtr(TimeLineID *targetTLI)
XLogRecPtr recptr; XLogRecPtr recptr;
SpinLockAcquire(&xlogctl->info_lck); SpinLockAcquire(&xlogctl->info_lck);
recptr = xlogctl->recoveryLastRecPtr; recptr = xlogctl->lastReplayedEndRecPtr;
if (targetTLI) if (targetTLI)
*targetTLI = xlogctl->RecoveryTargetTLI; *targetTLI = xlogctl->RecoveryTargetTLI;
SpinLockRelease(&xlogctl->info_lck); SpinLockRelease(&xlogctl->info_lck);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment