Commit ff8f160b authored by Heikki Linnakangas's avatar Heikki Linnakangas

Put the logic to wait for WAL in standby mode to a separate function.

This is just refactoring with no user-visible effect, to make the code more
readable.
parent f7491616
......@@ -157,6 +157,9 @@ HotStandbyState standbyState = STANDBY_DISABLED;
static XLogRecPtr LastRec;
/* Local copy of WalRcv->receivedUpto */
static XLogRecPtr receivedUpto = 0;
/*
* During recovery, lastFullPageWrites keeps track of full_page_writes that
* the replayed WAL records indicate. It's initialized with full_page_writes
......@@ -538,6 +541,7 @@ static int readFile = -1;
static XLogSegNo readSegNo = 0;
static uint32 readOff = 0;
static uint32 readLen = 0;
static bool readFileHeaderValidated = false;
static int readSource = 0; /* XLOG_FROM_* code */
/*
......@@ -628,6 +632,8 @@ static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
static int XLogFileReadAnyTLI(XLogSegNo segno, int emode, int sources);
static bool XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
bool randAccess);
static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
bool fetching_ckpt);
static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
static void XLogFileClose(void);
static void PreallocXlogFiles(XLogRecPtr endptr);
......@@ -2685,6 +2691,9 @@ XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
if (source != XLOG_FROM_STREAM)
XLogReceiptTime = GetCurrentTimestamp();
/* The file header needs to be validated on first access */
readFileHeaderValidated = false;
return fd;
}
if (errno != ENOENT || !notfoundOk) /* unexpected failure? */
......@@ -9233,12 +9242,9 @@ static bool
XLogPageRead(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt,
bool randAccess)
{
static XLogRecPtr receivedUpto = 0;
bool switched_segment = false;
uint32 targetPageOff;
uint32 targetRecOff;
XLogSegNo targetSegNo;
static pg_time_t last_fail_time = 0;
XLByteToSeg(*RecPtr, targetSegNo);
targetPageOff = (((*RecPtr) % XLogSegSize) / XLOG_BLCKSZ) * XLOG_BLCKSZ;
......@@ -9282,12 +9288,156 @@ retry:
(readSource == XLOG_FROM_STREAM && !XLByteLT(*RecPtr, receivedUpto)))
{
if (StandbyMode)
{
if (!WaitForWALToBecomeAvailable(*RecPtr, randAccess,
fetching_ckpt))
goto triggered;
}
else
{
/* In archive or crash recovery. */
if (readFile < 0)
{
int sources;
/* Reset curFileTLI if random fetch. */
if (randAccess)
curFileTLI = 0;
sources = XLOG_FROM_PG_XLOG;
if (InArchiveRecovery)
sources |= XLOG_FROM_ARCHIVE;
readFile = XLogFileReadAnyTLI(readSegNo, emode, sources);
if (readFile < 0)
return false;
}
}
}
/*
* At this point, we have the right segment open and if we're streaming we
* know the requested record is in it.
*/
Assert(readFile != -1);
/*
* If the current segment is being streamed from master, calculate how
* much of the current page we have received already. We know the
* requested record has been received, but this is for the benefit of
* future calls, to allow quick exit at the top of this function.
*/
if (readSource == XLOG_FROM_STREAM)
{
if (((*RecPtr) / XLOG_BLCKSZ) != (receivedUpto / XLOG_BLCKSZ))
{
readLen = XLOG_BLCKSZ;
}
else
readLen = receivedUpto % XLogSegSize - targetPageOff;
}
else
readLen = XLOG_BLCKSZ;
if (!readFileHeaderValidated && targetPageOff != 0)
{
/*
* In standby mode, wait for the requested record to become
* available, either via restore_command succeeding to restore the
* segment, or via walreceiver having streamed the record.
* Whenever switching to a new WAL segment, we read the first page of
* the file and validate its header, even if that's not where the
* target record is. This is so that we can check the additional
* identification info that is present in the first page's "long"
* header.
*/
readOff = 0;
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
char fname[MAXFNAMELEN];
XLogFileName(fname, curFileTLI, readSegNo);
ereport(emode_for_corrupt_record(emode, *RecPtr),
(errcode_for_file_access(),
errmsg("could not read from log segment %s, offset %u: %m",
fname, readOff)));
goto next_record_is_invalid;
}
if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode))
goto next_record_is_invalid;
}
/* Read the requested page */
readOff = targetPageOff;
if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
{
char fname[MAXFNAMELEN];
XLogFileName(fname, curFileTLI, readSegNo);
ereport(emode_for_corrupt_record(emode, *RecPtr),
(errcode_for_file_access(),
errmsg("could not seek in log segment %s to offset %u: %m",
fname, readOff)));
goto next_record_is_invalid;
}
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
char fname[MAXFNAMELEN];
XLogFileName(fname, curFileTLI, readSegNo);
ereport(emode_for_corrupt_record(emode, *RecPtr),
(errcode_for_file_access(),
errmsg("could not read from log segment %s, offset %u: %m",
fname, readOff)));
goto next_record_is_invalid;
}
if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode))
goto next_record_is_invalid;
readFileHeaderValidated = true;
Assert(targetSegNo == readSegNo);
Assert(targetPageOff == readOff);
Assert(targetRecOff < readLen);
return true;
next_record_is_invalid:
failedSources |= readSource;
if (readFile >= 0)
close(readFile);
readFile = -1;
readLen = 0;
readSource = 0;
/* In standby-mode, keep trying */
if (StandbyMode)
goto retry;
else
return false;
triggered:
if (readFile >= 0)
close(readFile);
readFile = -1;
readLen = 0;
readSource = 0;
return false;
}
/*
* In standby mode, wait for the requested record to become available, either
* via restore_command succeeding to restore the segment, or via walreceiver
* having streamed the record (or via someone copying the segment directly to
* pg_xlog, but that is not documented or recommended).
*
* When the requested record becomes available, the function opens the file
* containing it (if not open already), and returns true. When end of standby
* mode is triggered by the user, and there is no more WAL available, returns
* false.
*/
static bool
WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
bool fetching_ckpt)
{
static pg_time_t last_fail_time = 0;
for (;;)
{
if (WalRcvInProgress())
......@@ -9295,14 +9445,13 @@ retry:
bool havedata;
/*
* If we find an invalid record in the WAL streamed from
* master, something is seriously wrong. There's little
* chance that the problem will just go away, but PANIC is
* not good for availability either, especially in hot
* standby mode. Disconnect, and retry from
* archive/pg_xlog again. The WAL in the archive should be
* identical to what was streamed, so it's unlikely that
* it helps, but one can hope...
* If we find an invalid record in the WAL streamed from master,
* something is seriously wrong. There's little chance that the
* problem will just go away, but PANIC is not good for
* availability either, especially in hot standby mode.
* Disconnect, and retry from archive/pg_xlog again. The WAL in
* the archive should be identical to what was streamed, so it's
* unlikely that it helps, but one can hope...
*/
if (failedSources & XLOG_FROM_STREAM)
{
......@@ -9313,26 +9462,25 @@ retry:
/*
* Walreceiver is active, so see if new data has arrived.
*
* We only advance XLogReceiptTime when we obtain fresh
* WAL from walreceiver and observe that we had already
* processed everything before the most recent "chunk"
* that it flushed to disk. In steady state where we are
* keeping up with the incoming data, XLogReceiptTime will
* be updated on each cycle. When we are behind,
* XLogReceiptTime will not advance, so the grace time
* alloted to conflicting queries will decrease.
* We only advance XLogReceiptTime when we obtain fresh WAL from
* walreceiver and observe that we had already processed
* everything before the most recent "chunk" that it flushed to
* disk. In steady state where we are keeping up with the
* incoming data, XLogReceiptTime will be updated on each cycle.
* When we are behind, XLogReceiptTime will not advance, so the
* grace time allotted to conflicting queries will decrease.
*/
if (XLByteLT(*RecPtr, receivedUpto))
if (XLByteLT(RecPtr, receivedUpto))
havedata = true;
else
{
XLogRecPtr latestChunkStart;
receivedUpto = GetWalRcvWriteRecPtr(&latestChunkStart);
if (XLByteLT(*RecPtr, receivedUpto))
if (XLByteLT(RecPtr, receivedUpto))
{
havedata = true;
if (!XLByteLT(*RecPtr, latestChunkStart))
if (!XLByteLT(RecPtr, latestChunkStart))
{
XLogReceiptTime = GetCurrentTimestamp();
SetCurrentChunkStartTime(XLogReceiptTime);
......@@ -9344,19 +9492,16 @@ retry:
if (havedata)
{
/*
* Great, streamed far enough. Open the file if it's
* not open already. Use XLOG_FROM_STREAM so that
* source info is set correctly and XLogReceiptTime
* isn't changed.
* Great, streamed far enough. Open the file if it's not open
* already. Use XLOG_FROM_STREAM so that source info is set
* correctly and XLogReceiptTime isn't changed.
*/
if (readFile < 0)
{
readFile =
XLogFileRead(readSegNo, PANIC,
recoveryTargetTLI,
readFile = XLogFileRead(readSegNo, PANIC,
curFileTLI,
XLOG_FROM_STREAM, false);
Assert(readFile >= 0);
switched_segment = true;
}
else
{
......@@ -9368,11 +9513,11 @@ retry:
}
/*
* Data not here yet, so check for trigger then sleep for
* five seconds like in the WAL file polling case below.
* Data not here yet, so check for trigger then sleep for five
* seconds like in the WAL file polling case below.
*/
if (CheckForStandbyTrigger())
goto retry;
return false;
/*
* Wait for more WAL to arrive, or timeout to be reached
......@@ -9384,13 +9529,12 @@ retry:
}
else
{
/*
* WAL receiver is not active. Poll the archive.
*/
int sources;
pg_time_t now;
/*
* Until walreceiver manages to reconnect, poll the
* archive.
*/
if (readFile >= 0)
{
close(readFile);
......@@ -9401,22 +9545,20 @@ retry:
curFileTLI = 0;
/*
* Try to restore the file from archive, or read an
* existing file from pg_xlog.
* Try to restore the file from archive, or read an existing file
* from pg_xlog.
*/
sources = XLOG_FROM_ARCHIVE | XLOG_FROM_PG_XLOG;
if (!(sources & ~failedSources))
{
/*
* We've exhausted all options for retrieving the
* file. Retry.
* We've exhausted all options for retrieving the file. Retry.
*/
failedSources = 0;
/*
* Before we sleep, re-scan for possible new timelines
* if we were requested to recover to the latest
* timeline.
* Before we sleep, re-scan for possible new timelines if we
* were requested to recover to the latest timeline.
*/
if (recoveryTargetIsLatest)
{
......@@ -9425,8 +9567,8 @@ retry:
}
/*
* If it hasn't been long since last attempt, sleep to
* avoid busy-waiting.
* If it hasn't been long since last attempt, sleep to avoid
* busy-waiting.
*/
now = (pg_time_t) time(NULL);
if ((now - last_fail_time) < 5)
......@@ -9437,30 +9579,27 @@ retry:
last_fail_time = now;
/*
* If primary_conninfo is set, launch walreceiver to
* try to stream the missing WAL, before retrying to
* restore from archive/pg_xlog.
* If primary_conninfo is set, launch walreceiver to try to
* stream the missing WAL, before retrying to restore from
* archive/pg_xlog.
*
* If fetching_ckpt is TRUE, RecPtr points to the
* initial checkpoint location. In that case, we use
* RedoStartLSN as the streaming start position
* instead of RecPtr, so that when we later jump
* backwards to start redo at RedoStartLSN, we will
* have the logs streamed already.
* If fetching_ckpt is TRUE, RecPtr points to the initial
* checkpoint location. In that case, we use RedoStartLSN as
* the streaming start position instead of RecPtr, so that
* when we later jump backwards to start redo at RedoStartLSN,
* we will have the logs streamed already.
*/
if (PrimaryConnInfo)
{
RequestXLogStreaming(
fetching_ckpt ? RedoStartLSN : *RecPtr,
PrimaryConnInfo);
XLogRecPtr ptr = fetching_ckpt ? RedoStartLSN : RecPtr;
RequestXLogStreaming(ptr, PrimaryConnInfo);
continue;
}
}
/* Don't try to read from a source that just failed */
sources &= ~failedSources;
readFile = XLogFileReadAnyTLI(readSegNo, DEBUG2,
sources);
switched_segment = true;
readFile = XLogFileReadAnyTLI(readSegNo, DEBUG2, sources);
if (readFile >= 0)
break;
......@@ -9470,147 +9609,23 @@ retry:
failedSources |= sources;
/*
* Check to see if the trigger file exists. Note that we
* do this only after failure, so when you create the
* trigger file, we still finish replaying as much as we
* can from archive and pg_xlog before failover.
* Check to see if the trigger file exists. Note that we do this
* only after failure, so when you create the trigger file, we
* still finish replaying as much as we can from archive and
* pg_xlog before failover.
*/
if (CheckForStandbyTrigger())
goto triggered;
}
/*
* This possibly-long loop needs to handle interrupts of
* startup process.
*/
HandleStartupProcInterrupts();
}
}
else
{
/* In archive or crash recovery. */
if (readFile < 0)
{
int sources;
/* Reset curFileTLI if random fetch. */
if (randAccess)
curFileTLI = 0;
sources = XLOG_FROM_PG_XLOG;
if (InArchiveRecovery)
sources |= XLOG_FROM_ARCHIVE;
readFile = XLogFileReadAnyTLI(readSegNo, emode, sources);
switched_segment = true;
if (readFile < 0)
return false;
}
}
}
/*
* At this point, we have the right segment open and if we're streaming we
* know the requested record is in it.
*/
Assert(readFile != -1);
/*
* If the current segment is being streamed from master, calculate how
* much of the current page we have received already. We know the
* requested record has been received, but this is for the benefit of
* future calls, to allow quick exit at the top of this function.
*/
if (readSource == XLOG_FROM_STREAM)
{
if (((*RecPtr) / XLOG_BLCKSZ) != (receivedUpto / XLOG_BLCKSZ))
{
readLen = XLOG_BLCKSZ;
}
else
readLen = receivedUpto % XLogSegSize - targetPageOff;
}
else
readLen = XLOG_BLCKSZ;
if (switched_segment && targetPageOff != 0)
{
/*
* Whenever switching to a new WAL segment, we read the first page of
* the file and validate its header, even if that's not where the
* target record is. This is so that we can check the additional
* identification info that is present in the first page's "long"
* header.
* This possibly-long loop needs to handle interrupts of startup
* process.
*/
readOff = 0;
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
char fname[MAXFNAMELEN];
XLogFileName(fname, curFileTLI, readSegNo);
ereport(emode_for_corrupt_record(emode, *RecPtr),
(errcode_for_file_access(),
errmsg("could not read from log segment %s, offset %u: %m",
fname, readOff)));
goto next_record_is_invalid;
}
if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode))
goto next_record_is_invalid;
}
/* Read the requested page */
readOff = targetPageOff;
if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
{
char fname[MAXFNAMELEN];
XLogFileName(fname, curFileTLI, readSegNo);
ereport(emode_for_corrupt_record(emode, *RecPtr),
(errcode_for_file_access(),
errmsg("could not seek in log segment %s to offset %u: %m",
fname, readOff)));
goto next_record_is_invalid;
}
if (read(readFile, readBuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
{
char fname[MAXFNAMELEN];
XLogFileName(fname, curFileTLI, readSegNo);
ereport(emode_for_corrupt_record(emode, *RecPtr),
(errcode_for_file_access(),
errmsg("could not read from log segment %s, offset %u: %m",
fname, readOff)));
goto next_record_is_invalid;
HandleStartupProcInterrupts();
}
if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode))
goto next_record_is_invalid;
Assert(targetSegNo == readSegNo);
Assert(targetPageOff == readOff);
Assert(targetRecOff < readLen);
return true;
next_record_is_invalid:
failedSources |= readSource;
if (readFile >= 0)
close(readFile);
readFile = -1;
readLen = 0;
readSource = 0;
/* In standby-mode, keep trying */
if (StandbyMode)
goto retry;
else
return false;
triggered:
if (readFile >= 0)
close(readFile);
readFile = -1;
readLen = 0;
readSource = 0;
return false;
}
/*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment