Commit 2ff65553 authored by Heikki Linnakangas

Use the right timeline when beginning to stream from master.

The xlogreader refactoring broke the logic to decide which timeline to start
streaming from. XLogPageRead() uses the timeline history to check which
timeline the requested WAL position falls into. However, after the
refactoring, XLogPageRead() is always first called with the first page in
the segment, to verify the segment header, and only then with the actual WAL
position we're interested in. That first read of the segment's header made
XLogPageRead() always start streaming from the old timeline containing
the segment header, not the timeline containing the actual record, if there
was a timeline switch within the segment.

I thought I fixed this yesterday, but that fix was too narrow and only fixed
this for the corner case where the timeline switch happened in the first page
of the segment. To fix this more robustly, explicitly pass the position of
the record we're actually interested in to XLogPageRead(), and use that to
decide which timeline to read from, rather than deducing it from the page and
offset.

Per report from Fujii Masao.
parent 88228e6f
...@@ -626,9 +626,10 @@ static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli, ...@@ -626,9 +626,10 @@ static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
int source, bool notexistOk); int source, bool notexistOk);
static int XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source); static int XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source);
static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
int reqLen, char *readBuf, TimeLineID *readTLI); int reqLen, XLogRecPtr targetRecPtr, char *readBuf,
TimeLineID *readTLI);
static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
bool fetching_ckpt); bool fetching_ckpt, XLogRecPtr tliRecPtr);
static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr); static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
static void XLogFileClose(void); static void XLogFileClose(void);
static void PreallocXlogFiles(XLogRecPtr endptr); static void PreallocXlogFiles(XLogRecPtr endptr);
...@@ -8832,7 +8833,7 @@ CancelBackup(void) ...@@ -8832,7 +8833,7 @@ CancelBackup(void)
*/ */
static int static int
XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
char *readBuf, TimeLineID *readTLI) XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *readTLI)
{ {
XLogPageReadPrivate *private = XLogPageReadPrivate *private =
(XLogPageReadPrivate *) xlogreader->private_data; (XLogPageReadPrivate *) xlogreader->private_data;
...@@ -8880,7 +8881,8 @@ retry: ...@@ -8880,7 +8881,8 @@ retry:
{ {
if (!WaitForWALToBecomeAvailable(targetPagePtr + reqLen, if (!WaitForWALToBecomeAvailable(targetPagePtr + reqLen,
private->randAccess, private->randAccess,
private->fetching_ckpt)) private->fetching_ckpt,
targetRecPtr))
goto triggered; goto triggered;
} }
/* In archive or crash recovery. */ /* In archive or crash recovery. */
...@@ -8980,11 +8982,19 @@ triggered: ...@@ -8980,11 +8982,19 @@ triggered:
} }
/* /*
* In standby mode, wait for the requested record to become available, either * In standby mode, wait for WAL at position 'RecPtr' to become available, either
* via restore_command succeeding to restore the segment, or via walreceiver * via restore_command succeeding to restore the segment, or via walreceiver
* having streamed the record (or via someone copying the segment directly to * having streamed the record (or via someone copying the segment directly to
* pg_xlog, but that is not documented or recommended). * pg_xlog, but that is not documented or recommended).
* *
* If 'fetching_ckpt' is true, we're fetching a checkpoint record, and should
* prepare to read WAL starting from RedoStartLSN after this.
*
* 'RecPtr' might not point to the beginning of the record we're interested
* in; it might also point to the page or segment header. In that case,
* 'tliRecPtr' is the position of the WAL record we're interested in. It is
* used to decide which timeline to stream the requested WAL from.
*
* When the requested record becomes available, the function opens the file * When the requested record becomes available, the function opens the file
* containing it (if not open already), and returns true. When end of standby * containing it (if not open already), and returns true. When end of standby
* mode is triggered by the user, and there is no more WAL available, returns * mode is triggered by the user, and there is no more WAL available, returns
...@@ -8992,7 +9002,7 @@ triggered: ...@@ -8992,7 +9002,7 @@ triggered:
*/ */
static bool static bool
WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
bool fetching_ckpt) bool fetching_ckpt, XLogRecPtr tliRecPtr)
{ {
static pg_time_t last_fail_time = 0; static pg_time_t last_fail_time = 0;
pg_time_t now; pg_time_t now;
...@@ -9076,7 +9086,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, ...@@ -9076,7 +9086,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
else else
{ {
ptr = RecPtr; ptr = RecPtr;
tli = tliOfPointInHistory(ptr, expectedTLEs); tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
if (curFileTLI > 0 && tli < curFileTLI) if (curFileTLI > 0 && tli < curFileTLI)
elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u", elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
......
...@@ -216,6 +216,8 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg) ...@@ -216,6 +216,8 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
randAccess = true; /* allow readPageTLI to go backwards too */ randAccess = true; /* allow readPageTLI to go backwards too */
} }
state->currRecPtr = RecPtr;
targetPagePtr = RecPtr - (RecPtr % XLOG_BLCKSZ); targetPagePtr = RecPtr - (RecPtr % XLOG_BLCKSZ);
targetRecOff = RecPtr % XLOG_BLCKSZ; targetRecOff = RecPtr % XLOG_BLCKSZ;
...@@ -503,6 +505,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) ...@@ -503,6 +505,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
XLogRecPtr targetSegmentPtr = pageptr - targetPageOff; XLogRecPtr targetSegmentPtr = pageptr - targetPageOff;
readLen = state->read_page(state, targetSegmentPtr, XLOG_BLCKSZ, readLen = state->read_page(state, targetSegmentPtr, XLOG_BLCKSZ,
state->currRecPtr,
state->readBuf, &state->readPageTLI); state->readBuf, &state->readPageTLI);
if (readLen < 0) if (readLen < 0)
goto err; goto err;
...@@ -521,6 +524,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) ...@@ -521,6 +524,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
* so that we can validate it. * so that we can validate it.
*/ */
readLen = state->read_page(state, pageptr, Max(reqLen, SizeOfXLogShortPHD), readLen = state->read_page(state, pageptr, Max(reqLen, SizeOfXLogShortPHD),
state->currRecPtr,
state->readBuf, &state->readPageTLI); state->readBuf, &state->readPageTLI);
if (readLen < 0) if (readLen < 0)
goto err; goto err;
...@@ -539,6 +543,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) ...@@ -539,6 +543,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen)
if (readLen < XLogPageHeaderSize(hdr)) if (readLen < XLogPageHeaderSize(hdr))
{ {
readLen = state->read_page(state, pageptr, XLogPageHeaderSize(hdr), readLen = state->read_page(state, pageptr, XLogPageHeaderSize(hdr),
state->currRecPtr,
state->readBuf, &state->readPageTLI); state->readBuf, &state->readPageTLI);
if (readLen < 0) if (readLen < 0)
goto err; goto err;
......
...@@ -27,6 +27,7 @@ typedef struct XLogReaderState XLogReaderState; ...@@ -27,6 +27,7 @@ typedef struct XLogReaderState XLogReaderState;
typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader, typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader,
XLogRecPtr targetPagePtr, XLogRecPtr targetPagePtr,
int reqLen, int reqLen,
XLogRecPtr targetRecPtr,
char *readBuf, char *readBuf,
TimeLineID *pageTLI); TimeLineID *pageTLI);
...@@ -46,11 +47,17 @@ struct XLogReaderState ...@@ -46,11 +47,17 @@ struct XLogReaderState
* -1 on failure. The callback shall sleep, if necessary, to wait for the * -1 on failure. The callback shall sleep, if necessary, to wait for the
* requested bytes to become available. The callback will not be invoked * requested bytes to become available. The callback will not be invoked
* again for the same page unless more than the returned number of bytes * again for the same page unless more than the returned number of bytes
* are necessary. * are needed.
* *
* *pageTLI should be set to the TLI of the file the page was read from. * targetRecPtr is the position of the WAL record we're reading. Usually
* It is currently used only for error reporting purposes, to reconstruct * it is equal to targetPagePtr + reqLen, but sometimes xlogreader needs
* the name of the WAL file where an error occurred. * to read and verify the page or segment header, before it reads the
* actual WAL record it's interested in. In that case, targetRecPtr can
* be used to determine which timeline to read the page from.
*
* The callback shall set *pageTLI to the TLI of the file the page was
* read from. It is currently used only for error reporting purposes, to
* reconstruct the name of the WAL file where an error occurred.
*/ */
XLogPageReadCB read_page; XLogPageReadCB read_page;
...@@ -90,6 +97,9 @@ struct XLogReaderState ...@@ -90,6 +97,9 @@ struct XLogReaderState
XLogRecPtr latestPagePtr; XLogRecPtr latestPagePtr;
TimeLineID latestPageTLI; TimeLineID latestPageTLI;
/* beginning of the WAL record being read. */
XLogRecPtr currRecPtr;
/* Buffer for current ReadRecord result (expandable) */ /* Buffer for current ReadRecord result (expandable) */
char *readRecordBuf; char *readRecordBuf;
uint32 readRecordBufSize; uint32 readRecordBufSize;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment