From 2ff65553131d8ad2ddbbfe298fffc378f127b15e Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 18 Jan 2013 11:41:36 +0200 Subject: [PATCH] Use the right timeline when beginning to stream from master. The xlogreader refactoring broke the logic to decide which timeline to start streaming from. XLogPageRead() uses the timeline history to check which timeline the requested WAL position falls into. However, after the refactoring, XLogPageRead() is always first called with the first page in the segment, to verify the segment header, and only then with the actual WAL position we're interested in. That first read of the segment's header made XLogPageRead() always start streaming from the old timeline containing the segment header, not the timeline containing the actual record, if there was a timeline switch within the segment. I thought I fixed this yesterday, but that fix was too narrow and only fixed this for the corner case where the timeline switch happened in the first page of the segment. To fix this more robustly, explicitly pass the position of the record we're actually interested in to XLogPageRead(), and use that to decide which timeline to read from, rather than deducing it from the page and offset. Per report from Fujii Masao. 
--- src/backend/access/transam/xlog.c | 24 +++++++++++++++++------- src/backend/access/transam/xlogreader.c | 5 +++++ src/include/access/xlogreader.h | 18 ++++++++++++++---- 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 90ba32ef0f..3ac3b767dd 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -626,9 +626,10 @@ static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli, int source, bool notexistOk); static int XLogFileReadAnyTLI(XLogSegNo segno, int emode, int source); static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, - int reqLen, char *readBuf, TimeLineID *readTLI); + int reqLen, XLogRecPtr targetRecPtr, char *readBuf, + TimeLineID *readTLI); static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, - bool fetching_ckpt); + bool fetching_ckpt, XLogRecPtr tliRecPtr); static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr); static void XLogFileClose(void); static void PreallocXlogFiles(XLogRecPtr endptr); @@ -8832,7 +8833,7 @@ CancelBackup(void) */ static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, - char *readBuf, TimeLineID *readTLI) + XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *readTLI) { XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data; @@ -8880,7 +8881,8 @@ retry: { if (!WaitForWALToBecomeAvailable(targetPagePtr + reqLen, private->randAccess, - private->fetching_ckpt)) + private->fetching_ckpt, + targetRecPtr)) goto triggered; } /* In archive or crash recovery. 
*/ @@ -8980,11 +8982,19 @@ triggered: } /* - * In standby mode, wait for the requested record to become available, either + * In standby mode, wait for WAL at position 'RecPtr' to become available, either * via restore_command succeeding to restore the segment, or via walreceiver * having streamed the record (or via someone copying the segment directly to * pg_xlog, but that is not documented or recommended). * + * If 'fetching_ckpt' is true, we're fetching a checkpoint record, and should + * prepare to read WAL starting from RedoStartLSN after this. + * + * 'RecPtr' might not point to the beginning of the record we're interested + * in, it might also point to the page or segment header. In that case, + * 'tliRecPtr' is the position of the WAL record we're interested in. It is + * used to decide which timeline to stream the requested WAL from. + * * When the requested record becomes available, the function opens the file * containing it (if not open already), and returns true. When end of standby * mode is triggered by the user, and there is no more WAL available, returns @@ -8992,7 +9002,7 @@ triggered: */ static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, - bool fetching_ckpt) + bool fetching_ckpt, XLogRecPtr tliRecPtr) { static pg_time_t last_fail_time = 0; pg_time_t now; @@ -9076,7 +9086,7 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, else { ptr = RecPtr; - tli = tliOfPointInHistory(ptr, expectedTLEs); + tli = tliOfPointInHistory(tliRecPtr, expectedTLEs); if (curFileTLI > 0 && tli < curFileTLI) elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u", diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 9499f848b0..a358a3d40a 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -216,6 +216,8 @@ XLogReadRecord(XLogReaderState *state, 
XLogRecPtr RecPtr, char **errormsg) randAccess = true; /* allow readPageTLI to go backwards too */ } + state->currRecPtr = RecPtr; + targetPagePtr = RecPtr - (RecPtr % XLOG_BLCKSZ); targetRecOff = RecPtr % XLOG_BLCKSZ; @@ -503,6 +505,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) XLogRecPtr targetSegmentPtr = pageptr - targetPageOff; readLen = state->read_page(state, targetSegmentPtr, XLOG_BLCKSZ, + state->currRecPtr, state->readBuf, &state->readPageTLI); if (readLen < 0) goto err; @@ -521,6 +524,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) * so that we can validate it. */ readLen = state->read_page(state, pageptr, Max(reqLen, SizeOfXLogShortPHD), + state->currRecPtr, state->readBuf, &state->readPageTLI); if (readLen < 0) goto err; @@ -539,6 +543,7 @@ ReadPageInternal(XLogReaderState *state, XLogRecPtr pageptr, int reqLen) if (readLen < XLogPageHeaderSize(hdr)) { readLen = state->read_page(state, pageptr, XLogPageHeaderSize(hdr), + state->currRecPtr, state->readBuf, &state->readPageTLI); if (readLen < 0) goto err; diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h index 36907d6330..3829ce2ab1 100644 --- a/src/include/access/xlogreader.h +++ b/src/include/access/xlogreader.h @@ -27,6 +27,7 @@ typedef struct XLogReaderState XLogReaderState; typedef int (*XLogPageReadCB) (XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen, + XLogRecPtr targetRecPtr, char *readBuf, TimeLineID *pageTLI); @@ -46,11 +47,17 @@ struct XLogReaderState * -1 on failure. The callback shall sleep, if necessary, to wait for the * requested bytes to become available. The callback will not be invoked * again for the same page unless more than the returned number of bytes - * are necessary. + * are needed. * - * *pageTLI should be set to the TLI of the file the page was read from. 
- * It is currently used only for error reporting purposes, to reconstruct - * the name of the WAL file where an error occurred. + * targetRecPtr is the position of the WAL record we're reading. Usually + * it is equal to targetPagePtr + reqLen, but sometimes xlogreader needs + * to read and verify the page or segment header, before it reads the + * actual WAL record it's interested in. In that case, targetRecPtr can + * be used to determine which timeline to read the page from. + * + * The callback shall set *pageTLI to the TLI of the file the page was + * read from. It is currently used only for error reporting purposes, to + * reconstruct the name of the WAL file where an error occurred. */ XLogPageReadCB read_page; @@ -90,6 +97,9 @@ struct XLogReaderState XLogRecPtr latestPagePtr; TimeLineID latestPageTLI; + /* beginning of the WAL record being read. */ + XLogRecPtr currRecPtr; + /* Buffer for current ReadRecord result (expandable) */ char *readRecordBuf; uint32 readRecordBufSize; -- 2.40.0