From 24c19e6bf96d556b3a9517ce42f54b5756ca0384 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 22 Nov 2012 11:23:46 +0200 Subject: [PATCH] Avoid bogus "out-of-sequence timeline ID" errors in standby-mode. When startup process opens a WAL segment after replaying part of it, it validates the first page on the WAL segment, even though the page it's really interested in later in the file. As part of the validation, it checks that the TLI on the page header is >= the TLI it saw on the last page it read. If the segment contains a timeline switch, and we have already replayed it, and then re-open the WAL segment (because of streaming replication got disconnected and reconnected, for example), the TLI check will fail when the first page is validated. Fix that by relaxing the TLI check when re-opening a WAL segment. Backpatch to 9.0. Earlier versions had the same code, but before standby mode was introduced in 9.0, recovery never tried to re-read a segment after partially replaying it. Reported by Amit Kapila, while testing a new feature. --- src/backend/access/transam/xlog.c | 33 ++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 09d4dffc75..0d2540c3c2 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -570,6 +570,7 @@ static uint32 readRecordBufSize = 0; static XLogRecPtr ReadRecPtr; /* start of last record read */ static XLogRecPtr EndRecPtr; /* end+1 of last record read */ static TimeLineID lastPageTLI = 0; +static TimeLineID lastSegmentTLI = 0; static XLogRecPtr minRecoveryPoint; /* local copy of * ControlFile->minRecoveryPoint */ @@ -644,7 +645,7 @@ static void CleanupBackupHistory(void); static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force); static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt); static void CheckRecoveryConsistency(void); -static bool ValidXLogPageHeader(XLogPageHeader hdr, int emode); +static bool ValidXLogPageHeader(XLogPageHeader hdr, int emode, bool segmentonly); static bool ValidXLogRecordHeader(XLogRecPtr *RecPtr, XLogRecord *record, int emode, bool randAccess); static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt); @@ -3339,7 +3340,8 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, bool fetching_ckpt) * to go backwards (but we can't reset that variable right here, since * we might not change files at all). */ - lastPageTLI = 0; /* see comment in ValidXLogPageHeader */ + /* see comment in ValidXLogPageHeader */ + lastPageTLI = lastSegmentTLI = 0; randAccess = true; /* allow curFileTLI to go backwards too */ } @@ -3579,7 +3581,7 @@ next_record_is_invalid: * ReadRecord. It's not intended for use from anywhere else. */ static bool -ValidXLogPageHeader(XLogPageHeader hdr, int emode) +ValidXLogPageHeader(XLogPageHeader hdr, int emode, bool segmentonly) { XLogRecPtr recaddr; @@ -3681,19 +3683,32 @@ ValidXLogPageHeader(XLogPageHeader hdr, int emode) * successive pages of a consistent WAL sequence. * * Of course this check should only be applied when advancing sequentially - * across pages; therefore ReadRecord resets lastPageTLI to zero when - * going to a random page. + * across pages; therefore ReadRecord resets lastPageTLI and + * lastSegmentTLI to zero when going to a random page. + * + * Sometimes we re-open a segment that's already been partially replayed. + * In that case we cannot perform the normal TLI check: if there is a + * timeline switch within the segment, the first page has a smaller TLI + * than later pages following the timeline switch, and we might've read + * them already. As a weaker test, we still check that it's not smaller + * than the TLI we last saw at the beginning of a segment. Pass + * segmentonly = true when re-validating the first page like that, and the + * page you're actually interested in comes later. */ - if (hdr->xlp_tli < lastPageTLI) + if (hdr->xlp_tli < (segmentonly ? lastSegmentTLI : lastPageTLI)) { ereport(emode_for_corrupt_record(emode, recaddr), (errmsg("out-of-sequence timeline ID %u (after %u) in log segment %s, offset %u", - hdr->xlp_tli, lastPageTLI, + hdr->xlp_tli, + segmentonly ? lastSegmentTLI : lastPageTLI, XLogFileNameP(curFileTLI, readSegNo), readOff))); return false; } lastPageTLI = hdr->xlp_tli; + if (readOff == 0) + lastSegmentTLI = hdr->xlp_tli; + return true; } @@ -9366,7 +9381,7 @@ retry: fname, readOff))); goto next_record_is_invalid; } - if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode)) + if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode, true)) goto next_record_is_invalid; } @@ -9392,7 +9407,7 @@ retry: fname, readOff))); goto next_record_is_invalid; } - if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode)) + if (!ValidXLogPageHeader((XLogPageHeader) readBuf, emode, false)) goto next_record_is_invalid; readFileHeaderValidated = true; -- 2.40.0