From: Simon Riggs
Date: Thu, 31 Jan 2013 19:29:32 +0000 (+0000)
Subject: Switch timelines if we crash soon after promotion.
X-Git-Tag: REL9_3_BETA1~399
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3f0ab052330905f1ad2183684e75e6a2cbfa0c76;p=postgresql

Switch timelines if we crash soon after promotion.

Previous patch to skip checkpoints at end of recovery didn't correctly
perform crash recovery, fumbling the timeline switch. Now we record the
minRecoveryPointTLI of the newly selected timeline, so that we crash
recover to the correct timeline.

Bug report from Fujii Masao, investigated by me.
---

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index bcd379dca7..d644e3982c 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -4828,6 +4828,22 @@ StartupXLOG(void)
 			ereport(LOG,
 					(errmsg("starting archive recovery")));
 	}
+	else if (ControlFile->minRecoveryPointTLI > 0)
+	{
+		/*
+		 * If the minRecoveryPointTLI is set when not in Archive Recovery
+		 * it means that we have crashed after ending recovery and
+		 * yet before we wrote a new checkpoint on the new timeline.
+		 * That means we are doing a crash recovery that needs to cross
+		 * timelines to get to our newly assigned timeline again.
+		 * The timeline we are headed for is exact and not 'latest'.
+		 * As soon as we hit a checkpoint, the minRecoveryPointTLI is
+		 * reset, so we will not enter crash recovery again.
+		 */
+		Assert(ControlFile->minRecoveryPointTLI != 1);
+		recoveryTargetTLI = ControlFile->minRecoveryPointTLI;
+		recoveryTargetIsLatest = false;
+	}
 
 	/*
 	 * Take ownership of the wakeup latch if we're going to sleep during
@@ -5075,6 +5091,12 @@ StartupXLOG(void)
 				ereport(LOG,
 						(errmsg("database system was not properly shut down; "
 								"automatic recovery in progress")));
+			if (recoveryTargetTLI > 0)
+				ereport(LOG,
+					(errmsg("crash recovery starts in timeline %u "
+							"and has target timeline %u",
+							ControlFile->checkPointCopy.ThisTimeLineID,
+							recoveryTargetTLI)));
 			ControlFile->state = DB_IN_CRASH_RECOVERY;
 		}
 		ControlFile->prevCheckPoint = ControlFile->checkPoint;
@@ -6945,6 +6967,7 @@ CreateEndOfRecoveryRecord(void)
 {
 	xl_end_of_recovery	xlrec;
 	XLogRecData			rdata;
+	XLogRecPtr			recptr;
 
 	/* sanity check */
 	if (!RecoveryInProgress())
@@ -6962,7 +6985,20 @@ CreateEndOfRecoveryRecord(void)
 	rdata.buffer = InvalidBuffer;
 	rdata.next = NULL;
 
-	(void) XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, &rdata);
+	recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, &rdata);
+
+	XLogFlush(recptr);
+
+	/*
+	 * Update the control file so that crash recovery can follow
+	 * the timeline changes to this point.
+	 */
+	LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+	ControlFile->time = (pg_time_t) xlrec.end_time;
+	ControlFile->minRecoveryPoint = recptr;
+	ControlFile->minRecoveryPointTLI = ThisTimeLineID;
+	UpdateControlFile();
+	LWLockRelease(ControlFileLock);
 
 	END_CRIT_SECTION();