typedef struct XLogPageReadPrivate
{
const char *datadir;
- TimeLineID tli;
+ int tliIndex;
} XLogPageReadPrivate;
static int SimpleXLogPageRead(XLogReaderState *xlogreader,
/*
* Read WAL from the datadir/pg_xlog, starting from 'startpoint' on timeline
- * 'tli', until 'endpoint'. Make note of the data blocks touched by the WAL
- * records, and return them in a page map.
+ * index 'tliIndex' in target timeline history, until 'endpoint'. Make note of
+ * the data blocks touched by the WAL records, and return them in a page map.
*/
void
-extractPageMap(const char *datadir, XLogRecPtr startpoint, TimeLineID tli,
+extractPageMap(const char *datadir, XLogRecPtr startpoint, int tliIndex,
XLogRecPtr endpoint)
{
XLogRecord *record;
XLogPageReadPrivate private;
private.datadir = datadir;
- private.tli = tli;
+ private.tliIndex = tliIndex;
xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private);
if (xlogreader == NULL)
pg_fatal("out of memory\n");
* doing anything with the record itself.
*/
XLogRecPtr
-readOneRecord(const char *datadir, XLogRecPtr ptr, TimeLineID tli)
+readOneRecord(const char *datadir, XLogRecPtr ptr, int tliIndex)
{
XLogRecord *record;
XLogReaderState *xlogreader;
XLogRecPtr endptr;
private.datadir = datadir;
- private.tli = tli;
+ private.tliIndex = tliIndex;
xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private);
if (xlogreader == NULL)
pg_fatal("out of memory\n");
* Find the previous checkpoint preceding given WAL position.
*/
void
-findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, TimeLineID tli,
+findLastCheckpoint(const char *datadir, XLogRecPtr forkptr, int tliIndex,
XLogRecPtr *lastchkptrec, TimeLineID *lastchkpttli,
XLogRecPtr *lastchkptredo)
{
forkptr += (forkptr % XLogSegSize == 0) ? SizeOfXLogLongPHD : SizeOfXLogShortPHD;
private.datadir = datadir;
- private.tli = tli;
+ private.tliIndex = tliIndex;
xlogreader = XLogReaderAllocate(&SimpleXLogPageRead, &private);
if (xlogreader == NULL)
pg_fatal("out of memory\n");
{
XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
uint32 targetPageOff;
- XLogSegNo targetSegNo PG_USED_FOR_ASSERTS_ONLY;
+ XLogRecPtr targetSegEnd;
+ XLogSegNo targetSegNo;
XLByteToSeg(targetPagePtr, targetSegNo);
+ XLogSegNoOffsetToRecPtr(targetSegNo + 1, 0, targetSegEnd);
targetPageOff = targetPagePtr % XLogSegSize;
/*
{
char xlogfname[MAXFNAMELEN];
- XLogFileName(xlogfname, private->tli, xlogreadsegno);
+ /*
+ * Since incomplete segments are copied into next timelines, switch to
+ * the timeline holding the required segment. Assuming this scan can be
+ * done both forward and backward, consider also switching timeline
+ * accordingly.
+ */
+ while (private->tliIndex < targetNentries - 1 &&
+ targetHistory[private->tliIndex].end < targetSegEnd)
+ private->tliIndex++;
+ while (private->tliIndex > 0 &&
+ targetHistory[private->tliIndex].begin >= targetSegEnd)
+ private->tliIndex--;
+
+ XLogFileName(xlogfname, targetHistory[private->tliIndex].tli, xlogreadsegno);
snprintf(xlogfpath, MAXPGPATH, "%s/" XLOGDIR "/%s", private->datadir, xlogfname);
Assert(targetSegNo == xlogreadsegno);
- *pageTLI = private->tli;
+ *pageTLI = targetHistory[private->tliIndex].tli;
return XLOG_BLCKSZ;
}
/*-------------------------------------------------------------------------
*
* pg_rewind.c
- * Synchronizes an old master server to a new timeline
+ * Synchronizes a PostgreSQL data directory to a new timeline
*
* Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
*
size_t size);
static void updateControlFile(ControlFileData *ControlFile);
static void sanityChecks(void);
-static void findCommonAncestorTimeline(XLogRecPtr *recptr, TimeLineID *tli);
+static void findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex);
static ControlFileData ControlFile_target;
static ControlFileData ControlFile_source;
bool showprogress = false;
bool dry_run = false;
+/* Target history */
+TimeLineHistoryEntry *targetHistory;
+int targetNentries;
+
static void
usage(const char *progname)
{
int option_index;
int c;
XLogRecPtr divergerec;
- TimeLineID lastcommontli;
+ int lastcommontliIndex;
XLogRecPtr chkptrec;
TimeLineID chkpttli;
XLogRecPtr chkptredo;
if (ControlFile_target.checkPointCopy.ThisTimeLineID == ControlFile_source.checkPointCopy.ThisTimeLineID)
pg_fatal("source and target cluster are on the same timeline\n");
- findCommonAncestorTimeline(&divergerec, &lastcommontli);
+ findCommonAncestorTimeline(&divergerec, &lastcommontliIndex);
printf(_("servers diverged at WAL position %X/%X on timeline %u\n"),
- (uint32) (divergerec >> 32), (uint32) divergerec, lastcommontli);
+ (uint32) (divergerec >> 32), (uint32) divergerec,
+ targetHistory[lastcommontliIndex].tli);
/*
* Check for the possibility that the target is in fact a direct ancestor
/* Read the checkpoint record on the target to see where it ends. */
chkptendrec = readOneRecord(datadir_target,
ControlFile_target.checkPoint,
- ControlFile_target.checkPointCopy.ThisTimeLineID);
+ targetNentries - 1);
/*
* If the histories diverged exactly at the end of the shutdown
exit(0);
}
- findLastCheckpoint(datadir_target, divergerec, lastcommontli,
+ findLastCheckpoint(datadir_target, divergerec,
+ lastcommontliIndex,
&chkptrec, &chkpttli, &chkptredo);
printf(_("rewinding from last common checkpoint at %X/%X on timeline %u\n"),
(uint32) (chkptrec >> 32), (uint32) chkptrec,
* we would need to replay until the end of WAL here.
*/
pg_log(PG_PROGRESS, "reading WAL in target\n");
- extractPageMap(datadir_target, chkptrec, lastcommontli,
+ extractPageMap(datadir_target, chkptrec, lastcommontliIndex,
ControlFile_target.checkPoint);
filemap_finalize();
/*
* Target cluster better not be running. This doesn't guard against
* someone starting the cluster concurrently. Also, this is probably more
- * strict than necessary; it's OK if the master was not shut down cleanly,
- * as long as it isn't running at the moment.
+ * strict than necessary; it's OK if the target node was not shut down
+ * cleanly, as long as it isn't running at the moment.
*/
- if (ControlFile_target.state != DB_SHUTDOWNED)
+ if (ControlFile_target.state != DB_SHUTDOWNED &&
+ ControlFile_target.state != DB_SHUTDOWNED_IN_RECOVERY)
pg_fatal("target server must be shut down cleanly\n");
/*
* server is shut down. There isn't any very strong reason for this
* limitation, but better safe than sorry.
*/
- if (datadir_source && ControlFile_source.state != DB_SHUTDOWNED)
+ if (datadir_source &&
+ ControlFile_source.state != DB_SHUTDOWNED &&
+ ControlFile_source.state != DB_SHUTDOWNED_IN_RECOVERY)
pg_fatal("source data directory must be shut down cleanly\n");
}
/*
- * Determine the TLI of the last common timeline in the histories of the two
- * clusters. *tli is set to the last common timeline, and *recptr is set to
- * the position where the histories diverged (ie. the first WAL record that's
- * not the same in both clusters).
- *
- * Control files of both clusters must be read into ControlFile_target/source
- * before calling this.
+ * Find minimum from two XLOG positions assuming InvalidXLogRecPtr means
+ * infinity as src/include/access/timeline.h states. This routine should
+ * be used only when comparing XLOG positions related to history files.
*/
-static void
-findCommonAncestorTimeline(XLogRecPtr *recptr, TimeLineID *tli)
+static XLogRecPtr
+MinXLogRecPtr(XLogRecPtr a, XLogRecPtr b)
{
- TimeLineID targettli;
- TimeLineHistoryEntry *sourceHistory;
- int nentries;
- int i;
- TimeLineID sourcetli;
+ if (XLogRecPtrIsInvalid(a))
+ return b;
+ else if (XLogRecPtrIsInvalid(b))
+ return a;
+ else
+ return Min(a, b);
+}
+
+/*
+ * Retrieve timeline history for given control file which should behold
+ * either source or target.
+ */
+static TimeLineHistoryEntry *
+getTimelineHistory(ControlFileData *controlFile, int *nentries)
+{
+ TimeLineHistoryEntry *history;
+ TimeLineID tli;
- targettli = ControlFile_target.checkPointCopy.ThisTimeLineID;
- sourcetli = ControlFile_source.checkPointCopy.ThisTimeLineID;
+ tli = controlFile->checkPointCopy.ThisTimeLineID;
- /* Timeline 1 does not have a history file, so no need to check */
- if (sourcetli == 1)
+ /*
+ * Timeline 1 does not have a history file, so there is no need to check and
+ * fake an entry with infinite start and end positions.
+ */
+ if (tli == 1)
{
- sourceHistory = (TimeLineHistoryEntry *) pg_malloc(sizeof(TimeLineHistoryEntry));
- sourceHistory->tli = sourcetli;
- sourceHistory->begin = sourceHistory->end = InvalidXLogRecPtr;
- nentries = 1;
+ history = (TimeLineHistoryEntry *) pg_malloc(sizeof(TimeLineHistoryEntry));
+ history->tli = tli;
+ history->begin = history->end = InvalidXLogRecPtr;
+ *nentries = 1;
}
else
{
char path[MAXPGPATH];
char *histfile;
- TLHistoryFilePath(path, sourcetli);
- histfile = fetchFile(path, NULL);
+ TLHistoryFilePath(path, tli);
+
+ /* Get history file from appropriate source */
+ if (controlFile == &ControlFile_source)
+ histfile = fetchFile(path, NULL);
+ else if (controlFile == &ControlFile_target)
+ histfile = slurpFile(datadir_target, path, NULL);
+ else
+ pg_fatal("Invalid control file");
- sourceHistory = rewind_parseTimeLineHistory(histfile,
- ControlFile_source.checkPointCopy.ThisTimeLineID,
- &nentries);
+ history = rewind_parseTimeLineHistory(histfile, tli, nentries);
pg_free(histfile);
}
- /*
- * Trace the history backwards, until we hit the target timeline.
- *
- * TODO: This assumes that there are no timeline switches on the target
- * cluster after the fork.
- */
- for (i = nentries - 1; i >= 0; i--)
+ if (debug)
{
- TimeLineHistoryEntry *entry = &sourceHistory[i];
+ int i;
+
+ if (controlFile == &ControlFile_source)
+ printf("Source timeline history:\n");
+ else if (controlFile == &ControlFile_target)
+ printf("Target timeline history:\n");
+ else
+ Assert(false);
- if (entry->tli == targettli)
+ /*
+ * Print the target timeline history.
+ */
+ for (i = 0; i < targetNentries; i++)
{
- /* found it */
- *recptr = entry->end;
- *tli = entry->tli;
+ TimeLineHistoryEntry *entry;
- pg_free(sourceHistory);
- return;
+ entry = &history[i];
+ printf("%d: %X/%X - %X/%X\n", entry->tli,
+ (uint32) (entry->begin >> 32), (uint32) (entry->begin),
+ (uint32) (entry->end >> 32), (uint32) (entry->end));
}
}
- pg_fatal("could not find common ancestor of the source and target cluster's timelines\n");
+ return history;
+}
+
+/*
+ * Determine the TLI of the last common timeline in the timeline history of the
+ * two clusters. targetHistory is filled with target timeline history and
+ * targetNentries is number of items in targetHistory. *tliIndex is set to the
+ * index of last common timeline in targetHistory array, and *recptr is set to
+ * the position where the timeline history diverged (ie. the first WAL record
+ * that's not the same in both clusters).
+ *
+ * Control files of both clusters must be read into ControlFile_target/source
+ * before calling this routine.
+ */
+static void
+findCommonAncestorTimeline(XLogRecPtr *recptr, int *tliIndex)
+{
+ TimeLineHistoryEntry *sourceHistory;
+ int sourceNentries;
+ int i, n;
+
+ /* Retrieve timelines for both source and target */
+ sourceHistory = getTimelineHistory(&ControlFile_source, &sourceNentries);
+ targetHistory = getTimelineHistory(&ControlFile_target, &targetNentries);
+
+ /*
+ * Trace the history forward, until we hit the timeline diverge. It may
+ * still be possible that the source and target nodes used the same
+ * timeline number in their history but with different start position
+ * depending on the history files that each node has fetched in previous
+ * recovery processes. Hence check the start position of the new timeline
+ * as well and move down by one extra timeline entry if they do not match.
+ */
+ n = Min(sourceNentries, targetNentries);
+ for (i = 0; i < n; i++)
+ {
+ if (sourceHistory[i].tli != targetHistory[i].tli ||
+ sourceHistory[i].begin != targetHistory[i].begin)
+ break;
+ }
+
+ if (i > 0)
+ {
+ i--;
+ *recptr = MinXLogRecPtr(sourceHistory[i].end, targetHistory[i].end);
+ *tliIndex = i;
+
+ pg_free(sourceHistory);
+ return;
+ }
+ else
+ {
+ pg_fatal("could not find common ancestor of the source and target cluster's timelines\n");
+ }
}