]> granicus.if.org Git - postgresql/commitdiff
Don't archive bogus recycled or preallocated files after timeline switch.
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>
Mon, 13 Apr 2015 13:53:49 +0000 (16:53 +0300)
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>
Mon, 13 Apr 2015 14:26:49 +0000 (17:26 +0300)
After a timeline switch, we would leave behind recycled WAL segments that
are in the future, but on the old timeline. After promotion, and after they
become old enough to be recycled again, we would notice that they don't have
a .ready or .done file, create a .ready file for them, and archive them.
That's bogus, because the files contain garbage, recycled from an older
timeline (or prealloced as zeros). We shouldn't archive such files.

This could happen when we're following a timeline switch during replay, or
when we switch to new timeline at end-of-recovery.

To fix, whenever we switch to a new timeline, scan the data directory for
WAL segments on the old timeline, but with a higher segment number, and
remove them. Those don't belong to our timeline history, and are most
likely bogus recycled or preallocated files. They could also be valid files
that we streamed from the primary ahead of time, but in any case, they're
not needed to recover to the new timeline.

src/backend/access/transam/xlog.c

index 1adc584b53092004c219a11cb08f38828b626561..eb86a135abf674de2773750d17e6ae5ec4eda68d 100644 (file)
@@ -609,6 +609,7 @@ static void XLogArchiveNotify(const char *xlog);
 static void XLogArchiveNotifySeg(uint32 log, uint32 seg);
 static bool XLogArchiveCheckDone(const char *xlog);
 static bool XLogArchiveIsBusy(const char *xlog);
+extern bool XLogArchiveIsReady(const char *xlog);
 static void XLogArchiveCleanup(const char *xlog);
 static void readRecoveryCommandFile(void);
 static void exitArchiveRecovery(TimeLineID endTLI,
@@ -646,6 +647,8 @@ static void ExecuteRecoveryCommand(char *command, char *commandName,
                                           bool failOnerror);
 static void PreallocXlogFiles(XLogRecPtr endptr);
 static void RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr);
+static void RemoveXlogFile(const char *segname, XLogRecPtr endptr);
+static void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI);
 static void UpdateLastRemovedPtr(char *filename);
 static void ValidateXLOGDirectoryStructure(void);
 static void CleanupBackupHistory(void);
@@ -1455,6 +1458,25 @@ XLogArchiveIsBusy(const char *xlog)
        return true;
 }
 
+/*
+ * XLogArchiveIsReady
+ *
+ * Check to see if an XLOG segment file has an archive notification (.ready)
+ * file.
+ */
+bool
+XLogArchiveIsReady(const char *xlog)
+{
+       char            archiveStatusPath[MAXPGPATH];
+       struct stat stat_buf;
+
+       StatusFilePath(archiveStatusPath, xlog, ".ready");
+       if (stat(archiveStatusPath, &stat_buf) == 0)
+               return true;
+
+       return false;
+}
+
 /*
  * XLogArchiveCleanup
  *
@@ -3332,25 +3354,9 @@ UpdateLastRemovedPtr(char *filename)
 static void
 RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr)
 {
-       uint32          endlogId;
-       uint32          endlogSeg;
-       int                     max_advance;
        DIR                *xldir;
        struct dirent *xlde;
        char            lastoff[MAXFNAMELEN];
-       char            path[MAXPGPATH];
-
-#ifdef WIN32
-       char            newpath[MAXPGPATH];
-#endif
-       struct stat statbuf;
-
-       /*
-        * Initialize info about where to try to recycle to.  We allow recycling
-        * segments up to XLOGfileslop segments beyond the current XLOG location.
-        */
-       XLByteToPrevSeg(endptr, endlogId, endlogSeg);
-       max_advance = XLOGfileslop;
 
        xldir = AllocateDir(XLOGDIR);
        if (xldir == NULL)
@@ -3366,6 +3372,11 @@ RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr)
 
        while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
        {
+               /* Ignore files that are not XLOG segments */
+               if (strlen(xlde->d_name) != 24 ||
+                       strspn(xlde->d_name, "0123456789ABCDEF") != 24)
+                       continue;
+
                /*
                 * We ignore the timeline part of the XLOG segment identifiers in
                 * deciding whether a segment is still needed.  This ensures that we
@@ -3377,9 +3388,7 @@ RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr)
                 * We use the alphanumeric sorting property of the filenames to decide
                 * which ones are earlier than the lastoff segment.
                 */
-               if (strlen(xlde->d_name) == 24 &&
-                       strspn(xlde->d_name, "0123456789ABCDEF") == 24 &&
-                       strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
+               if (strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
                {
                        /*
                         * Normally we don't delete old XLOG files during recovery to
@@ -3394,86 +3403,107 @@ RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr)
                         */
                        if (WalRcvInProgress() || XLogArchiveCheckDone(xlde->d_name))
                        {
-                               snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlde->d_name);
-
                                /* Update the last removed location in shared memory first */
                                UpdateLastRemovedPtr(xlde->d_name);
 
-                               /*
-                                * Before deleting the file, see if it can be recycled as a
-                                * future log segment. Only recycle normal files, pg_standby
-                                * for example can create symbolic links pointing to a
-                                * separate archive directory.
-                                */
-                               if (lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
-                                       InstallXLogFileSegment(&endlogId, &endlogSeg, path,
-                                                                                  true, &max_advance, true))
-                               {
-                                       ereport(DEBUG2,
-                                                       (errmsg("recycled transaction log file \"%s\"",
-                                                                       xlde->d_name)));
-                                       CheckpointStats.ckpt_segs_recycled++;
-                                       /* Needn't recheck that slot on future iterations */
-                                       if (max_advance > 0)
-                                       {
-                                               NextLogSeg(endlogId, endlogSeg);
-                                               max_advance--;
-                                       }
-                               }
-                               else
-                               {
-                                       /* No need for any more future segments... */
-                                       int                     rc;
+                               RemoveXlogFile(xlde->d_name, endptr);
+                       }
+               }
+       }
 
-                                       ereport(DEBUG2,
-                                                       (errmsg("removing transaction log file \"%s\"",
-                                                                       xlde->d_name)));
+       FreeDir(xldir);
+}
 
+/*
+ * Recycle or remove a log file that's no longer needed.
+ *
+ * endptr is current (or recent) end of xlog; this is used to determine
+ * whether we want to recycle rather than delete no-longer-wanted log files.
+ */
+static void
+RemoveXlogFile(const char *segname, XLogRecPtr endptr)
+{
+       char            path[MAXPGPATH];
 #ifdef WIN32
+       char            newpath[MAXPGPATH];
+#endif
+       struct stat statbuf;
+       uint32          endlogId;
+       uint32          endlogSeg;
+       int                     max_advance;
 
-                                       /*
-                                        * On Windows, if another process (e.g another backend)
-                                        * holds the file open in FILE_SHARE_DELETE mode, unlink
-                                        * will succeed, but the file will still show up in
-                                        * directory listing until the last handle is closed. To
-                                        * avoid confusing the lingering deleted file for a live
-                                        * WAL file that needs to be archived, rename it before
-                                        * deleting it.
-                                        *
-                                        * If another process holds the file open without
-                                        * FILE_SHARE_DELETE flag, rename will fail. We'll try
-                                        * again at the next checkpoint.
-                                        */
-                                       snprintf(newpath, MAXPGPATH, "%s.deleted", path);
-                                       if (rename(path, newpath) != 0)
-                                       {
-                                               ereport(LOG,
-                                                               (errcode_for_file_access(),
-                                                                errmsg("could not rename old transaction log file \"%s\": %m",
-                                                                               path)));
-                                               continue;
-                                       }
-                                       rc = unlink(newpath);
+       /*
+        * Initialize info about where to try to recycle to.  We allow recycling
+        * segments up to XLOGfileslop segments beyond the current XLOG location.
+        */
+       XLByteToPrevSeg(endptr, endlogId, endlogSeg);
+       max_advance = XLOGfileslop;
+
+       snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
+
+       /*
+        * Before deleting the file, see if it can be recycled as a future log
+        * segment. Only recycle normal files, pg_standby for example can create
+        * symbolic links pointing to a separate archive directory.
+        */
+       if (lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
+               InstallXLogFileSegment(&endlogId, &endlogSeg, path,
+                                                          true, &max_advance, true))
+       {
+               ereport(DEBUG2,
+                               (errmsg("recycled transaction log file \"%s\"", segname)));
+               CheckpointStats.ckpt_segs_recycled++;
+               /* Needn't recheck that slot on future iterations */
+               if (max_advance > 0)
+               {
+                       NextLogSeg(endlogId, endlogSeg);
+                       max_advance--;
+               }
+       }
+       else
+       {
+               /* No need for any more future segments... */
+               int                     rc;
+
+               ereport(DEBUG2,
+                               (errmsg("removing transaction log file \"%s\"", segname)));
+
+#ifdef WIN32
+               /*
+                * On Windows, if another process (e.g another backend) holds the file
+                * open in FILE_SHARE_DELETE mode, unlink will succeed, but the file
+                * will still show up in directory listing until the last handle is
+                * closed. To avoid confusing the lingering deleted file for a live
+                * WAL file that needs to be archived, rename it before deleting it.
+                *
+                * If another process holds the file open without FILE_SHARE_DELETE
+                * flag, rename will fail. We'll try again at the next checkpoint.
+                */
+               snprintf(newpath, MAXPGPATH, "%s.deleted", path);
+               if (rename(path, newpath) != 0)
+               {
+                       ereport(LOG,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not rename old transaction log file \"%s\": %m",
+                                                       path)));
+                       return;
+               }
+               rc = unlink(newpath);
 #else
-                                       rc = unlink(path);
+               rc = unlink(path);
 #endif
-                                       if (rc != 0)
-                                       {
-                                               ereport(LOG,
-                                                               (errcode_for_file_access(),
-                                                                errmsg("could not remove old transaction log file \"%s\": %m",
-                                                                               path)));
-                                               continue;
-                                       }
-                                       CheckpointStats.ckpt_segs_removed++;
-                               }
-
-                               XLogArchiveCleanup(xlde->d_name);
-                       }
+               if (rc != 0)
+               {
+                       ereport(LOG,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not remove old transaction log file \"%s\": %m",
+                                                       path)));
+                       return;
                }
+               CheckpointStats.ckpt_segs_removed++;
        }
 
-       FreeDir(xldir);
+       XLogArchiveCleanup(segname);
 }
 
 /*
@@ -5629,6 +5659,76 @@ exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg)
                        (errmsg("archive recovery complete")));
 }
 
+/*
+ * Remove WAL files that are not part of the given timeline's history.
+ *
+ * This is called during recovery, whenever we switch to follow a new
+ * timeline, and at the end of recovery when we create a new timeline. We
+ * wouldn't otherwise care about extra WAL files lying in pg_xlog, but they
+ * can be pre-allocated or recycled WAL segments on the old timeline that we
+ * haven't used yet, and contain garbage. If we just leave them in pg_xlog,
+ * they will eventually be archived, and we can't let that happen. Files that
+ * belong to our timeline history are valid, because we have successfully
+ * replayed them, but from others we can't be sure.
+ *
+ * 'switchpoint' is the current point in WAL where we switch to new timeline,
+ * and 'newTLI' is the new timeline we switch to.
+ */
+static void
+RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
+{
+       DIR                *xldir;
+       struct dirent *xlde;
+       char            switchseg[MAXFNAMELEN];
+       uint32          endlogId;
+       uint32          endlogSeg;
+
+       XLByteToPrevSeg(switchpoint, endlogId, endlogSeg);
+
+       xldir = AllocateDir(XLOGDIR);
+       if (xldir == NULL)
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not open transaction log directory \"%s\": %m",
+                                               XLOGDIR)));
+
+       /*
+        * Construct a filename of the last segment to be kept.
+        */
+       XLogFileName(switchseg, newTLI, endlogId, endlogSeg);
+
+       elog(DEBUG2, "attempting to remove WAL segments newer than log file %s",
+                switchseg);
+
+       while ((xlde = ReadDir(xldir, XLOGDIR)) != NULL)
+       {
+               /* Ignore files that are not XLOG segments */
+               if (strlen(xlde->d_name) != 24 ||
+                       strspn(xlde->d_name, "0123456789ABCDEF") != 24)
+                       continue;
+
+               /*
+                * Remove files that are on a timeline older than the new one we're
+                * switching to, but with a segment number >= the first segment on
+                * the new timeline.
+                */
+               if (strncmp(xlde->d_name, switchseg, 8) < 0 &&
+                       strcmp(xlde->d_name + 8, switchseg + 8) > 0)
+               {
+                       /*
+                        * If the file has already been marked as .ready, however, don't
+                        * remove it yet. It should be OK to remove it - files that are
+                        * not part of our timeline history are not required for recovery
+                        * - but seems safer to let them be archived and removed later.
+                        */
+                       if (!XLogArchiveIsReady(xlde->d_name))
+                               RemoveXlogFile(xlde->d_name, switchpoint);
+               }
+       }
+
+       FreeDir(xldir);
+}
+
 /*
  * For point-in-time recovery, this function decides whether we want to
  * stop applying the XLOG at or after the current record.
@@ -6075,6 +6175,7 @@ StartupXLOG(void)
        bool            wasShutdown;
        bool            reachedStopPoint = false;
        bool            haveBackupLabel = false;
+       bool            didArchiveRecovery = false;
        XLogRecPtr      RecPtr,
                                checkPointLoc,
                                EndOfLog;
@@ -6844,7 +6945,10 @@ StartupXLOG(void)
         * we will use that below.)
         */
        if (InArchiveRecovery)
+       {
+               didArchiveRecovery = true;
                exitArchiveRecovery(curFileTLI, endLogId, endLogSeg);
+       }
 
        /*
         * Prepare to write WAL starting at EndOfLog position, and init xlog
@@ -6957,6 +7061,12 @@ StartupXLOG(void)
                                                                   true);
        }
 
+       /*
+        * Clean up any (possibly bogus) future WAL segments on the old timeline.
+        */
+       if (didArchiveRecovery)
+               RemoveNonParentXlogFiles(EndOfLog, ThisTimeLineID);
+
        /*
         * Preallocate additional log files, if wanted.
         */