]> granicus.if.org Git - postgresql/commitdiff
Avoid unlikely data-loss scenarios due to rename() without fsync.
authorAndres Freund <andres@anarazel.de>
Thu, 10 Mar 2016 02:53:54 +0000 (18:53 -0800)
committerAndres Freund <andres@anarazel.de>
Thu, 10 Mar 2016 02:53:54 +0000 (18:53 -0800)
Renaming a file using rename(2) is not guaranteed to be durable in face
of crashes. Use the previously added durable_rename()/durable_link_or_rename()
in various places where we previously just renamed files.

Most of the changed call sites are arguably not critical, but it seems
better to err on the side of too much durability.  The most prominent
known case where the previously missing fsyncs could cause data loss is
crashes at the end of a checkpoint. After the actual checkpoint has been
performed, old WAL files are recycled. When they're filled, their
contents are fdatasynced, but we did not fsync the containing
directory. An OS/hardware crash in an unfortunate moment could then end
up leaving that file with its old name, but new content; WAL replay
would thus not replay it.

Reported-By: Tomas Vondra
Author: Michael Paquier, Tomas Vondra, Andres Freund
Discussion: 56583BDD.9060302@2ndquadrant.com
Backpatch: All supported branches

contrib/pg_stat_statements/pg_stat_statements.c
src/backend/access/transam/timeline.c
src/backend/access/transam/xlog.c
src/backend/access/transam/xlogarchive.c
src/backend/postmaster/pgarch.c

index 5a841d79d47e1200d90a3baf6b987ffc8f6fbce6..2100fb2296a09a81877608658aea8349d30940e7 100644 (file)
@@ -588,11 +588,7 @@ pgss_shmem_shutdown(int code, Datum arg)
        /*
         * Rename file into place, so we atomically replace the old one.
         */
-       if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
-               ereport(LOG,
-                               (errcode_for_file_access(),
-                                errmsg("could not rename pg_stat_statement file \"%s\": %m",
-                                               PGSS_DUMP_FILE ".tmp")));
+       (void) durable_rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE, LOG);
 
        return;
 
index 200808f7771a759744c8bf39728e79d62604ce58..fec5a3acb0a5296d5aee29477820be19cdccb0dc 100644 (file)
@@ -418,24 +418,10 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
        TLHistoryFilePath(path, newTLI);
 
        /*
-        * Prefer link() to rename() here just to be really sure that we don't
-        * overwrite an existing file.  However, there shouldn't be one, so
-        * rename() is an acceptable substitute except for the truly paranoid.
+        * Perform the rename using link if available, paranoidly trying to avoid
+        * overwriting an existing file (there shouldn't be one).
         */
-#if HAVE_WORKING_LINK
-       if (link(tmppath, path) < 0)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not link file \"%s\" to \"%s\": %m",
-                                               tmppath, path)));
-       unlink(tmppath);
-#else
-       if (rename(tmppath, path) < 0)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                                               tmppath, path)));
-#endif
+       durable_link_or_rename(tmppath, path, ERROR);
 
        /* The history file can be archived immediately. */
        if (XLogArchivingActive())
@@ -508,24 +494,10 @@ writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
        TLHistoryFilePath(path, tli);
 
        /*
-        * Prefer link() to rename() here just to be really sure that we don't
-        * overwrite an existing logfile.  However, there shouldn't be one, so
-        * rename() is an acceptable substitute except for the truly paranoid.
+        * Perform the rename using link if available, paranoidly trying to avoid
+        * overwriting an existing file (there shouldn't be one).
         */
-#if HAVE_WORKING_LINK
-       if (link(tmppath, path) < 0)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not link file \"%s\" to \"%s\": %m",
-                                               tmppath, path)));
-       unlink(tmppath);
-#else
-       if (rename(tmppath, path) < 0)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                                               tmppath, path)));
-#endif
+       durable_link_or_rename(tmppath, path, ERROR);
 }
 
 /*
index dadaa94bc8272476d9ed40e98ffa5a1c8482ed89..8814c00221d4ed78cab1a7495306664dcb6981fa 100644 (file)
@@ -2553,34 +2553,16 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
        }
 
        /*
-        * Prefer link() to rename() here just to be really sure that we don't
-        * overwrite an existing logfile.  However, there shouldn't be one, so
-        * rename() is an acceptable substitute except for the truly paranoid.
+        * Perform the rename using link if available, paranoidly trying to avoid
+        * overwriting an existing file (there shouldn't be one).
         */
-#if HAVE_WORKING_LINK
-       if (link(tmppath, path) < 0)
+       if (durable_link_or_rename(tmppath, path, LOG) != 0)
        {
                if (use_lock)
                        LWLockRelease(ControlFileLock);
-               ereport(LOG,
-                               (errcode_for_file_access(),
-                                errmsg("could not link file \"%s\" to \"%s\" (initialization of log file): %m",
-                                               tmppath, path)));
-               return false;
-       }
-       unlink(tmppath);
-#else
-       if (rename(tmppath, path) < 0)
-       {
-               if (use_lock)
-                       LWLockRelease(ControlFileLock);
-               ereport(LOG,
-                               (errcode_for_file_access(),
-                                errmsg("could not rename file \"%s\" to \"%s\" (initialization of log file): %m",
-                                               tmppath, path)));
+               /* durable_link_or_rename already emitted log message */
                return false;
        }
-#endif
 
        if (use_lock)
                LWLockRelease(ControlFileLock);
@@ -4474,11 +4456,7 @@ exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo)
         * re-enter archive recovery mode in a subsequent crash.
         */
        unlink(RECOVERY_COMMAND_DONE);
-       if (rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE) != 0)
-               ereport(FATAL,
-                               (errcode_for_file_access(),
-                                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                                               RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE)));
+       durable_rename(RECOVERY_COMMAND_FILE, RECOVERY_COMMAND_DONE, FATAL);
 
        ereport(LOG,
                        (errmsg("archive recovery complete")));
@@ -5472,11 +5450,7 @@ StartupXLOG(void)
                if (haveBackupLabel)
                {
                        unlink(BACKUP_LABEL_OLD);
-                       if (rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD) != 0)
-                               ereport(FATAL,
-                                               (errcode_for_file_access(),
-                                                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                                                               BACKUP_LABEL_FILE, BACKUP_LABEL_OLD)));
+                       durable_rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, FATAL);
                }
 
                /* Check that the GUCs used to generate the WAL allow recovery */
@@ -9524,7 +9498,7 @@ CancelBackup(void)
        /* remove leftover file from previously canceled backup if it exists */
        unlink(BACKUP_LABEL_OLD);
 
-       if (rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD) == 0)
+       if (durable_rename(BACKUP_LABEL_FILE, BACKUP_LABEL_OLD, DEBUG1) == 0)
        {
                ereport(LOG,
                                (errmsg("online backup mode canceled"),
index a9c3791a5bced5e469faca5da9a6dda07c6b6f06..c6cae9fc1048a5e1ced2d923727d0f5241c1f5c8 100644 (file)
@@ -468,11 +468,7 @@ KeepFileRestoredFromArchive(char *path, char *xlogfname)
                reload = true;
        }
 
-       if (rename(path, xlogfpath) < 0)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                                               path, xlogfpath)));
+       durable_rename(path, xlogfpath, ERROR);
 
        /*
         * Create .done file forcibly to prevent the restored segment from being
@@ -575,12 +571,7 @@ XLogArchiveForceDone(const char *xlog)
        StatusFilePath(archiveReady, xlog, ".ready");
        if (stat(archiveReady, &stat_buf) == 0)
        {
-               if (rename(archiveReady, archiveDone) < 0)
-                       ereport(WARNING,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not rename file \"%s\" to \"%s\": %m",
-                                                       archiveReady, archiveDone)));
-
+               (void) durable_rename(archiveReady, archiveDone, WARNING);
                return;
        }
 
index c6ef9e1083e9851948e05645f9f79149ce2ccf36..11e6b0608d26eca945e5a517b77962f83832d7cc 100644 (file)
@@ -750,9 +750,5 @@ pgarch_archiveDone(char *xlog)
 
        StatusFilePath(rlogready, xlog, ".ready");
        StatusFilePath(rlogdone, xlog, ".done");
-       if (rename(rlogready, rlogdone) < 0)
-               ereport(WARNING,
-                               (errcode_for_file_access(),
-                                errmsg("could not rename file \"%s\" to \"%s\": %m",
-                                               rlogready, rlogdone)));
+       (void) durable_rename(rlogready, rlogdone, WARNING);
 }