]> granicus.if.org Git - postgresql/commitdiff
Fsync directory after creating or unlinking file.
authorTeodor Sigaev <teodor@sigaev.ru>
Mon, 27 Mar 2017 16:33:01 +0000 (19:33 +0300)
committerTeodor Sigaev <teodor@sigaev.ru>
Mon, 27 Mar 2017 16:33:01 +0000 (19:33 +0300)
If a file was created or deleted just before a power loss, it's possible
that the file system will not have persisted that change. To prevent this,
call fsync() on the parent directory where creating/unlinking a file is critical.

Author: Michael Paquier
Reviewed-by: Ashutosh Bapat, Takayuki Tsunakawa, me
src/backend/access/transam/clog.c
src/backend/access/transam/commit_ts.c
src/backend/access/transam/twophase.c
src/backend/access/transam/xlog.c
src/backend/storage/file/fd.c
src/include/storage/fd.h

index 2d335109303ed5c2be040001151ee90f36aa10db..7a007a6ba50349395ef6643d4c78dbf549a02812 100644 (file)
@@ -577,6 +577,13 @@ ShutdownCLOG(void)
        /* Flush dirty CLOG pages to disk */
        TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(false);
        SimpleLruFlush(ClogCtl, false);
+
+       /*
+        * fsync pg_xact to ensure that any files flushed previously are durably
+        * on disk.
+        */
+       fsync_fname("pg_xact", true);
+
        TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(false);
 }
 
@@ -589,6 +596,13 @@ CheckPointCLOG(void)
        /* Flush dirty CLOG pages to disk */
        TRACE_POSTGRESQL_CLOG_CHECKPOINT_START(true);
        SimpleLruFlush(ClogCtl, true);
+
+       /*
+        * fsync pg_xact to ensure that any files flushed previously are durably
+        * on disk.
+        */
+       fsync_fname("pg_xact", true);
+
        TRACE_POSTGRESQL_CLOG_CHECKPOINT_DONE(true);
 }
 
index 8e1df6e0eabefa798a2be9c59c1124c60db0fdc8..03ffa20908404481f4b19d55cd909d58a01d0c9d 100644 (file)
@@ -746,6 +746,12 @@ ShutdownCommitTs(void)
 {
        /* Flush dirty CommitTs pages to disk */
        SimpleLruFlush(CommitTsCtl, false);
+
+       /*
+        * fsync pg_commit_ts to ensure that any files flushed previously are durably
+        * on disk.
+        */
+       fsync_fname("pg_commit_ts", true);
 }
 
 /*
@@ -756,6 +762,12 @@ CheckPointCommitTs(void)
 {
        /* Flush dirty CommitTs pages to disk */
        SimpleLruFlush(CommitTsCtl, true);
+
+       /*
+        * fsync pg_commit_ts to ensure that any files flushed previously are durably
+        * on disk.
+        */
+       fsync_fname("pg_commit_ts", true);
 }
 
 /*
index 4b4999fd7b498bd75d196d222f6501df33ed104f..83169cccc301179a601b33d7ae0f87145ddd2450 100644 (file)
@@ -1650,6 +1650,14 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon)
        }
        LWLockRelease(TwoPhaseStateLock);
 
+       /*
+        * Unconditionally fsync the parent directory so that its contents are
+        * durable on disk.  Two-phase files may have been removed, and those
+        * removals must be made persistent, as must any files newly created
+        * since the last checkpoint.
+        */
+       fsync_fname(TWOPHASE_DIR, true);
+
        TRACE_POSTGRESQL_TWOPHASE_CHECKPOINT_DONE();
 
        if (log_checkpoints && serialized_xacts > 0)
index 58790e0e96e29a77c72211c1f20118ce95608daa..61ca81d1d2437328deec23462651677a87c786d9 100644 (file)
@@ -3475,7 +3475,7 @@ InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
        if (!find_free)
        {
                /* Force installation: get rid of any pre-existing segment file */
-               unlink(path);
+               durable_unlink(path, DEBUG1);
        }
        else
        {
@@ -4026,16 +4026,13 @@ RemoveXlogFile(const char *segname, XLogRecPtr PriorRedoPtr, XLogRecPtr endptr)
                                          path)));
                        return;
                }
-               rc = unlink(newpath);
+               rc = durable_unlink(newpath, LOG);
 #else
-               rc = unlink(path);
+               rc = durable_unlink(path, LOG);
 #endif
                if (rc != 0)
                {
-                       ereport(LOG,
-                                       (errcode_for_file_access(),
-                          errmsg("could not remove old transaction log file \"%s\": %m",
-                                         path)));
+                       /* Message already logged by durable_unlink() */
                        return;
                }
                CheckpointStats.ckpt_segs_removed++;
@@ -10771,17 +10768,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
                                                (errcode_for_file_access(),
                                                 errmsg("could not read file \"%s\": %m",
                                                                BACKUP_LABEL_FILE)));
-                       if (unlink(BACKUP_LABEL_FILE) != 0)
-                               ereport(ERROR,
-                                               (errcode_for_file_access(),
-                                                errmsg("could not remove file \"%s\": %m",
-                                                               BACKUP_LABEL_FILE)));
+                       durable_unlink(BACKUP_LABEL_FILE, ERROR);
 
                        /*
                         * Remove tablespace_map file if present, it is created only if there
                         * are tablespaces.
                         */
-                       unlink(TABLESPACE_MAP);
+                       durable_unlink(TABLESPACE_MAP, DEBUG1);
                }
                PG_END_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive));
        }
index f0ed2e9b5f4d05c0aa20f7d8f0c4bdf0a4683b30..b14979496c79d3ea5df80682456fdf95982ac167 100644 (file)
@@ -657,6 +657,43 @@ durable_rename(const char *oldfile, const char *newfile, int elevel)
        return 0;
 }
 
+/*
+ * durable_unlink -- remove a file in a durable manner
+ *
+ * This routine ensures that, once it has returned successfully, the removal
+ * of the file persists across a crash.  A crash while this routine is
+ * running will not leave the system in a mixed state.
+ *
+ * It does so by fsyncing the parent directory of the file after the actual
+ * removal is done.
+ *
+ * Errors are logged with the severity "elevel" specified by the caller.
+ *
+ * Returns 0 if the operation succeeded, -1 otherwise. Note that errno is not
+ * valid upon return.
+ */
+int
+durable_unlink(const char *fname, int elevel)
+{
+       if (unlink(fname) < 0)
+       {
+               ereport(elevel,
+                               (errcode_for_file_access(),
+                                errmsg("could not remove file \"%s\": %m",
+                                               fname)));
+               return -1;
+       }
+
+       /*
+        * To guarantee that the removal of the file is persistent, fsync its
+        * parent directory.  A failure here is also reported to the caller as -1.
+        */
+       if (fsync_parent_path(fname, elevel) != 0)
+               return -1;
+
+       return 0;
+}
+
 /*
  * durable_link_or_rename -- rename a file in a durable manner.
  *
index ac37502928a7804e7efaba5d02744d73c20cbe62..05680499e4e532c61e6e832815a2f41839ee184f 100644 (file)
@@ -119,6 +119,7 @@ extern int  pg_fdatasync(int fd);
 extern void pg_flush_data(int fd, off_t offset, off_t amount);
 extern void fsync_fname(const char *fname, bool isdir);
 extern int     durable_rename(const char *oldfile, const char *newfile, int loglevel);
+extern int     durable_unlink(const char *fname, int loglevel);
 extern int     durable_link_or_rename(const char *oldfile, const char *newfile, int loglevel);
 extern void SyncDataDirectory(void);