From bc34223bc1e2c51dff2007b3d3bd492a09b5a491 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut Date: Thu, 29 Sep 2016 12:00:00 -0400 Subject: [PATCH] pg_basebackup pg_receivexlog: Issue fsync more carefully Several places weren't careful about fsyncing in the way. See 1d4a0ab1 and 606e0f98 for details about required fsyncs. This adds a couple of functions in src/common/ that have an equivalent in the backend: durable_rename(), fsync_parent_path() From: Michael Paquier --- src/bin/pg_basebackup/pg_basebackup.c | 27 +++++++ src/bin/pg_basebackup/receivelog.c | 52 +++++++------ src/common/file_utils.c | 105 ++++++++++++++++++++++++-- src/include/common/file_utils.h | 7 +- 4 files changed, 162 insertions(+), 29 deletions(-) diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index d077544d62..cd7d095103 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -27,6 +27,7 @@ #include #endif +#include "common/file_utils.h" #include "common/string.h" #include "fe_utils/string_utils.h" #include "getopt_long.h" @@ -1196,6 +1197,10 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) if (copybuf != NULL) PQfreemem(copybuf); + + /* sync the resulting tar file, errors are not considered fatal */ + if (strcmp(basedir, "-") != 0) + (void) fsync_fname(filename, false, progname); } @@ -1472,6 +1477,11 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum) if (basetablespace && writerecoveryconf) WriteRecoveryConf(); + + /* + * No data is synced here, everything is done for all tablespaces at the + * end. + */ } /* @@ -1950,6 +1960,23 @@ BaseBackup(void) PQclear(res); PQfinish(conn); + /* + * Make data persistent on disk once backup is completed. For tar + * format once syncing the parent directory is fine, each tar file + * created per tablespace has been already synced. In plain format, + * all the data of the base directory is synced, taking into account + * all the tablespaces. Errors are not considered fatal. + */ + if (format == 't') + { + if (strcmp(basedir, "-") != 0) + (void) fsync_fname(basedir, true, progname); + } + else + { + (void) fsync_pgdata(basedir, progname); + } + if (verbose) fprintf(stderr, "%s: base backup completed\n", progname); } diff --git a/src/bin/pg_basebackup/receivelog.c b/src/bin/pg_basebackup/receivelog.c index 3a921ebf2d..6b78a60f27 100644 --- a/src/bin/pg_basebackup/receivelog.c +++ b/src/bin/pg_basebackup/receivelog.c @@ -26,6 +26,7 @@ #include "libpq-fe.h" #include "access/xlog_internal.h" +#include "common/file_utils.h" /* fd and filename for currently open WAL file */ @@ -71,17 +72,13 @@ mark_file_as_archived(const char *basedir, const char *fname) return false; } - if (fsync(fd) != 0) - { - fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), - progname, tmppath, strerror(errno)); - - close(fd); + close(fd); + if (fsync_fname(tmppath, false, progname) != 0) return false; - } - close(fd); + if (fsync_parent_path(tmppath, progname) != 0) + return false; return true; } @@ -132,6 +129,16 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) { /* File is open and ready to use */ walfile = f; + + /* + * fsync, in case of a previous crash between padding and fsyncing the + * file. + */ + if (fsync_fname(fn, false, progname) != 0) + return false; + if (fsync_parent_path(fn, progname) != 0) + return false; + return true; } if (statbuf.st_size != 0) @@ -160,6 +167,17 @@ open_walfile(StreamCtl *stream, XLogRecPtr startpoint) } free(zerobuf); + /* + * fsync WAL file and containing directory, to ensure the file is + * persistently created and zeroed. That's particularly important when + * using synchronous mode, where the file is modified and fsynced + * in-place, without a directory fsync. + */ + if (fsync_fname(fn, false, progname) != 0) + return false; + if (fsync_parent_path(fn, progname) != 0) + return false; + if (lseek(f, SEEK_SET, 0) != 0) { fprintf(stderr, @@ -220,10 +238,9 @@ close_walfile(StreamCtl *stream, XLogRecPtr pos) snprintf(oldfn, sizeof(oldfn), "%s/%s%s", stream->basedir, current_walfile_name, stream->partial_suffix); snprintf(newfn, sizeof(newfn), "%s/%s", stream->basedir, current_walfile_name); - if (rename(oldfn, newfn) != 0) + if (durable_rename(oldfn, newfn, progname) != 0) { - fprintf(stderr, _("%s: could not rename file \"%s\": %s\n"), - progname, current_walfile_name, strerror(errno)); + /* durable_rename produced a log entry */ return false; } } @@ -341,14 +358,6 @@ writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content) return false; } - if (fsync(fd) != 0) - { - close(fd); - fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), - progname, tmppath, strerror(errno)); - return false; - } - if (close(fd) != 0) { fprintf(stderr, _("%s: could not close file \"%s\": %s\n"), @@ -359,10 +368,9 @@ writeTimeLineHistoryFile(StreamCtl *stream, char *filename, char *content) /* * Now move the completed history file into place with its final name. */ - if (rename(tmppath, path) < 0) + if (durable_rename(tmppath, path, progname) < 0) { - fprintf(stderr, _("%s: could not rename file \"%s\" to \"%s\": %s\n"), - progname, tmppath, path, strerror(errno)); + /* durable_rename produced a log entry */ return false; } diff --git a/src/common/file_utils.c b/src/common/file_utils.c index b6f62f7bf1..04cd365e76 100644 --- a/src/common/file_utils.c +++ b/src/common/file_utils.c @@ -34,7 +34,7 @@ static void pre_sync_fname(const char *fname, bool isdir, const char *progname); #endif static void walkdir(const char *path, - void (*action) (const char *fname, bool isdir, const char *progname), + int (*action) (const char *fname, bool isdir, const char *progname), bool process_symlinks, const char *progname); /* @@ -120,7 +120,7 @@ fsync_pgdata(const char *pg_data, const char *progname) */ static void walkdir(const char *path, - void (*action) (const char *fname, bool isdir, const char *progname), + int (*action) (const char *fname, bool isdir, const char *progname), bool process_symlinks, const char *progname) { DIR *dir; @@ -228,7 +228,7 @@ pre_sync_fname(const char *fname, bool isdir, const char *progname) * directories on systems where that isn't allowed/required. Reports * other errors non-fatally. */ -void +int fsync_fname(const char *fname, bool isdir, const char *progname) { int fd; @@ -256,10 +256,10 @@ fsync_fname(const char *fname, bool isdir, const char *progname) if (fd < 0) { if (errno == EACCES || (isdir && errno == EISDIR)) - return; + return 0; fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), progname, fname, strerror(errno)); - return; + return -1; } returncode = fsync(fd); @@ -269,8 +269,103 @@ fsync_fname(const char *fname, bool isdir, const char *progname) * those errors. Anything else needs to be reported. */ if (returncode != 0 && !(isdir && errno == EBADF)) + { fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), progname, fname, strerror(errno)); + return -1; + } (void) close(fd); + return 0; +} + +/* + * fsync_parent_path -- fsync the parent path of a file or directory + * + * This is aimed at making file operations persistent on disk in case of + * an OS crash or power failure. + */ +int +fsync_parent_path(const char *fname, const char *progname) +{ + char parentpath[MAXPGPATH]; + + strlcpy(parentpath, fname, MAXPGPATH); + get_parent_directory(parentpath); + + /* + * get_parent_directory() returns an empty string if the input argument is + * just a file name (see comments in path.c), so handle that as being the + * current directory. + */ + if (strlen(parentpath) == 0) + strlcpy(parentpath, ".", MAXPGPATH); + + if (fsync_fname(parentpath, true, progname) != 0) + return -1; + + return 0; +} + +/* + * durable_rename -- rename(2) wrapper, issuing fsyncs required for durability + * + * Wrapper around rename, similar to the backend version. + */ +int +durable_rename(const char *oldfile, const char *newfile, const char *progname) +{ + int fd; + + /* + * First fsync the old and target path (if it exists), to ensure that they + * are properly persistent on disk. Syncing the target file is not + * strictly necessary, but it makes it easier to reason about crashes; + * because it's then guaranteed that either source or target file exists + * after a crash. + */ + if (fsync_fname(oldfile, false, progname) != 0) + return -1; + + fd = open(newfile, PG_BINARY | O_RDWR, 0); + if (fd < 0) + { + if (errno != ENOENT) + { + fprintf(stderr, _("%s: could not open file \"%s\": %s\n"), + progname, newfile, strerror(errno)); + return -1; + } + } + else + { + if (fsync(fd) != 0) + { + fprintf(stderr, _("%s: could not fsync file \"%s\": %s\n"), + progname, newfile, strerror(errno)); + close(fd); + return -1; + } + close(fd); + } + + /* Time to do the real deal... */ + if (rename(oldfile, newfile) != 0) + { + fprintf(stderr, _("%s: could not rename file \"%s\" to \"%s\": %s\n"), + progname, oldfile, newfile, strerror(errno)); + return -1; + } + + /* + * To guarantee renaming the file is persistent, fsync the file with its + * new name, and its containing directory. + */ + if (fsync_fname(newfile, false, progname) != 0) + return -1; + + if (fsync_parent_path(newfile, progname) != 0) + return -1; + + return 0; } diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h index d3794df7a0..1cb263d9e2 100644 --- a/src/include/common/file_utils.h +++ b/src/include/common/file_utils.h @@ -15,8 +15,11 @@ #ifndef FILE_UTILS_H #define FILE_UTILS_H -extern void fsync_fname(const char *fname, bool isdir, - const char *progname); +extern int fsync_fname(const char *fname, bool isdir, + const char *progname); extern void fsync_pgdata(const char *pg_data, const char *progname); +extern int durable_rename(const char *oldfile, const char *newfile, + const char *progname); +extern int fsync_parent_path(const char *fname, const char *progname); #endif /* FILE_UTILS_H */ -- 2.40.0