From 8b77981f89bc6734dd35772a0130302f323c9cd3 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 8 Dec 2010 20:01:29 -0500 Subject: [PATCH] Force default wal_sync_method to be fdatasync on Linux. Recent versions of the Linux system header files cause xlogdefs.h to believe that open_datasync should be the default sync method, whereas formerly fdatasync was the default on Linux. open_datasync is a bad choice, first because it doesn't actually outperform fdatasync (in fact the reverse), and second because we try to use O_DIRECT with it, causing failures on certain filesystems (e.g., ext4 with data=journal option). This part of the patch is largely per a proposal from Marti Raudsepp. More extensive changes are likely to follow in HEAD, but this is as much change as we want to back-patch. Also clean up confusing code and incorrect documentation surrounding the fsync_writethrough option. Those changes shouldn't result in any actual behavioral change, but I chose to back-patch them anyway to keep the branches looking similar in this area. In 9.0 and HEAD, also do some copy-editing on the WAL Reliability documentation section. Back-patch to all supported branches, since any of them might get used on modern Linux versions. --- doc/src/sgml/config.sgml | 9 +++++---- src/backend/access/transam/xlog.c | 10 +++++----- src/backend/storage/file/fd.c | 9 +++++---- src/backend/utils/misc/postgresql.conf.sample | 2 +- src/include/port/linux.h | 10 ++++++++++ src/include/port/win32.h | 14 +++++++++----- 6 files changed, 35 insertions(+), 19 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 15645c31d0..ffc0d9cc14 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1404,12 +1404,12 @@ SET ENABLE_SEQSCAN TO OFF; - fsync_writethrough (call fsync() at each commit, forcing write-through of any disk write cache) + fsync (call fsync() at each commit) - fsync (call fsync() at each commit) + fsync_writethrough (call fsync() at each commit, forcing write-through of any disk write cache) @@ -1419,10 +1419,11 @@ SET ENABLE_SEQSCAN TO OFF; + The open_* options also use O_DIRECT if available. Not all of these choices are available on all platforms. The default is the first method in the above list that is supported - by the platform. - The open_* options also use O_DIRECT if available. + by the platform, except that fdatasync is the default on + Linux. This parameter can only be set in the postgresql.conf file or on the server command line. diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 024093c51f..594437badc 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -93,7 +93,11 @@ #endif #endif -#if defined(OPEN_DATASYNC_FLAG) +#if defined(PLATFORM_DEFAULT_SYNC_METHOD) +#define DEFAULT_SYNC_METHOD_STR PLATFORM_DEFAULT_SYNC_METHOD_STR +#define DEFAULT_SYNC_METHOD PLATFORM_DEFAULT_SYNC_METHOD +#define DEFAULT_SYNC_FLAGBIT PLATFORM_DEFAULT_SYNC_FLAGBIT +#elif defined(OPEN_DATASYNC_FLAG) #define DEFAULT_SYNC_METHOD_STR "open_datasync" #define DEFAULT_SYNC_METHOD SYNC_METHOD_OPEN #define DEFAULT_SYNC_FLAGBIT OPEN_DATASYNC_FLAG @@ -101,10 +105,6 @@ #define DEFAULT_SYNC_METHOD_STR "fdatasync" #define DEFAULT_SYNC_METHOD SYNC_METHOD_FDATASYNC #define DEFAULT_SYNC_FLAGBIT 0 -#elif defined(HAVE_FSYNC_WRITETHROUGH_ONLY) -#define DEFAULT_SYNC_METHOD_STR "fsync_writethrough" -#define DEFAULT_SYNC_METHOD SYNC_METHOD_FSYNC_WRITETHROUGH -#define DEFAULT_SYNC_FLAGBIT 0 #else #define DEFAULT_SYNC_METHOD_STR "fsync" #define DEFAULT_SYNC_METHOD SYNC_METHOD_FSYNC diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 127d7b605d..052007dca6 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -237,12 +237,13 @@ static void RemovePgTempFilesInDir(const char *tmpdirname); int pg_fsync(int fd) { -#ifndef HAVE_FSYNC_WRITETHROUGH_ONLY - if (sync_method != SYNC_METHOD_FSYNC_WRITETHROUGH) - return pg_fsync_no_writethrough(fd); + /* #if is to skip the sync_method test if there's no need for it */ +#if defined(HAVE_FSYNC_WRITETHROUGH) && !defined(FSYNC_WRITETHROUGH_IS_FSYNC) + if (sync_method == SYNC_METHOD_FSYNC_WRITETHROUGH) + return pg_fsync_writethrough(fd); else #endif - return pg_fsync_writethrough(fd); + return pg_fsync_no_writethrough(fd); } diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 115b3765ff..83c5b8678f 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -153,7 +153,7 @@ #wal_sync_method = fsync # the default is the first option # supported by the operating system: # open_datasync - # fdatasync + # fdatasync (default on Linux) # fsync # fsync_writethrough # open_sync diff --git a/src/include/port/linux.h b/src/include/port/linux.h index 191338c99f..30e178db55 100644 --- a/src/include/port/linux.h +++ b/src/include/port/linux.h @@ -12,3 +12,13 @@ * to have a kernel version test here. */ #define HAVE_LINUX_EIDRM_BUG + +/* + * Set the default wal_sync_method to fdatasync. With recent Linux versions, + * xlogdefs.h's normal rules will prefer open_datasync, which (a) doesn't + * perform better and (b) causes outright failures on ext4 data=journal + * filesystems, because those don't support O_DIRECT. + */ +#define PLATFORM_DEFAULT_SYNC_METHOD_STR "fdatasync" +#define PLATFORM_DEFAULT_SYNC_METHOD SYNC_METHOD_FDATASYNC +#define PLATFORM_DEFAULT_SYNC_FLAGBIT 0 diff --git a/src/include/port/win32.h b/src/include/port/win32.h index 50c711e121..61c82e8d06 100644 --- a/src/include/port/win32.h +++ b/src/include/port/win32.h @@ -25,14 +25,18 @@ /* Must be here to avoid conflicting with prototype in windows.h */ #define mkdir(a,b) mkdir(a) -#define HAVE_FSYNC_WRITETHROUGH -#define HAVE_FSYNC_WRITETHROUGH_ONLY #define ftruncate(a,b) chsize(a,b) + +/* Windows doesn't have fsync() as such, use _commit() */ +#define fsync(fd) _commit(fd) + /* - * Even though we don't support 'fsync' as a wal_sync_method, - * we do fsync() a few other places where _commit() is just fine. + * For historical reasons, we allow setting wal_sync_method to + * fsync_writethrough on Windows, even though it's really identical to fsync + * (both code paths wind up at _commit()). */ -#define fsync(fd) _commit(fd) +#define HAVE_FSYNC_WRITETHROUGH +#define FSYNC_WRITETHROUGH_IS_FSYNC #define USES_WINSOCK -- 2.40.0