From 44dfc9cd71141759cefadcf3e2965e88188b24dc Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 8 Dec 2010 20:01:24 -0500 Subject: [PATCH] Force default wal_sync_method to be fdatasync on Linux. Recent versions of the Linux system header files cause xlogdefs.h to believe that open_datasync should be the default sync method, whereas formerly fdatasync was the default on Linux. open_datasync is a bad choice, first because it doesn't actually outperform fdatasync (in fact the reverse), and second because we try to use O_DIRECT with it, causing failures on certain filesystems (e.g., ext4 with data=journal option). This part of the patch is largely per a proposal from Marti Raudsepp. More extensive changes are likely to follow in HEAD, but this is as much change as we want to back-patch. Also clean up confusing code and incorrect documentation surrounding the fsync_writethrough option. Those changes shouldn't result in any actual behavioral change, but I chose to back-patch them anyway to keep the branches looking similar in this area. In 9.0 and HEAD, also do some copy-editing on the WAL Reliability documentation section. Back-patch to all supported branches, since any of them might get used on modern Linux versions. --- doc/src/sgml/config.sgml | 9 +++++---- src/backend/storage/file/fd.c | 9 +++++---- src/backend/utils/misc/postgresql.conf.sample | 2 +- src/include/access/xlogdefs.h | 10 +++++----- src/include/port/linux.h | 10 ++++++++++ src/include/port/win32.h | 14 +++++++++----- 6 files changed, 35 insertions(+), 19 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 46f870ca23..6890bd5f20 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -1469,12 +1469,12 @@ SET ENABLE_SEQSCAN TO OFF; - fsync_writethrough (call fsync() at each commit, forcing write-through of any disk write cache) + fsync (call fsync() at each commit) - fsync (call fsync() at each commit) + fsync_writethrough (call fsync() at each commit, forcing write-through of any disk write cache) @@ -1484,10 +1484,11 @@ SET ENABLE_SEQSCAN TO OFF; - Not all of these choices are available on all platforms. The open_* options also use O_DIRECT if available. + Not all of these choices are available on all platforms. The default is the first method in the above list that is supported - by the platform. The default is not necessarily ideal; it might be + by the platform, except that fdatasync is the default on + Linux. The default is not necessarily ideal; it might be necessary to change this setting or other aspects of your system configuration in order to create a crash-safe configuration or achieve optimal performance. diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 008abc5e43..aeb5e0fb78 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -247,12 +247,13 @@ static void RemovePgTempFilesInDir(const char *tmpdirname); int pg_fsync(int fd) { -#ifndef HAVE_FSYNC_WRITETHROUGH_ONLY - if (sync_method != SYNC_METHOD_FSYNC_WRITETHROUGH) - return pg_fsync_no_writethrough(fd); + /* #if is to skip the sync_method test if there's no need for it */ +#if defined(HAVE_FSYNC_WRITETHROUGH) && !defined(FSYNC_WRITETHROUGH_IS_FSYNC) + if (sync_method == SYNC_METHOD_FSYNC_WRITETHROUGH) + return pg_fsync_writethrough(fd); else #endif - return pg_fsync_writethrough(fd); + return pg_fsync_no_writethrough(fd); } diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 4487b21c93..abfbd09776 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -155,7 +155,7 @@ #wal_sync_method = fsync # the default is the first option # supported by the operating system: # open_datasync - # fdatasync + # fdatasync (default on Linux) # fsync # fsync_writethrough # open_sync diff --git a/src/include/access/xlogdefs.h b/src/include/access/xlogdefs.h index 4129025d37..e3a6e59b13 100644 --- a/src/include/access/xlogdefs.h +++ b/src/include/access/xlogdefs.h @@ -108,7 +108,11 @@ typedef uint32 TimeLineID; #endif #endif -#if defined(OPEN_DATASYNC_FLAG) +#if defined(PLATFORM_DEFAULT_SYNC_METHOD) +#define DEFAULT_SYNC_METHOD_STR PLATFORM_DEFAULT_SYNC_METHOD_STR +#define DEFAULT_SYNC_METHOD PLATFORM_DEFAULT_SYNC_METHOD +#define DEFAULT_SYNC_FLAGBIT PLATFORM_DEFAULT_SYNC_FLAGBIT +#elif defined(OPEN_DATASYNC_FLAG) #define DEFAULT_SYNC_METHOD_STR "open_datasync" #define DEFAULT_SYNC_METHOD SYNC_METHOD_OPEN #define DEFAULT_SYNC_FLAGBIT OPEN_DATASYNC_FLAG @@ -116,10 +120,6 @@ typedef uint32 TimeLineID; #define DEFAULT_SYNC_METHOD_STR "fdatasync" #define DEFAULT_SYNC_METHOD SYNC_METHOD_FDATASYNC #define DEFAULT_SYNC_FLAGBIT 0 -#elif defined(HAVE_FSYNC_WRITETHROUGH_ONLY) -#define DEFAULT_SYNC_METHOD_STR "fsync_writethrough" -#define DEFAULT_SYNC_METHOD SYNC_METHOD_FSYNC_WRITETHROUGH -#define DEFAULT_SYNC_FLAGBIT 0 #else #define DEFAULT_SYNC_METHOD_STR "fsync" #define DEFAULT_SYNC_METHOD SYNC_METHOD_FSYNC diff --git a/src/include/port/linux.h b/src/include/port/linux.h index 0f4432a4ef..629fe785df 100644 --- a/src/include/port/linux.h +++ b/src/include/port/linux.h @@ -12,3 +12,13 @@ * to have a kernel version test here. */ #define HAVE_LINUX_EIDRM_BUG + +/* + * Set the default wal_sync_method to fdatasync. With recent Linux versions, + * xlogdefs.h's normal rules will prefer open_datasync, which (a) doesn't + * perform better and (b) causes outright failures on ext4 data=journal + * filesystems, because those don't support O_DIRECT. + */ +#define PLATFORM_DEFAULT_SYNC_METHOD_STR "fdatasync" +#define PLATFORM_DEFAULT_SYNC_METHOD SYNC_METHOD_FDATASYNC +#define PLATFORM_DEFAULT_SYNC_FLAGBIT 0 diff --git a/src/include/port/win32.h b/src/include/port/win32.h index f374bd45f0..1de7ff31c2 100644 --- a/src/include/port/win32.h +++ b/src/include/port/win32.h @@ -34,14 +34,18 @@ /* Must be here to avoid conflicting with prototype in windows.h */ #define mkdir(a,b) mkdir(a) -#define HAVE_FSYNC_WRITETHROUGH -#define HAVE_FSYNC_WRITETHROUGH_ONLY #define ftruncate(a,b) chsize(a,b) + +/* Windows doesn't have fsync() as such, use _commit() */ +#define fsync(fd) _commit(fd) + /* - * Even though we don't support 'fsync' as a wal_sync_method, - * we do fsync() a few other places where _commit() is just fine. + * For historical reasons, we allow setting wal_sync_method to + * fsync_writethrough on Windows, even though it's really identical to fsync + * (both code paths wind up at _commit()). */ -#define fsync(fd) _commit(fd) +#define HAVE_FSYNC_WRITETHROUGH +#define FSYNC_WRITETHROUGH_IS_FSYNC #define USES_WINSOCK -- 2.50.0