From fc49e24fa69a15efacd5b8958115ed9c43c48f9a Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Tue, 19 Sep 2017 22:03:48 -0700 Subject: [PATCH] Make WAL segment size configurable at initdb time. For performance reasons a larger segment size than the default 16MB can be useful. A larger segment size has two main benefits: Firstly, in setups using archiving, it makes it easier to write scripts that can keep up with higher amounts of WAL; secondly, the WAL has to be written and synced to disk less frequently. But at the same time large segment sizes are disadvantageous for smaller databases. So far the segment size had to be configured at compile time, often making it unrealistic to choose one fitting a particular load. Therefore change it to an initdb-time setting. This includes a breaking change to the xlogreader.h API, which now requires the current segment size to be configured. For that and similar reasons a number of binaries had to be taught how to recognize the current segment size. 
Author: Beena Emerson, editorialized by Andres Freund Reviewed-By: Andres Freund, David Steele, Kuntal Ghosh, Michael Paquier, Peter Eisentraut, Robert Hass, Tushar Ahuja Discussion: https://postgr.es/m/CAOG9ApEAcQ--1ieKbhFzXSQPw_YLmepaa4hNdnY5+ZULpt81Mw@mail.gmail.com --- configure | 54 ---- configure.in | 31 --- contrib/pg_standby/pg_standby.c | 115 +++++++- doc/src/sgml/backup.sgml | 2 +- doc/src/sgml/installation.sgml | 14 - doc/src/sgml/ref/initdb.sgml | 15 ++ doc/src/sgml/wal.sgml | 13 +- src/backend/access/transam/twophase.c | 3 +- src/backend/access/transam/xlog.c | 255 +++++++++++------- src/backend/access/transam/xlogarchive.c | 14 +- src/backend/access/transam/xlogfuncs.c | 10 +- src/backend/access/transam/xlogreader.c | 32 +-- src/backend/access/transam/xlogutils.c | 36 ++- src/backend/bootstrap/bootstrap.c | 15 +- src/backend/postmaster/checkpointer.c | 5 +- src/backend/replication/basebackup.c | 34 +-- src/backend/replication/logical/logical.c | 2 +- .../replication/logical/reorderbuffer.c | 19 +- src/backend/replication/slot.c | 2 +- src/backend/replication/walreceiver.c | 14 +- src/backend/replication/walreceiverfuncs.c | 4 +- src/backend/replication/walsender.c | 16 +- src/backend/utils/misc/guc.c | 20 +- src/backend/utils/misc/pg_controldata.c | 5 +- src/backend/utils/misc/postgresql.conf.sample | 2 +- src/bin/initdb/initdb.c | 58 +++- src/bin/pg_basebackup/pg_basebackup.c | 7 +- src/bin/pg_basebackup/pg_receivewal.c | 16 +- src/bin/pg_basebackup/receivelog.c | 36 +-- src/bin/pg_basebackup/streamutil.c | 76 ++++++ src/bin/pg_basebackup/streamutil.h | 2 + src/bin/pg_controldata/pg_controldata.c | 15 +- src/bin/pg_resetwal/pg_resetwal.c | 55 ++-- src/bin/pg_rewind/parsexlog.c | 30 ++- src/bin/pg_rewind/pg_rewind.c | 12 +- src/bin/pg_rewind/pg_rewind.h | 1 + src/bin/pg_test_fsync/pg_test_fsync.c | 7 +- src/bin/pg_upgrade/test.sh | 4 +- src/bin/pg_waldump/pg_waldump.c | 246 ++++++++++++----- src/include/access/xlog.h | 1 + 
src/include/access/xlog_internal.h | 76 +++--- src/include/access/xlogreader.h | 8 +- src/include/catalog/pg_control.h | 2 +- src/include/pg_config.h.in | 5 - src/include/pg_config_manual.h | 6 + src/tools/msvc/Solution.pm | 2 - 46 files changed, 897 insertions(+), 500 deletions(-) diff --git a/configure b/configure index 0d76e5ea42..5c38149a3d 100755 --- a/configure +++ b/configure @@ -821,7 +821,6 @@ enable_tap_tests with_blocksize with_segsize with_wal_blocksize -with_wal_segsize with_CC enable_depend enable_cassert @@ -1518,8 +1517,6 @@ Optional Packages: --with-segsize=SEGSIZE set table segment size in GB [1] --with-wal-blocksize=BLOCKSIZE set WAL block size in kB [8] - --with-wal-segsize=SEGSIZE - set WAL segment size in MB [16] --with-CC=CMD set compiler (deprecated) --with-icu build with ICU support --with-tcl build Tcl modules (PL/Tcl) @@ -3733,57 +3730,6 @@ cat >>confdefs.h <<_ACEOF _ACEOF -# -# WAL segment size -# -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for WAL segment size" >&5 -$as_echo_n "checking for WAL segment size... " >&6; } - - - -# Check whether --with-wal-segsize was given. -if test "${with_wal_segsize+set}" = set; then : - withval=$with_wal_segsize; - case $withval in - yes) - as_fn_error $? "argument required for --with-wal-segsize option" "$LINENO" 5 - ;; - no) - as_fn_error $? "argument required for --with-wal-segsize option" "$LINENO" 5 - ;; - *) - wal_segsize=$withval - ;; - esac - -else - wal_segsize=16 -fi - - -case ${wal_segsize} in - 1) ;; - 2) ;; - 4) ;; - 8) ;; - 16) ;; - 32) ;; - 64) ;; - 128) ;; - 256) ;; - 512) ;; - 1024) ;; - *) as_fn_error $? "Invalid WAL segment size. Allowed values are 1,2,4,8,16,32,64,128,256,512,1024." 
"$LINENO" 5 -esac -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${wal_segsize}MB" >&5 -$as_echo "${wal_segsize}MB" >&6; } - - -cat >>confdefs.h <<_ACEOF -#define XLOG_SEG_SIZE (${wal_segsize} * 1024 * 1024) -_ACEOF - - # # C compiler # diff --git a/configure.in b/configure.in index bdc41b071f..176b29a792 100644 --- a/configure.in +++ b/configure.in @@ -343,37 +343,6 @@ AC_DEFINE_UNQUOTED([XLOG_BLCKSZ], ${XLOG_BLCKSZ}, [ Changing XLOG_BLCKSZ requires an initdb. ]) -# -# WAL segment size -# -AC_MSG_CHECKING([for WAL segment size]) -PGAC_ARG_REQ(with, wal-segsize, [SEGSIZE], [set WAL segment size in MB [16]], - [wal_segsize=$withval], - [wal_segsize=16]) -case ${wal_segsize} in - 1) ;; - 2) ;; - 4) ;; - 8) ;; - 16) ;; - 32) ;; - 64) ;; - 128) ;; - 256) ;; - 512) ;; - 1024) ;; - *) AC_MSG_ERROR([Invalid WAL segment size. Allowed values are 1,2,4,8,16,32,64,128,256,512,1024.]) -esac -AC_MSG_RESULT([${wal_segsize}MB]) - -AC_DEFINE_UNQUOTED([XLOG_SEG_SIZE], [(${wal_segsize} * 1024 * 1024)], [ - XLOG_SEG_SIZE is the size of a single WAL file. This must be a power of 2 - and larger than XLOG_BLCKSZ (preferably, a great deal larger than - XLOG_BLCKSZ). - - Changing XLOG_SEG_SIZE requires an initdb. 
-]) - # # C compiler # diff --git a/contrib/pg_standby/pg_standby.c b/contrib/pg_standby/pg_standby.c index d7fa2a80c6..6aeca6e8f7 100644 --- a/contrib/pg_standby/pg_standby.c +++ b/contrib/pg_standby/pg_standby.c @@ -36,6 +36,8 @@ const char *progname; +int WalSegSz = -1; + /* Options and defaults */ int sleeptime = 5; /* amount of time to sleep between file checks */ int waittime = -1; /* how long we have been waiting, -1 no wait @@ -100,6 +102,10 @@ int nextWALFileType; struct stat stat_buf; +static bool SetWALFileNameForCleanup(void); +static bool SetWALSegSize(void); + + /* ===================================================================== * * Customizable section @@ -175,6 +181,35 @@ CustomizableNextWALFileReady(void) { if (stat(WALFilePath, &stat_buf) == 0) { + /* + * If we've not seen any WAL segments, we don't know the WAL segment + * size, which we need. If it looks like a WAL segment, determine size + * of segments for the cluster. + */ + if (WalSegSz == -1 && IsXLogFileName(nextWALFileName)) + { + if (SetWALSegSize()) + { + /* + * Successfully determined WAL segment size. Can compute + * cleanup cutoff now. + */ + need_cleanup = SetWALFileNameForCleanup(); + if (debug) + { + fprintf(stderr, + _("WAL segment size: %d \n"), WalSegSz); + fprintf(stderr, "Keep archive history: "); + + if (need_cleanup) + fprintf(stderr, "%s and later\n", + exclusiveCleanupFileName); + else + fprintf(stderr, "no cleanup required\n"); + } + } + } + /* * If it's a backup file, return immediately. If it's a regular file * return only if it's the right size already. 
@@ -184,7 +219,7 @@ CustomizableNextWALFileReady(void) nextWALFileType = XLOG_BACKUP_LABEL; return true; } - else if (stat_buf.st_size == XLOG_SEG_SIZE) + else if (WalSegSz > 0 && stat_buf.st_size == WalSegSz) { #ifdef WIN32 @@ -204,7 +239,7 @@ CustomizableNextWALFileReady(void) /* * If still too small, wait until it is the correct size */ - if (stat_buf.st_size > XLOG_SEG_SIZE) + if (WalSegSz > 0 && stat_buf.st_size > WalSegSz) { if (debug) { @@ -218,8 +253,6 @@ CustomizableNextWALFileReady(void) return false; } -#define MaxSegmentsPerLogFile ( 0xFFFFFFFF / XLOG_SEG_SIZE ) - static void CustomizableCleanupPriorWALFiles(void) { @@ -315,6 +348,7 @@ SetWALFileNameForCleanup(void) uint32 log_diff = 0, seg_diff = 0; bool cleanup = false; + int max_segments_per_logfile = (0xFFFFFFFF / WalSegSz); if (restartWALFileName) { @@ -336,12 +370,12 @@ SetWALFileNameForCleanup(void) sscanf(nextWALFileName, "%08X%08X%08X", &tli, &log, &seg); if (tli > 0 && seg > 0) { - log_diff = keepfiles / MaxSegmentsPerLogFile; - seg_diff = keepfiles % MaxSegmentsPerLogFile; + log_diff = keepfiles / max_segments_per_logfile; + seg_diff = keepfiles % max_segments_per_logfile; if (seg_diff > seg) { log_diff++; - seg = MaxSegmentsPerLogFile - (seg_diff - seg); + seg = max_segments_per_logfile - (seg_diff - seg); } else seg -= seg_diff; @@ -364,6 +398,66 @@ SetWALFileNameForCleanup(void) return cleanup; } +/* + * Try to set the wal segment size from the WAL file specified by WALFilePath. + * + * Return true if size could be determined, false otherwise. 
+ */ +static bool +SetWALSegSize(void) +{ + bool ret_val = false; + int fd; + char *buf = (char *) malloc(XLOG_BLCKSZ); + + Assert(WalSegSz == -1); + + if ((fd = open(WALFilePath, O_RDWR, 0)) < 0) + { + fprintf(stderr, "%s: couldn't open WAL file \"%s\"\n", + progname, WALFilePath); + return false; + } + if (read(fd, buf, XLOG_BLCKSZ) == XLOG_BLCKSZ) + { + XLogLongPageHeader longhdr = (XLogLongPageHeader) buf; + + WalSegSz = longhdr->xlp_seg_size; + + if (IsValidWalSegSize(WalSegSz)) + { + /* successfully retrieved WAL segment size */ + ret_val = true; + } + else + fprintf(stderr, + "%s: WAL segment size must be a power of two between 1MB and 1GB, but the WAL file header specifies %d bytes\n", + progname, WalSegSz); + close(fd); + } + else + { + /* + * Don't complain loudly, this is to be expected for segments being + * created. + */ + if (errno != 0) + { + if (debug) + fprintf(stderr, "could not read file \"%s\": %s", + WALFilePath, strerror(errno)); + } + else + { + if (debug) + fprintf(stderr, "not enough data in file \"%s\"", WALFilePath); + } + } + + fflush(stderr); + return ret_val; +} + /* * CheckForExternalTrigger() * @@ -708,8 +802,6 @@ main(int argc, char **argv) CustomizableInitialize(); - need_cleanup = SetWALFileNameForCleanup(); - if (debug) { fprintf(stderr, "Trigger file: %s\n", triggerPath ? triggerPath : ""); @@ -721,11 +813,6 @@ main(int argc, char **argv) fprintf(stderr, "Max wait interval: %d %s\n", maxwaittime, (maxwaittime > 0 ? 
"seconds" : "forever")); fprintf(stderr, "Command for restore: %s\n", restoreCommand); - fprintf(stderr, "Keep archive history: "); - if (need_cleanup) - fprintf(stderr, "%s and later\n", exclusiveCleanupFileName); - else - fprintf(stderr, "no cleanup required\n"); fflush(stderr); } diff --git a/doc/src/sgml/backup.sgml b/doc/src/sgml/backup.sgml index 95aeb35507..bd55e8bb77 100644 --- a/doc/src/sgml/backup.sgml +++ b/doc/src/sgml/backup.sgml @@ -562,7 +562,7 @@ tar -cf backup.tar /usr/local/pgsql/data produces an indefinitely long sequence of WAL records. The system physically divides this sequence into WAL segment files, which are normally 16MB apiece (although the segment size - can be altered when building PostgreSQL). The segment + can be altered during initdb). The segment files are given numeric names that reflect their position in the abstract WAL sequence. When not using WAL archiving, the system normally creates just a few segment files and then diff --git a/doc/src/sgml/installation.sgml b/doc/src/sgml/installation.sgml index b178d3074b..a1bae95145 100644 --- a/doc/src/sgml/installation.sgml +++ b/doc/src/sgml/installation.sgml @@ -1058,20 +1058,6 @@ su - postgres - - - - - Set the WAL segment size, in megabytes. This is - the size of each individual file in the WAL log. It may be useful - to adjust this size to control the granularity of WAL log shipping. - The default size is 16 megabytes. - The value must be a power of 2 between 1 and 1024 (megabytes). - Note that changing this value requires an initdb. - - - - diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index 6efb2e442d..732fecab8e 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -316,6 +316,21 @@ PostgreSQL documentation + + + + Set the WAL segment size, in megabytes. This is + the size of each individual file in the WAL log. It may be useful + to adjust this size to control the granularity of WAL log shipping. 
+ This option can only be set during initialization, and cannot be + changed later. + The default size is 16 megabytes. + The value must be a power of 2 between 1 and 1024 (megabytes). + + + + + diff --git a/doc/src/sgml/wal.sgml b/doc/src/sgml/wal.sgml index 940c37b21a..ddcef5fbf5 100644 --- a/doc/src/sgml/wal.sgml +++ b/doc/src/sgml/wal.sgml @@ -752,13 +752,12 @@ WAL logs are stored in the directory pg_wal under the data directory, as a set of segment files, normally each 16 MB in size (but the size can be changed - by altering the