granicus.if.org Git - postgresql/commitdiff
Add wal_recycle and wal_init_zero GUCs.
author: Thomas Munro <tmunro@postgresql.org>
Tue, 2 Apr 2019 01:37:14 +0000 (14:37 +1300)
committer: Thomas Munro <tmunro@postgresql.org>
Tue, 2 Apr 2019 01:37:14 +0000 (14:37 +1300)
On at least ZFS, it can be beneficial to create new WAL files every
time and not to bother zero-filling them.  Since it's not clear which
other filesystems might benefit from one or both of those things,
add individual GUCs to control those two behaviors independently and
make only very general statements in the docs.

Author: Jerry Jelinek, with some adjustments by Thomas Munro
Reviewed-by: Alvaro Herrera, Andres Freund, Tomas Vondra, Robert Haas and others
Discussion: https://postgr.es/m/CACPQ5Fo00QR7LNAcd1ZjgoBi4y97%2BK760YABs0vQHH5dLdkkMA%40mail.gmail.com

doc/src/sgml/config.sgml
src/backend/access/transam/xlog.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/include/access/xlog.h

index d383de2512851a75165e2fd30b7212b0ea4b0d1c..2166b99fc4eeb23605da6b8e9c20406818b5771d 100644 (file)
@@ -3590,6 +3590,41 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"'  # Windows
        </listitem>
       </varlistentry>
 
+     <varlistentry id="guc-wal-init-zero" xreflabel="wal_init_zero">
+      <term><varname>wal_init_zero</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>wal_init_zero</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        If set to <literal>on</literal> (the default), this option causes new
+        WAL files to be filled with zeroes.  On some filesystems, this ensures
+        that space is allocated before we need to write WAL records.  However,
+        <firstterm>Copy-On-Write</firstterm> (COW) filesystems may not benefit
+        from this technique, so the option is given to skip the unnecessary
+        work.  If set to <literal>off</literal>, only the final byte is written
+        when the file is created so that it has the expected size.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-wal-recycle" xreflabel="wal_recycle">
+      <term><varname>wal_recycle</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>wal_recycle</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        If set to <literal>on</literal> (the default), this option causes WAL
+        files to be recycled by renaming them, avoiding the need to create new
+        ones.  On COW filesystems, it may be faster to create new ones, so the
+        option is given to disable this behavior.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-wal-sender-timeout" xreflabel="wal_sender_timeout">
       <term><varname>wal_sender_timeout</varname> (<type>integer</type>)
       <indexterm>
index a181e33dd4d27c981cc12d370c8059c8573b4fcf..c6ca96079c1f9704a330871743a53a3dfa8348c0 100644 (file)
@@ -95,6 +95,8 @@ bool          wal_log_hints = false;
 bool           wal_compression = false;
 char      *wal_consistency_checking_string = NULL;
 bool      *wal_consistency_checking = NULL;
+bool           wal_init_zero = true;
+bool           wal_recycle = true;
 bool           log_checkpoints = false;
 int                    sync_method = DEFAULT_SYNC_METHOD;
 int                    wal_level = WAL_LEVEL_MINIMAL;
@@ -3209,6 +3211,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
        XLogSegNo       max_segno;
        int                     fd;
        int                     nbytes;
+       int                     save_errno;
 
        XLogFilePath(path, ThisTimeLineID, logsegno, wal_segment_size);
 
@@ -3248,39 +3251,61 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
                                (errcode_for_file_access(),
                                 errmsg("could not create file \"%s\": %m", tmppath)));
 
-       /*
-        * Zero-fill the file.  We have to do this the hard way to ensure that all
-        * the file space has really been allocated --- on platforms that allow
-        * "holes" in files, just seeking to the end doesn't allocate intermediate
-        * space.  This way, we know that we have all the space and (after the
-        * fsync below) that all the indirect blocks are down on disk.  Therefore,
-        * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
-        * log file.
-        */
        memset(zbuffer.data, 0, XLOG_BLCKSZ);
-       for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ)
+
+       pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
+       save_errno = 0;
+       if (wal_init_zero)
        {
+               /*
+                * Zero-fill the file.  With this setting, we do this the hard way to
+                * ensure that all the file space has really been allocated.  On
+                * platforms that allow "holes" in files, just seeking to the end
+                * doesn't allocate intermediate space.  This way, we know that we
+                * have all the space and (after the fsync below) that all the
+                * indirect blocks are down on disk.  Therefore, fdatasync(2) or
+                * O_DSYNC will be sufficient to sync future writes to the log file.
+                */
+               for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ)
+               {
+                       errno = 0;
+                       if (write(fd, zbuffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+                       {
+                               /* if write didn't set errno, assume no disk space */
+                               save_errno = errno ? errno : ENOSPC;
+                               break;
+                       }
+               }
+       }
+       else
+       {
+               /*
+                * Otherwise, seeking to the end and writing a solitary byte is
+                * enough.
+                */
                errno = 0;
-               pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
-               if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
+               if (pg_pwrite(fd, zbuffer.data, 1, wal_segment_size - 1) != 1)
                {
-                       int                     save_errno = errno;
+                       /* if write didn't set errno, assume no disk space */
+                       save_errno = errno ? errno : ENOSPC;
+               }
+       }
+       pgstat_report_wait_end();
 
-                       /*
-                        * If we fail to make the file, delete it to release disk space
-                        */
-                       unlink(tmppath);
+       if (save_errno)
+       {
+               /*
+                * If we fail to make the file, delete it to release disk space
+                */
+               unlink(tmppath);
 
-                       close(fd);
+               close(fd);
 
-                       /* if write didn't set errno, assume problem is no disk space */
-                       errno = save_errno ? save_errno : ENOSPC;
+               errno = save_errno;
 
-                       ereport(ERROR,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not write to file \"%s\": %m", tmppath)));
-               }
-               pgstat_report_wait_end();
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not write to file \"%s\": %m", tmppath)));
        }
 
        pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
@@ -4049,14 +4074,19 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr)
        XLogSegNo       endlogSegNo;
        XLogSegNo       recycleSegNo;
 
-       /*
-        * Initialize info about where to try to recycle to.
-        */
-       XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
-       if (RedoRecPtr == InvalidXLogRecPtr)
-               recycleSegNo = endlogSegNo + 10;
+       if (wal_recycle)
+       {
+               /*
+                * Initialize info about where to try to recycle to.
+                */
+               XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
+               if (RedoRecPtr == InvalidXLogRecPtr)
+                       recycleSegNo = endlogSegNo + 10;
+               else
+                       recycleSegNo = XLOGfileslop(RedoRecPtr);
+       }
        else
-               recycleSegNo = XLOGfileslop(RedoRecPtr);
+               recycleSegNo = 0;               /* keep compiler quiet */
 
        snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
 
@@ -4065,7 +4095,8 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr)
         * segment. Only recycle normal files, pg_standby for example can create
         * symbolic links pointing to a separate archive directory.
         */
-       if (endlogSegNo <= recycleSegNo &&
+       if (wal_recycle &&
+               endlogSegNo <= recycleSegNo &&
                lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
                InstallXLogFileSegment(&endlogSegNo, path,
                                                           true, recycleSegNo, true))
index aa564d153a53b255a3ca5ebe1c98e32a455576d1..cd5a65be75b58778aa6e53863501170e036a991e 100644 (file)
@@ -1174,6 +1174,26 @@ static struct config_bool ConfigureNamesBool[] =
                NULL, NULL, NULL
        },
 
+       {
+               {"wal_init_zero", PGC_SUSET, WAL_SETTINGS,
+                       gettext_noop("Writes zeroes to new WAL files before first use."),
+                       NULL
+               },
+               &wal_init_zero,
+               true,
+               NULL, NULL, NULL
+       },
+
+       {
+               {"wal_recycle", PGC_SUSET, WAL_SETTINGS,
+                       gettext_noop("Recycles WAL files by renaming them."),
+                       NULL
+               },
+               &wal_recycle,
+               true,
+               NULL, NULL, NULL
+       },
+
        {
                {"log_checkpoints", PGC_SIGHUP, LOGGING_WHAT,
                        gettext_noop("Logs each checkpoint."),
index cccb5f145a24fe0dbadda5de693debdfecb35c42..9b15361403ebdf08cda98e8da9e72e6e5a0e5fc1 100644 (file)
 #wal_compression = off                 # enable compression of full-page writes
 #wal_log_hints = off                   # also do full page writes of non-critical updates
                                        # (change requires restart)
+#wal_init_zero = on                    # zero-fill new WAL files
+#wal_recycle = on                      # recycle WAL files
 #wal_buffers = -1                      # min 32kB, -1 sets based on shared_buffers
                                        # (change requires restart)
 #wal_writer_delay = 200ms              # 1-10000 milliseconds
index eb6c44649dc821c790bdf34dd82193bc309c7ea3..2af938bfdcb7ff2acd88751c53f505580d0aed6e 100644 (file)
@@ -116,6 +116,8 @@ extern bool EnableHotStandby;
 extern bool fullPageWrites;
 extern bool wal_log_hints;
 extern bool wal_compression;
+extern bool wal_init_zero;
+extern bool wal_recycle;
 extern bool *wal_consistency_checking;
 extern char *wal_consistency_checking_string;
 extern bool log_checkpoints;