Increase distance between flush requests during bulk file copies.

author Tom Lane <tgl@sss.pgh.pa.us>
Sun, 8 Oct 2017 19:25:26 +0000 (15:25 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Sun, 8 Oct 2017 19:25:26 +0000 (15:25 -0400)

diff --git a/src/backend/storage/file/copydir.c b/src/backend/storage/file/copydir.c
index a964e47f5021d7d103a720ef692c67b9c41da499..ec467d9a86dce3917d7a217c6ecc9abc61ecac5b 100644
--- a/src/backend/storage/file/copydir.c
+++ b/src/backend/storage/file/copydir.c
@@ -139,10 +139,24 @@ copy_file(char *fromfile, char *tofile)
         int                     dstfd;
         int                     nbytes;
         off_t           offset;
+       off_t           flush_offset;
  
-       /* Use palloc to ensure we get a maxaligned buffer */
+       /* Size of copy buffer (read and write requests) */
  #define COPY_BUF_SIZE (8 * BLCKSZ)
  
+       /*
+        * Size of data flush requests.  It seems beneficial on most platforms to
+        * do this every 1MB or so.  But macOS, at least with early releases of
+        * APFS, is really unfriendly to small mmap/msync requests, so there do it
+        * only every 32MB.
+        */
+#if defined(__darwin__)
+#define FLUSH_DISTANCE (32 * 1024 * 1024)
+#else
+#define FLUSH_DISTANCE (1024 * 1024)
+#endif
+
+       /* Use palloc to ensure we get a maxaligned buffer */
         buffer = palloc(COPY_BUF_SIZE);
  
         /*
@@ -164,11 +178,23 @@ copy_file(char *fromfile, char *tofile)
         /*
          * Do the data copying.
          */
+       flush_offset = 0;
         for (offset = 0;; offset += nbytes)
         {
                 /* If we got a cancel signal during the copy of the file, quit */
                 CHECK_FOR_INTERRUPTS();
  
+               /*
+                * We fsync the files later, but during the copy, flush them every so
+                * often to avoid spamming the cache and hopefully get the kernel to
+                * start writing them out before the fsync comes.
+                */
+               if (offset - flush_offset >= FLUSH_DISTANCE)
+               {
+                       pg_flush_data(dstfd, flush_offset, offset - flush_offset);
+                       flush_offset = offset;
+               }
+
                 nbytes = read(srcfd, buffer, COPY_BUF_SIZE);
                 if (nbytes < 0)
                         ereport(ERROR,
@@ -186,15 +212,11 @@ copy_file(char *fromfile, char *tofile)
                                         (errcode_for_file_access(),
                                          errmsg("could not write to file \"%s\": %m", tofile)));
                 }
-
-               /*
-                * We fsync the files later but first flush them to avoid spamming the
-                * cache and hopefully get the kernel to start writing them out before
-                * the fsync comes.
-                */
-               pg_flush_data(dstfd, offset, nbytes);
         }
  
+       if (offset > flush_offset)
+               pg_flush_data(dstfd, flush_offset, offset - flush_offset);
+
         if (CloseTransientFile(dstfd))
                 ereport(ERROR,
                                 (errcode_for_file_access(),
author	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 8 Oct 2017 19:25:26 +0000 (15:25 -0400)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Sun, 8 Oct 2017 19:25:26 +0000 (15:25 -0400)