granicus.if.org Git - postgresql/commitdiff
pg_upgrade: Allow use of file cloning
author Peter Eisentraut <peter_e@gmx.net>
Wed, 7 Nov 2018 17:05:54 +0000 (18:05 +0100)
committer Peter Eisentraut <peter_e@gmx.net>
Wed, 7 Nov 2018 17:35:20 +0000 (18:35 +0100)
Add another transfer mode --clone to pg_upgrade (besides the existing
--link and the default copy), using special file cloning calls.  This
makes the file transfer faster and more space efficient, achieving
speed similar to --link mode without the associated drawbacks.

On Linux, file cloning is supported on Btrfs and XFS (if formatted with
reflink support).  On macOS, file cloning is supported on APFS.

Reviewed-by: Michael Paquier <michael@paquier.xyz>
configure
configure.in
doc/src/sgml/ref/pgupgrade.sgml
src/bin/pg_upgrade/check.c
src/bin/pg_upgrade/file.c
src/bin/pg_upgrade/option.c
src/bin/pg_upgrade/pg_upgrade.h
src/bin/pg_upgrade/relfilenode.c
src/include/pg_config.h.in

index a600fdfac3814a66421a92f086cd4cfc5d7b869d..c76650aaf4ed0015599fcc5c7486f22b705f0ec8 100755 (executable)
--- a/configure
+++ b/configure
@@ -15130,7 +15130,7 @@ fi
 LIBS_including_readline="$LIBS"
 LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 
-for ac_func in cbrt clock_gettime fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink setproctitle setproctitle_fast setsid shm_open strchrnul symlink sync_file_range utime utimes wcstombs_l
+for ac_func in cbrt clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit mbstowcs_l memmove poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink setproctitle setproctitle_fast setsid shm_open strchrnul symlink sync_file_range utime utimes wcstombs_l
 do :
   as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
 ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
index 75dbe8e734bd96679a0a176da045e0d0f1d648cd..8b0b6d1e43f9b84a498a1764ed7215d827b1b244 100644 (file)
@@ -1602,6 +1602,7 @@ LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
 AC_CHECK_FUNCS(m4_normalize([
        cbrt
        clock_gettime
+       copyfile
        fdatasync
        getifaddrs
        getpeerucred
index d51146d641d81733a977975346d2005002eef0f6..2d722b2e792451a0b18669a460a7a83b8d728451 100644 (file)
       <listitem><para>display version information, then exit</para></listitem>
      </varlistentry>
 
+     <varlistentry>
+      <term><option>--clone</option></term>
+      <listitem>
+       <para>
+        Use efficient file cloning (also known as <quote>reflinks</quote> on
+        some systems) instead of copying files to the new cluster.  This can
+        result in near-instantaneous copying of the data files, giving the
+        speed advantages of <option>-k</option>/<option>--link</option> while
+        leaving the old cluster untouched.
+       </para>
+
+       <para>
+        File cloning is only supported on some operating systems and file
+        systems.  If it is selected but not supported, the
+        <application>pg_upgrade</application> run will fail with an error.  At
+        present, it is supported on Linux (kernel 4.5 or later) with Btrfs and
+        XFS (on file systems created with reflink support, which is not the
+        default for XFS at this writing), and on macOS with APFS.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><option>-?</option></term>
       <term><option>--help</option></term>
@@ -340,7 +362,7 @@ NET STOP postgresql-&majorversion;
      Always run the <application>pg_upgrade</application> binary of the new server, not the old one.
      <application>pg_upgrade</application> requires the specification of the old and new cluster's
      data and executable (<filename>bin</filename>) directories. You can also specify
-     user and port values, and whether you want the data files linked
+     user and port values, and whether you want the data files linked or cloned
      instead of the default copy behavior.
     </para>
 
@@ -351,8 +373,12 @@ NET STOP postgresql-&majorversion;
      once you start the new cluster after the upgrade.  Link mode also
      requires that the old and new cluster data directories be in the
      same file system.  (Tablespaces and <filename>pg_wal</filename> can be on
-     different file systems.)  See <literal>pg_upgrade --help</literal> for a full
-     list of options.
+     different file systems.)
+     The clone mode provides the same speed and disk space advantages but will
+     not leave the old cluster unusable after the upgrade.  The clone mode
+     also requires that the old and new data directories be in the same file
+     system.  The clone mode is only available on certain operating systems
+     and file systems.
     </para>
 
     <para>
@@ -388,8 +414,9 @@ pg_upgrade.exe
      to perform only the checks, even if the old server is still
      running. <command>pg_upgrade --check</command> will also outline any
      manual adjustments you will need to make after the upgrade.  If you
-     are going to be using link mode, you should use the <option>--link</option>
-     option with <option>--check</option> to enable link-mode-specific checks.
+     are going to be using link or clone mode, you should use the option
+     <option>--link</option> or <option>--clone</option> with
+     <option>--check</option> to enable mode-specific checks.
      <command>pg_upgrade</command> requires write permission in the current directory.
     </para>
 
@@ -722,7 +749,8 @@ psql --username=postgres --file=script.sql postgres
 
   <para>
    If you want to use link mode and you do not want your old cluster
-   to be modified when the new cluster is started, make a copy of the
+   to be modified when the new cluster is started, consider using the clone mode.
+   If that is not available, make a copy of the
    old cluster and upgrade that in link mode. To make a valid copy
    of the old cluster, use <command>rsync</command> to create a dirty
    copy of the old cluster while the server is running, then shut down
index 5a78d603dc92c42c81601833cd09c12b02e475ca..555e5dcbba24bb377ec1f158d7c6a6809b4b6aa2 100644 (file)
@@ -149,8 +149,17 @@ check_new_cluster(void)
 
        check_loadable_libraries();
 
-       if (user_opts.transfer_mode == TRANSFER_MODE_LINK)
-               check_hard_link();
+       switch (user_opts.transfer_mode)
+       {
+               case TRANSFER_MODE_CLONE:
+                       check_file_clone();
+                       break;
+               case TRANSFER_MODE_COPY:
+                       break;
+               case TRANSFER_MODE_LINK:
+                       check_hard_link();
+                       break;
+       }
 
        check_is_install_user(&new_cluster);
 
index c27cc93dc2e8a66bf00d8908c0864e915838bc7f..244dd4d88b5973b71373cc01fb3b8f1e71337b49 100644 (file)
 
 #include <sys/stat.h>
 #include <fcntl.h>
+#ifdef HAVE_COPYFILE
+#include <copyfile.h>
+#endif
+#ifdef __linux__
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#endif
 
 
 #ifdef WIN32
@@ -25,6 +32,47 @@ static int   win32_pghardlink(const char *src, const char *dst);
 #endif
 
 
+/*
+ * cloneFile()
+ *
+ * Clones/reflinks a relation file from src to dst, using copyfile() with
+ * COPYFILE_CLONE_FORCE where that is available, or else the Linux FICLONE
+ * ioctl.  Every failure is reported through pg_fatal().
+ *
+ * src is the existing file to clone; dst is the file to be created.
+ * schemaName/relName are relation's SQL name (used for error messages only).
+ */
+void
+cloneFile(const char *src, const char *dst,
+                 const char *schemaName, const char *relName)
+{
+#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
+       if (copyfile(src, dst, NULL, COPYFILE_CLONE_FORCE) < 0)
+               pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
+                                schemaName, relName, src, dst, strerror(errno));
+#elif defined(__linux__) && defined(FICLONE)
+       int                     src_fd;
+       int                     dest_fd;
+
+       if ((src_fd = open(src, O_RDONLY | PG_BINARY, 0)) < 0)
+               pg_fatal("error while cloning relation \"%s.%s\": could not open file \"%s\": %s\n",
+                                schemaName, relName, src, strerror(errno));
+
+       /* O_EXCL: refuse to touch a destination file that already exists */
+       if ((dest_fd = open(dst, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+                                               pg_file_create_mode)) < 0)
+               pg_fatal("error while cloning relation \"%s.%s\": could not create file \"%s\": %s\n",
+                                schemaName, relName, dst, strerror(errno));
+
+       if (ioctl(dest_fd, FICLONE, src_fd) < 0)
+       {
+               /* unlink() may overwrite errno, so capture the ioctl's error first */
+               int                     save_errno = errno;
+
+               /* remove the file created above; it holds no cloned data */
+               unlink(dst);
+               pg_fatal("error while cloning relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
+                                schemaName, relName, src, dst, strerror(save_errno));
+       }
+
+       close(src_fd);
+       close(dest_fd);
+#else
+       /*
+        * Neither cloning primitive was found at build time.  Fail loudly
+        * instead of returning as if the file had been transferred.
+        */
+       pg_fatal("file cloning not supported on this platform\n");
+#endif
+}
+
+
 /*
  * copyFile()
  *
@@ -270,6 +318,48 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
        close(src_fd);
 }
 
+/*
+ * check_file_clone()
+ *
+ * Verifies that files can be cloned from the old cluster's data directory
+ * into the new one, by cloning the old cluster's PG_VERSION file to a
+ * scratch file "PG_VERSION.clonetest" in the new data directory and then
+ * removing it again.  Any failure, including a build without a usable
+ * cloning primitive, is reported through pg_fatal().
+ */
+void
+check_file_clone(void)
+{
+       char            existing_file[MAXPGPATH];
+       char            new_link_file[MAXPGPATH];
+
+       snprintf(existing_file, sizeof(existing_file), "%s/PG_VERSION", old_cluster.pgdata);
+       snprintf(new_link_file, sizeof(new_link_file), "%s/PG_VERSION.clonetest", new_cluster.pgdata);
+       unlink(new_link_file);          /* might fail */
+
+#if defined(HAVE_COPYFILE) && defined(COPYFILE_CLONE_FORCE)
+       if (copyfile(existing_file, new_link_file, NULL, COPYFILE_CLONE_FORCE) < 0)
+               pg_fatal("could not clone file between old and new data directories: %s\n",
+                                strerror(errno));
+#elif defined(__linux__) && defined(FICLONE)
+       {
+               int                     src_fd;
+               int                     dest_fd;
+
+               if ((src_fd = open(existing_file, O_RDONLY | PG_BINARY, 0)) < 0)
+                       pg_fatal("could not open file \"%s\": %s\n",
+                                        existing_file, strerror(errno));
+
+               if ((dest_fd = open(new_link_file, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
+                                                       pg_file_create_mode)) < 0)
+                       pg_fatal("could not create file \"%s\": %s\n",
+                                        new_link_file, strerror(errno));
+
+               if (ioctl(dest_fd, FICLONE, src_fd) < 0)
+               {
+                       /* unlink() may overwrite errno, so capture the ioctl's error first */
+                       int                     save_errno = errno;
+
+                       /* don't leave the scratch file behind in the new data directory */
+                       unlink(new_link_file);
+                       pg_fatal("could not clone file between old and new data directories: %s\n",
+                                        strerror(save_errno));
+               }
+
+               close(src_fd);
+               close(dest_fd);
+       }
+#else
+       pg_fatal("file cloning not supported on this platform\n");
+#endif
+
+       /* the probe succeeded; remove the scratch file */
+       unlink(new_link_file);
+}
+
 void
 check_hard_link(void)
 {
index 9dbc9225a64c7d0fa941943e5fd69ff43f367924..7cc92f2dcb39f5d2d81bd74e0f4f2acd1ee4aada 100644 (file)
@@ -53,6 +53,8 @@ parseCommandLine(int argc, char *argv[])
                {"retain", no_argument, NULL, 'r'},
                {"jobs", required_argument, NULL, 'j'},
                {"verbose", no_argument, NULL, 'v'},
+               {"clone", no_argument, NULL, 1},
+
                {NULL, 0, NULL, 0}
        };
        int                     option;                 /* Command line option */
@@ -203,6 +205,10 @@ parseCommandLine(int argc, char *argv[])
                                log_opts.verbose = true;
                                break;
 
+                       case 1:
+                               user_opts.transfer_mode = TRANSFER_MODE_CLONE;
+                               break;
+
                        default:
                                pg_fatal("Try \"%s --help\" for more information.\n",
                                                 os_info.progname);
@@ -293,6 +299,7 @@ usage(void)
        printf(_("  -U, --username=NAME           cluster superuser (default \"%s\")\n"), os_info.user);
        printf(_("  -v, --verbose                 enable verbose internal logging\n"));
        printf(_("  -V, --version                 display version information, then exit\n"));
+       printf(_("  --clone                       clone instead of copying files to new cluster\n"));
        printf(_("  -?, --help                    show this help, then exit\n"));
        printf(_("\n"
                         "Before running pg_upgrade you must:\n"
index f83a3eeb6748002f51b40829c420faf18dd64dcb..51bd211d46a060401c5df7b734f5602eb8c39215 100644 (file)
@@ -230,10 +230,11 @@ typedef struct
 } ControlData;
 
 /*
- * Enumeration to denote link modes
+ * Enumeration to denote transfer modes
  */
 typedef enum
 {
+       TRANSFER_MODE_CLONE,
        TRANSFER_MODE_COPY,
        TRANSFER_MODE_LINK
 } transferMode;
@@ -372,12 +373,15 @@ bool              pid_lock_file_exists(const char *datadir);
 
 /* file.c */
 
+void cloneFile(const char *src, const char *dst,
+                const char *schemaName, const char *relName);
 void copyFile(const char *src, const char *dst,
                 const char *schemaName, const char *relName);
 void linkFile(const char *src, const char *dst,
                 const char *schemaName, const char *relName);
 void rewriteVisibilityMap(const char *fromfile, const char *tofile,
                                         const char *schemaName, const char *relName);
+void           check_file_clone(void);
 void           check_hard_link(void);
 
 /* fopen_priv() is no longer different from fopen() */
index ed604f26ca705299249b5860c11406c3ce827a9d..3b16c92a027df3db9cd3620441991dd8cc363efb 100644 (file)
@@ -30,10 +30,18 @@ void
 transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
                                                         char *old_pgdata, char *new_pgdata)
 {
-       if (user_opts.transfer_mode == TRANSFER_MODE_LINK)
-               pg_log(PG_REPORT, "Linking user relation files\n");
-       else
-               pg_log(PG_REPORT, "Copying user relation files\n");
+       switch (user_opts.transfer_mode)
+       {
+               case TRANSFER_MODE_CLONE:
+                       pg_log(PG_REPORT, "Cloning user relation files\n");
+                       break;
+               case TRANSFER_MODE_COPY:
+                       pg_log(PG_REPORT, "Copying user relation files\n");
+                       break;
+               case TRANSFER_MODE_LINK:
+                       pg_log(PG_REPORT, "Linking user relation files\n");
+                       break;
+       }
 
        /*
         * Transferring files by tablespace is tricky because a single database
@@ -250,17 +258,23 @@ transfer_relfile(FileNameMap *map, const char *type_suffix, bool vm_must_add_fro
                                   old_file, new_file);
                        rewriteVisibilityMap(old_file, new_file, map->nspname, map->relname);
                }
-               else if (user_opts.transfer_mode == TRANSFER_MODE_COPY)
-               {
-                       pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n",
-                                  old_file, new_file);
-                       copyFile(old_file, new_file, map->nspname, map->relname);
-               }
                else
-               {
-                       pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n",
-                                  old_file, new_file);
-                       linkFile(old_file, new_file, map->nspname, map->relname);
-               }
+                       switch (user_opts.transfer_mode)
+                       {
+                               case TRANSFER_MODE_CLONE:
+                                       pg_log(PG_VERBOSE, "cloning \"%s\" to \"%s\"\n",
+                                                  old_file, new_file);
+                                       cloneFile(old_file, new_file, map->nspname, map->relname);
+                                       break;
+                               case TRANSFER_MODE_COPY:
+                                       pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n",
+                                                  old_file, new_file);
+                                       copyFile(old_file, new_file, map->nspname, map->relname);
+                                       break;
+                               case TRANSFER_MODE_LINK:
+                                       pg_log(PG_VERBOSE, "linking \"%s\" to \"%s\"\n",
+                                                  old_file, new_file);
+                                       linkFile(old_file, new_file, map->nspname, map->relname);
+                       }
        }
 }
index 5a996e75572203de7b73d240c3a8937631553b0c..b1f709358e4c819f15dc540e3ea7fe9e30cf54ac 100644 (file)
 /* Define to 1 if your compiler handles computed gotos. */
 #undef HAVE_COMPUTED_GOTO
 
+/* Define to 1 if you have the `copyfile' function. */
+#undef HAVE_COPYFILE
+
 /* Define to 1 if you have the <crtdefs.h> header file. */
 #undef HAVE_CRTDEFS_H