granicus.if.org Git - zfs/commitdiff
OpenZFS 8115 - parallel zfs mount
author Sebastien Roy <seb@delphix.com>
Mon, 5 Nov 2018 15:40:05 +0000 (08:40 -0700)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Thu, 15 Nov 2018 19:33:58 +0000 (11:33 -0800)
Porting Notes:
* Use thread pools (tpool) API instead of introducing taskq interfaces
  to libzfs.
* Use pthread_mutex_t for locks as mutex_t isn't available.
* Ignore alternative libshare initialization since OpenZFS-7955 is
  not present on zfsonlinux.

Authored by: Sebastien Roy <seb@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Paul Dagnelie <pcd@delphix.com>
Reviewed by: Prashanth Sreenivasa <pks@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Approved by: Matt Ahrens <mahrens@delphix.com>
Ported-by: Don Brady <don.brady@delphix.com>
OpenZFS-issue: https://www.illumos.org/issues/8115
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/a3f0e2b569
Closes #8092

cmd/zfs/zfs_main.c
include/libzfs.h
include/libzfs_impl.h
lib/libzfs/libzfs_dataset.c
lib/libzfs/libzfs_mount.c
tests/runfiles/linux.run
tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am
tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh [new file with mode: 0755]
tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh [new file with mode: 0755]

index 6e0a6d5bc216c5675fa179e18822ec9a41fe321a..2b7fe93038b37f44bb7be2ca552ab370cd00846c 100644 (file)
@@ -6059,7 +6059,12 @@ zfs_do_holds(int argc, char **argv)
 
 #define        CHECK_SPINNER 30
 #define        SPINNER_TIME 3          /* seconds */
-#define        MOUNT_TIME 5            /* seconds */
+#define        MOUNT_TIME 1            /* seconds */
+
+typedef struct get_all_state {
+       boolean_t       ga_verbose;
+       get_all_cb_t    *ga_cbp;
+} get_all_state_t;
 
 static int
 get_one_dataset(zfs_handle_t *zhp, void *data)
@@ -6068,10 +6073,10 @@ get_one_dataset(zfs_handle_t *zhp, void *data)
        static int spinval = 0;
        static int spincheck = 0;
        static time_t last_spin_time = (time_t)0;
-       get_all_cb_t *cbp = data;
+       get_all_state_t *state = data;
        zfs_type_t type = zfs_get_type(zhp);
 
-       if (cbp->cb_verbose) {
+       if (state->ga_verbose) {
                if (--spincheck < 0) {
                        time_t now = time(NULL);
                        if (last_spin_time + SPINNER_TIME < now) {
@@ -6097,25 +6102,23 @@ get_one_dataset(zfs_handle_t *zhp, void *data)
                zfs_close(zhp);
                return (0);
        }
-       libzfs_add_handle(cbp, zhp);
-       assert(cbp->cb_used <= cbp->cb_alloc);
+       libzfs_add_handle(state->ga_cbp, zhp);
+       assert(state->ga_cbp->cb_used <= state->ga_cbp->cb_alloc);
 
        return (0);
 }
 
 static void
-get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose)
+get_all_datasets(get_all_cb_t *cbp, boolean_t verbose)
 {
-       get_all_cb_t cb = { 0 };
-       cb.cb_verbose = verbose;
-       cb.cb_getone = get_one_dataset;
+       get_all_state_t state = {
+           .ga_verbose = verbose,
+           .ga_cbp = cbp
+       };
 
        if (verbose)
                set_progress_header(gettext("Reading ZFS config"));
-       (void) zfs_iter_root(g_zfs, get_one_dataset, &cb);
-
-       *dslist = cb.cb_handles;
-       *count = cb.cb_used;
+       (void) zfs_iter_root(g_zfs, get_one_dataset, &state);
 
        if (verbose)
                finish_progress(gettext("done."));
@@ -6126,8 +6129,19 @@ get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose)
  * similar, we have a common function with an extra parameter to determine which
  * mode we are using.
  */
-#define        OP_SHARE        0x1
-#define        OP_MOUNT        0x2
+typedef enum { OP_SHARE, OP_MOUNT } share_mount_op_t;
+
+typedef struct share_mount_state {
+       share_mount_op_t        sm_op;
+       boolean_t       sm_verbose;
+       int     sm_flags;
+       char    *sm_options;
+       char    *sm_proto; /* only valid for OP_SHARE */
+       pthread_mutex_t sm_lock; /* protects the remaining fields */
+       uint_t  sm_total; /* number of filesystems to process */
+       uint_t  sm_done; /* number of filesystems processed */
+       int     sm_status; /* -1 if any of the share/mount operations failed */
+} share_mount_state_t;
 
 /*
  * Share or mount a dataset.
@@ -6385,6 +6399,29 @@ report_mount_progress(int current, int total)
                update_progress(info);
 }
 
+/*
+ * zfs_foreach_mountpoint() callback that mounts or shares one filesystem and
+ * updates the progress meter.
+ */
+static int
+share_mount_one_cb(zfs_handle_t *zhp, void *arg)
+{
+       share_mount_state_t *sms = arg;
+       int ret;
+
+       ret = share_mount_one(zhp, sms->sm_op, sms->sm_flags, sms->sm_proto,
+           B_FALSE, sms->sm_options);
+
+       pthread_mutex_lock(&sms->sm_lock);
+       if (ret != 0)
+               sms->sm_status = ret;
+       sms->sm_done++;
+       if (sms->sm_verbose)
+               report_mount_progress(sms->sm_done, sms->sm_total);
+       pthread_mutex_unlock(&sms->sm_lock);
+       return (ret);
+}
+
 static void
 append_options(char *mntopts, char *newopts)
 {
@@ -6459,8 +6496,6 @@ share_mount(int op, int argc, char **argv)
 
        /* check number of arguments */
        if (do_all) {
-               zfs_handle_t **dslist = NULL;
-               size_t i, count = 0;
                char *protocol = NULL;
 
                if (op == OP_SHARE && argc > 0) {
@@ -6481,27 +6516,35 @@ share_mount(int op, int argc, char **argv)
                }
 
                start_progress_timer();
-               get_all_datasets(&dslist, &count, verbose);
+               get_all_cb_t cb = { 0 };
+               get_all_datasets(&cb, verbose);
 
-               if (count == 0) {
+               if (cb.cb_used == 0) {
                        if (options != NULL)
                                free(options);
                        return (0);
                }
 
-               qsort(dslist, count, sizeof (void *), libzfs_dataset_cmp);
-
-               for (i = 0; i < count; i++) {
-                       if (verbose)
-                               report_mount_progress(i, count);
+               share_mount_state_t share_mount_state = { 0 };
+               share_mount_state.sm_op = op;
+               share_mount_state.sm_verbose = verbose;
+               share_mount_state.sm_flags = flags;
+               share_mount_state.sm_options = options;
+               share_mount_state.sm_proto = protocol;
+               share_mount_state.sm_total = cb.cb_used;
+               pthread_mutex_init(&share_mount_state.sm_lock, NULL);
 
-                       if (share_mount_one(dslist[i], op, flags, protocol,
-                           B_FALSE, options) != 0)
-                               ret = 1;
-                       zfs_close(dslist[i]);
-               }
+               /*
+                * libshare isn't mt-safe, so only do the operation in parallel
+                * if we're mounting.
+                */
+               zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used,
+                   share_mount_one_cb, &share_mount_state, op == OP_MOUNT);
+               ret = share_mount_state.sm_status;
 
-               free(dslist);
+               for (int i = 0; i < cb.cb_used; i++)
+                       zfs_close(cb.cb_handles[i]);
+               free(cb.cb_handles);
        } else if (argc == 0) {
                struct mnttab entry;
 
index d34658055da33a2168cadd83a3df4a3afe6a5e7e..762d57bef4eba2a31361e0229e27ef8e7a5b7ae4 100644 (file)
@@ -573,12 +573,11 @@ typedef struct get_all_cb {
        zfs_handle_t    **cb_handles;
        size_t          cb_alloc;
        size_t          cb_used;
-       boolean_t       cb_verbose;
-       int             (*cb_getone)(zfs_handle_t *, void *);
 } get_all_cb_t;
 
+void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, size_t,
+    zfs_iter_f, void *, boolean_t);
 void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *);
-int libzfs_dataset_cmp(const void *, const void *);
 
 /*
  * Functions to create and destroy datasets.
index 568103f4be360710c44310f8b4f38389d8b8265b..9a46b9f12960a16594b9b6034934a40fe937790c 100644 (file)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2018 Datto Inc.
  */
 
@@ -60,6 +60,13 @@ struct libzfs_handle {
        void *libzfs_sharehdl; /* libshare handle */
        uint_t libzfs_shareflags;
        boolean_t libzfs_mnttab_enable;
+       /*
+        * We need a lock to handle the case where parallel mount
+        * threads are populating the mnttab cache simultaneously. The
+        * lock only protects the integrity of the avl tree, and does
+        * not protect the contents of the mnttab entries themselves.
+        */
+       pthread_mutex_t libzfs_mnttab_cache_lock;
        avl_tree_t libzfs_mnttab_cache;
        int libzfs_pool_iter;
        char libzfs_chassis_id[256];
index e79a936f94e7a67d4aa791dc89290f64a2bcbd12..237933c371f0ed35b59dd1ea6748e7b2ed451a39 100644 (file)
@@ -791,6 +791,7 @@ libzfs_mnttab_cache_compare(const void *arg1, const void *arg2)
 void
 libzfs_mnttab_init(libzfs_handle_t *hdl)
 {
+       pthread_mutex_init(&hdl->libzfs_mnttab_cache_lock, NULL);
        assert(avl_numnodes(&hdl->libzfs_mnttab_cache) == 0);
        avl_create(&hdl->libzfs_mnttab_cache, libzfs_mnttab_cache_compare,
            sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node));
@@ -849,6 +850,7 @@ libzfs_mnttab_fini(libzfs_handle_t *hdl)
                free(mtn);
        }
        avl_destroy(&hdl->libzfs_mnttab_cache);
+       (void) pthread_mutex_destroy(&hdl->libzfs_mnttab_cache_lock);
 }
 
 void
@@ -863,7 +865,7 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname,
 {
        mnttab_node_t find;
        mnttab_node_t *mtn;
-       int error;
+       int ret = ENOENT;
 
        if (!hdl->libzfs_mnttab_enable) {
                struct mnttab srch = { 0 };
@@ -883,17 +885,24 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname,
                        return (ENOENT);
        }
 
-       if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0)
-               if ((error = libzfs_mnttab_update(hdl)) != 0)
+       pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock);
+       if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) {
+               int error;
+
+               if ((error = libzfs_mnttab_update(hdl)) != 0) {
+                       pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
                        return (error);
+               }
+       }
 
        find.mtn_mt.mnt_special = (char *)fsname;
        mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, NULL);
        if (mtn) {
                *entry = mtn->mtn_mt;
-               return (0);
+               ret = 0;
        }
-       return (ENOENT);
+       pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
+       return (ret);
 }
 
 void
@@ -902,14 +911,23 @@ libzfs_mnttab_add(libzfs_handle_t *hdl, const char *special,
 {
        mnttab_node_t *mtn;
 
-       if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0)
-               return;
-       mtn = zfs_alloc(hdl, sizeof (mnttab_node_t));
-       mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special);
-       mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp);
-       mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS);
-       mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts);
-       avl_add(&hdl->libzfs_mnttab_cache, mtn);
+       pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock);
+       if (avl_numnodes(&hdl->libzfs_mnttab_cache) != 0) {
+               mtn = zfs_alloc(hdl, sizeof (mnttab_node_t));
+               mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special);
+               mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp);
+               mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS);
+               mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts);
+               /*
+                * Another thread may have already added this entry
+                * via libzfs_mnttab_update. If so we should skip it.
+                */
+               if (avl_find(&hdl->libzfs_mnttab_cache, mtn, NULL) != NULL)
+                       free(mtn);
+               else
+                       avl_add(&hdl->libzfs_mnttab_cache, mtn);
+       }
+       pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
 }
 
 void
@@ -918,6 +936,7 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname)
        mnttab_node_t find;
        mnttab_node_t *ret;
 
+       pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock);
        find.mtn_mt.mnt_special = (char *)fsname;
        if ((ret = avl_find(&hdl->libzfs_mnttab_cache, (void *)&find, NULL))
            != NULL) {
@@ -928,6 +947,7 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname)
                free(ret->mtn_mt.mnt_mntopts);
                free(ret);
        }
+       pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock);
 }
 
 int
index 23e45d0d3ec5e0e9e740b34994872aac11461fc4..ef18bafab2ae0442eb20d2e32045fe4a3baf3798 100644 (file)
@@ -22,7 +22,7 @@
 /*
  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
  * Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
  * Copyright 2017 RackTop Systems.
  * Copyright (c) 2018 Datto Inc.
  * they are used by mount and unmount and when changing a filesystem's
  * mountpoint.
  *
- *     zfs_is_mounted()
- *     zfs_mount()
- *     zfs_unmount()
- *     zfs_unmountall()
+ *     zfs_is_mounted()
+ *     zfs_mount()
+ *     zfs_unmount()
+ *     zfs_unmountall()
  *
  * This file also contains the functions used to manage sharing filesystems via
  * NFS and iSCSI:
  *
- *     zfs_is_shared()
- *     zfs_share()
- *     zfs_unshare()
+ *     zfs_is_shared()
+ *     zfs_share()
+ *     zfs_unshare()
  *
- *     zfs_is_shared_nfs()
- *     zfs_is_shared_smb()
- *     zfs_share_proto()
- *     zfs_shareall();
- *     zfs_unshare_nfs()
- *     zfs_unshare_smb()
- *     zfs_unshareall_nfs()
+ *     zfs_is_shared_nfs()
+ *     zfs_is_shared_smb()
+ *     zfs_share_proto()
+ *     zfs_shareall();
+ *     zfs_unshare_nfs()
+ *     zfs_unshare_smb()
+ *     zfs_unshareall_nfs()
  *     zfs_unshareall_smb()
  *     zfs_unshareall()
  *     zfs_unshareall_bypath()
@@ -60,8 +60,8 @@
  * The following functions are available for pool consumers, and will
  * mount/unmount and share/unshare all datasets within pool:
  *
- *     zpool_enable_datasets()
- *     zpool_disable_datasets()
+ *     zpool_enable_datasets()
+ *     zpool_disable_datasets()
  */
 
 #include <dirent.h>
 #include <libzfs.h>
 
 #include "libzfs_impl.h"
+#include <thread_pool.h>
 
 #include <libshare.h>
 #include <sys/systeminfo.h>
 #define        MAXISALEN       257     /* based on sysinfo(2) man page */
 
+static int mount_tp_nthr = 512;        /* tpool threads for multi-threaded mounting */
+
+static void zfs_mount_task(void *);
 static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *);
 zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **,
     zfs_share_proto_t);
@@ -1146,25 +1150,32 @@ remove_mountpoint(zfs_handle_t *zhp)
        }
 }
 
+/*
+ * Add the given zfs handle to the cb_handles array, dynamically reallocating
+ * the array if it is out of space.
+ */
 void
 libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp)
 {
        if (cbp->cb_alloc == cbp->cb_used) {
                size_t newsz;
-               void *ptr;
+               zfs_handle_t **newhandles;
 
-               newsz = cbp->cb_alloc ? cbp->cb_alloc * 2 : 64;
-               ptr = zfs_realloc(zhp->zfs_hdl,
-                   cbp->cb_handles, cbp->cb_alloc * sizeof (void *),
-                   newsz * sizeof (void *));
-               cbp->cb_handles = ptr;
+               newsz = cbp->cb_alloc != 0 ? cbp->cb_alloc * 2 : 64;
+               newhandles = zfs_realloc(zhp->zfs_hdl,
+                   cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *),
+                   newsz * sizeof (zfs_handle_t *));
+               cbp->cb_handles = newhandles;
                cbp->cb_alloc = newsz;
        }
        cbp->cb_handles[cbp->cb_used++] = zhp;
 }
 
+/*
+ * Recursive helper function used during file system enumeration
+ */
 static int
-mount_cb(zfs_handle_t *zhp, void *data)
+zfs_iter_cb(zfs_handle_t *zhp, void *data)
 {
        get_all_cb_t *cbp = data;
 
@@ -1196,112 +1207,351 @@ mount_cb(zfs_handle_t *zhp, void *data)
        }
 
        libzfs_add_handle(cbp, zhp);
-       if (zfs_iter_filesystems(zhp, mount_cb, cbp) != 0) {
+       if (zfs_iter_filesystems(zhp, zfs_iter_cb, cbp) != 0) {
                zfs_close(zhp);
                return (-1);
        }
        return (0);
 }
 
+/*
+ * Sort comparator that compares two mountpoint paths. We sort these paths so
+ * that subdirectories immediately follow their parents. This means that we
+ * effectively treat the '/' character as the lowest value non-nul char. An
+ * example sorted list using this comparator would look like:
+ *
+ * /foo
+ * /foo/bar
+ * /foo/bar/baz
+ * /foo/baz
+ * /foo.bar
+ *
+ * The mounting code depends on this ordering to deterministically iterate
+ * over filesystems in order to spawn parallel mount tasks.
+ */
 int
-libzfs_dataset_cmp(const void *a, const void *b)
+mountpoint_cmp(const void *arga, const void *argb)
 {
-       zfs_handle_t **za = (zfs_handle_t **)a;
-       zfs_handle_t **zb = (zfs_handle_t **)b;
+       zfs_handle_t *const *zap = arga;
+       zfs_handle_t *za = *zap;
+       zfs_handle_t *const *zbp = argb;
+       zfs_handle_t *zb = *zbp;
        char mounta[MAXPATHLEN];
        char mountb[MAXPATHLEN];
+       const char *a = mounta;
+       const char *b = mountb;
        boolean_t gota, gotb;
 
-       if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0)
-               verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta,
+       gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM);
+       if (gota) {
+               verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta,
                    sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0);
-       if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0)
-               verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb,
+       }
+       gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM);
+       if (gotb) {
+               verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb,
                    sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0);
+       }
 
-       if (gota && gotb)
-               return (strcmp(mounta, mountb));
+       if (gota && gotb) {
+               while (*a != '\0' && (*a == *b)) {
+                       a++;
+                       b++;
+               }
+               if (*a == *b)
+                       return (0);
+               if (*a == '\0')
+                       return (-1);
+               if (*b == '\0')
+                       return (1);
+               if (*a == '/')
+                       return (-1);
+               if (*b == '/')
+                       return (1);
+               return (*a < *b ? -1 : *a > *b);
+       }
 
        if (gota)
                return (-1);
        if (gotb)
                return (1);
 
-       return (strcmp(zfs_get_name(*za), zfs_get_name(*zb)));
+       /*
+        * If neither filesystem has a mountpoint, revert to sorting by
+        * dataset name.
+        */
+       return (strcmp(zfs_get_name(za), zfs_get_name(zb)));
 }
 
 /*
- * Mount and share all datasets within the given pool.  This assumes that no
- * datasets within the pool are currently mounted.  Because users can create
- * complicated nested hierarchies of mountpoints, we first gather all the
- * datasets and mountpoints within the pool, and sort them by mountpoint.  Once
- * we have the list of all filesystems, we iterate over them in order and mount
- * and/or share each one.
+ * Return true if path2 is a child of path1.
  */
-#pragma weak zpool_mount_datasets = zpool_enable_datasets
-int
-zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
+static boolean_t
+libzfs_path_contains(const char *path1, const char *path2)
 {
-       get_all_cb_t cb = { 0 };
-       libzfs_handle_t *hdl = zhp->zpool_hdl;
-       zfs_handle_t *zfsp;
-       int i, ret = -1;
-       int *good;
+       return (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/');
+}
+
+/*
+ * Given a mountpoint specified by idx in the handles array, find the first
+ * non-descendant of that mountpoint and return its index. Descendant paths
+ * start with the parent's path. This function relies on the ordering
+ * enforced by mountpoint_cmp().
+ */
+static int
+non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx)
+{
+       char parent[ZFS_MAXPROPLEN];
+       char child[ZFS_MAXPROPLEN];
+       int i;
+
+       verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent,
+           sizeof (parent), NULL, NULL, 0, B_FALSE) == 0);
+
+       for (i = idx + 1; i < num_handles; i++) {
+               verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child,
+                   sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
+               if (!libzfs_path_contains(parent, child))
+                       break;
+       }
+       return (i);
+}
+
+typedef struct mnt_param {
+       libzfs_handle_t *mnt_hdl;
+       tpool_t         *mnt_tp;
+       zfs_handle_t    **mnt_zhps; /* filesystems to mount */
+       size_t          mnt_num_handles;
+       int             mnt_idx;        /* Index of selected entry to mount */
+       zfs_iter_f      mnt_func;
+       void            *mnt_data;
+} mnt_param_t;
+
+/*
+ * Allocate and populate the parameter struct for mount function, and
+ * schedule mounting of the entry selected by idx.
+ */
+static void
+zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles,
+    size_t num_handles, int idx, zfs_iter_f func, void *data, tpool_t *tp)
+{
+       mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t));
+
+       mnt_param->mnt_hdl = hdl;
+       mnt_param->mnt_tp = tp;
+       mnt_param->mnt_zhps = handles;
+       mnt_param->mnt_num_handles = num_handles;
+       mnt_param->mnt_idx = idx;
+       mnt_param->mnt_func = func;
+       mnt_param->mnt_data = data;
+
+       (void) tpool_dispatch(tp, zfs_mount_task, (void*)mnt_param);
+}
+
+/*
+ * This is the structure used to keep state of mounting or sharing operations
+ * during a call to zpool_enable_datasets().
+ */
+typedef struct mount_state {
+       /*
+        * ms_mntstatus is set to -1 if any mount fails. While multiple threads
+        * could update this variable concurrently, no synchronization is
+        * needed as it's only ever set to -1.
+        */
+       int             ms_mntstatus;
+       int             ms_mntflags;
+       const char      *ms_mntopts;
+} mount_state_t;
+
+static int
+zfs_mount_one(zfs_handle_t *zhp, void *arg)
+{
+       mount_state_t *ms = arg;
+       int ret = 0;
 
        /*
-        * Gather all non-snap datasets within the pool.
+        * don't attempt to mount encrypted datasets with
+        * unloaded keys
         */
-       if ((zfsp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_DATASET)) == NULL)
-               goto out;
+       if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) ==
+           ZFS_KEYSTATUS_UNAVAILABLE)
+               return (0);
+
+       if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0)
+               ret = ms->ms_mntstatus = -1;
+       return (ret);
+}
+
+static int
+zfs_share_one(zfs_handle_t *zhp, void *arg)
+{
+       mount_state_t *ms = arg;
+       int ret = 0;
+
+       if (zfs_share(zhp) != 0)
+               ret = ms->ms_mntstatus = -1;
+       return (ret);
+}
+
+/*
+ * Thread pool function to mount one file system. On completion, it finds and
+ * schedules its children to be mounted. This depends on the sorting done in
+ * zfs_foreach_mountpoint(). Note that the degenerate case (chain of entries
+ * each descending from the previous) will have no parallelism since we always
+ * have to wait for the parent to finish mounting before we can schedule
+ * its children.
+ */
+static void
+zfs_mount_task(void *arg)
+{
+       mnt_param_t *mp = arg;
+       int idx = mp->mnt_idx;
+       zfs_handle_t **handles = mp->mnt_zhps;
+       size_t num_handles = mp->mnt_num_handles;
+       char mountpoint[ZFS_MAXPROPLEN];
+
+       verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint,
+           sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0);
+
+       if (mp->mnt_func(handles[idx], mp->mnt_data) != 0)
+               return;
 
-       libzfs_add_handle(&cb, zfsp);
-       if (zfs_iter_filesystems(zfsp, mount_cb, &cb) != 0)
-               goto out;
        /*
-        * Sort the datasets by mountpoint.
+        * We dispatch tasks to mount filesystems with mountpoints underneath
+        * this one. We do this by dispatching the next filesystem with a
+        * descendant mountpoint of the one we just mounted, then skip all of
+        * its descendants, dispatch the next descendant mountpoint, and so on.
+        * The non_descendant_idx() function skips over filesystems that are
+        * descendants of the filesystem we just dispatched.
         */
-       qsort(cb.cb_handles, cb.cb_used, sizeof (void *),
-           libzfs_dataset_cmp);
+       for (int i = idx + 1; i < num_handles;
+           i = non_descendant_idx(handles, num_handles, i)) {
+               char child[ZFS_MAXPROPLEN];
+               verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT,
+                   child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0);
+
+               if (!libzfs_path_contains(mountpoint, child))
+                       break; /* not a descendant, return */
+               zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i,
+                   mp->mnt_func, mp->mnt_data, mp->mnt_tp);
+       }
+       free(mp);
+}
 
+/*
+ * Issue the func callback for each ZFS handle contained in the handles
+ * array. This function is used to mount all datasets, and so this function
+ * guarantees that filesystems for parent mountpoints are called before their
+ * children. As such, before issuing any callbacks, we first sort the array
+ * of handles by mountpoint.
+ *
+ * Callbacks are issued in one of two ways:
+ *
+ * 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT
+ *    environment variable is set, then we issue callbacks sequentially.
+ *
+ * 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT
+ *    environment variable is not set, then we use a tpool to dispatch threads
+ *    to mount filesystems in parallel. This function dispatches tasks to mount
+ *    the filesystems at the top-level mountpoints, and these tasks in turn
+ *    are responsible for recursively mounting filesystems in their children
+ *    mountpoints.
+ */
+void
+zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles,
+    size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel)
+{
        /*
-        * And mount all the datasets, keeping track of which ones
-        * succeeded or failed.
+        * The ZFS_SERIAL_MOUNT environment variable is an undocumented
+        * variable that can be used as a convenience to do a/b comparison
+        * of serial vs. parallel mounting.
         */
-       if ((good = zfs_alloc(zhp->zpool_hdl,
-           cb.cb_used * sizeof (int))) == NULL)
-               goto out;
+       boolean_t serial_mount = !parallel ||
+           (getenv("ZFS_SERIAL_MOUNT") != NULL);
 
-       ret = 0;
-       for (i = 0; i < cb.cb_used; i++) {
-               /*
-                * don't attempt to mount encrypted datasets with
-                * unloaded keys
-                */
-               if (zfs_prop_get_int(cb.cb_handles[i], ZFS_PROP_KEYSTATUS) ==
-                   ZFS_KEYSTATUS_UNAVAILABLE)
-                       continue;
+       /*
+        * Sort the datasets by mountpoint. See mountpoint_cmp for details
+        * of how these are sorted.
+        */
+       qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp);
 
-               if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0)
-                       ret = -1;
-               else
-                       good[i] = 1;
+       if (serial_mount) {
+               for (int i = 0; i < num_handles; i++) {
+                       func(handles[i], data);
+               }
+               return;
        }
 
        /*
-        * Then share all the ones that need to be shared. This needs
-        * to be a separate pass in order to avoid excessive reloading
-        * of the configuration. Good should never be NULL since
-        * zfs_alloc is supposed to exit if memory isn't available.
+        * Issue the callback function for each dataset using a parallel
+        * algorithm that uses a thread pool to manage threads.
+        */
+       tpool_t *tp = tpool_create(1, mount_tp_nthr, 0, NULL);
+
+       /*
+        * There may be multiple "top level" mountpoints outside of the pool's
+        * root mountpoint, e.g.: /foo /bar. Dispatch a mount task for each of
+        * these.
         */
-       for (i = 0; i < cb.cb_used; i++) {
-               if (good[i] && zfs_share(cb.cb_handles[i]) != 0)
-                       ret = -1;
+       for (int i = 0; i < num_handles;
+           i = non_descendant_idx(handles, num_handles, i)) {
+               zfs_dispatch_mount(hdl, handles, num_handles, i, func, data,
+                   tp);
        }
 
-       free(good);
+       tpool_wait(tp); /* wait for all scheduled mounts to complete */
+       tpool_destroy(tp);
+}
+
+/*
+ * Mount and share all datasets within the given pool.  This assumes that no
+ * datasets within the pool are currently mounted.
+ */
+#pragma weak zpool_mount_datasets = zpool_enable_datasets
+int
+zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags)
+{
+       get_all_cb_t cb = { 0 };
+       mount_state_t ms = { 0 };
+       zfs_handle_t *zfsp;
+       int ret = 0;
+
+       if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name,
+           ZFS_TYPE_DATASET)) == NULL)
+               goto out;
+
+       /*
+        * Gather all non-snapshot datasets within the pool. Start by adding
+        * the root filesystem for this pool to the list, and then iterate
+        * over all child filesystems.
+        */
+       libzfs_add_handle(&cb, zfsp);
+       if (zfs_iter_filesystems(zfsp, zfs_iter_cb, &cb) != 0)
+               goto out;
+
+       /*
+        * Mount all filesystems
+        */
+       ms.ms_mntopts = mntopts;
+       ms.ms_mntflags = flags;
+       zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
+           zfs_mount_one, &ms, B_TRUE);
+       if (ms.ms_mntstatus != 0)
+               ret = ms.ms_mntstatus;
+
+       /*
+        * Share all filesystems that need to be shared. This needs to be
+        * a separate pass because libshare is not mt-safe, and so we need
+        * to share serially.
+        */
+       ms.ms_mntstatus = 0;
+       zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used,
+           zfs_share_one, &ms, B_FALSE);
+       if (ms.ms_mntstatus != 0)
+               ret = ms.ms_mntstatus;
 
 out:
-       for (i = 0; i < cb.cb_used; i++)
+       for (int i = 0; i < cb.cb_used; i++)
                zfs_close(cb.cb_handles[i]);
        free(cb.cb_handles);
 
index e5826dd7a2b1a8822a07df05adf0a55ec678fbdc..225e9bc8b53dd9e470d1241708564d9800227af1 100644 (file)
@@ -181,7 +181,7 @@ tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos',
     'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg',
     'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg',
     'zfs_mount_all_001_pos', 'zfs_mount_encrypted', 'zfs_mount_remount',
-    'zfs_multi_mount']
+    'zfs_multi_mount', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints']
 tags = ['functional', 'cli_root', 'zfs_mount']
 
 [tests/functional/cli_root/zfs_program]
index 05cbd8a77baaca20f3c8511710cfdeeb105b830e..b2de98934b7496395bae97c4f0b897e06f866683 100644 (file)
@@ -14,8 +14,10 @@ dist_pkgdata_SCRIPTS = \
        zfs_mount_010_neg.ksh \
        zfs_mount_011_neg.ksh \
        zfs_mount_012_neg.ksh \
-       zfs_mount_encrypted.ksh \
        zfs_mount_all_001_pos.ksh \
+       zfs_mount_all_fail.ksh \
+       zfs_mount_all_mountpoints.ksh \
+       zfs_mount_encrypted.ksh \
        zfs_mount_remount.ksh \
        zfs_multi_mount.ksh
 
index 2a7cf6ef735bdf5967f2f49f38d6f6c523ebf04c..2afb9a547b50748a09928ff0e3511020d0b3fb90 100644 (file)
@@ -25,7 +25,7 @@
 #
 
 #
-# Copyright (c) 2016 by Delphix. All rights reserved.
+# Copyright (c) 2017 by Delphix. All rights reserved.
 #
 
 . $STF_SUITE/include/libtest.shlib
@@ -84,14 +84,12 @@ function setup_filesystem #disklist #pool #fs #mntpoint #type #vdev
        fi
 
        case "$type" in
-               'ctr')  log_must zfs create $pool/$fs
-                       log_must zfs set mountpoint=$mntpoint $pool/$fs
+               'ctr')  log_must zfs create -o mountpoint=$mntpoint $pool/$fs
                        ;;
                'vol')  log_must zfs create -V $VOLSIZE $pool/$fs
                        block_device_wait
                        ;;
-               *)      log_must zfs create $pool/$fs
-                       log_must zfs set mountpoint=$mntpoint $pool/$fs
+               *)      log_must zfs create -o mountpoint=$mntpoint $pool/$fs
                        ;;
        esac
 
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh
new file mode 100755 (executable)
index 0000000..d7fcd20
--- /dev/null
@@ -0,0 +1,96 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
+
+# DESCRIPTION:
+#       Verify that if 'zfs mount -a' fails to mount one filesystem,
+#       the command fails with a non-zero error code, but all other
+#       filesystems are mounted.
+#
+# STRATEGY:
+#       1. Create zfs filesystems
+#       2. Unmount all of the filesystems
+#       3. Create a stray file in one filesystem's mountpoint
+#       4. Verify that 'zfs mount -a' fails to mount that filesystem
+#       5. Verify that all other filesystems were mounted
+#
+
+verify_runnable "both"
+
+typeset -a filesystems
+typeset path=${TEST_BASE_DIR%%/}/testroot$$/$TESTPOOL
+typeset fscount=10
+
+function setup_all
+{
+       # Create $fscount filesystems at the top level of $path
+       for ((i=0; i<$fscount; i++)); do
+               setup_filesystem "$DISKS" "$TESTPOOL" $i "$path/$i" ctr
+       done
+
+       zfs list -r $TESTPOOL
+
+       return 0
+}
+
+function cleanup_all
+{
+       export __ZFS_POOL_RESTRICT="$TESTPOOL"
+       log_must zfs $unmountall
+       unset __ZFS_POOL_RESTRICT
+
+       [[ -d ${TEST_BASE_DIR%%/}/testroot$$ ]] && \
+               rm -rf ${TEST_BASE_DIR%%/}/testroot$$
+}
+
+log_onexit cleanup_all
+
+log_must setup_all
+
+#
+# Unmount all of the above so that we can create the stray file
+# in one of the mountpoint directories.
+#
+export __ZFS_POOL_RESTRICT="$TESTPOOL"
+log_must zfs $unmountall
+unset __ZFS_POOL_RESTRICT
+
+# All of our filesystems should be unmounted at this point
+for ((i=0; i<$fscount; i++)); do
+       log_mustnot mounted "$TESTPOOL/$i"
+done
+
+# Create a stray file in one filesystem's mountpoint
+touch $path/0/strayfile
+
+# Verify that zfs mount -a fails
+export __ZFS_POOL_RESTRICT="$TESTPOOL"
+log_mustnot zfs $mountall
+unset __ZFS_POOL_RESTRICT
+
+# All filesystems except for "0" should be mounted
+log_mustnot mounted "$TESTPOOL/0"
+for ((i=1; i<$fscount; i++)); do
+       log_must mounted "$TESTPOOL/$i"
+done
+
+log_pass "'zfs $mountall' failed as expected."
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh
new file mode 100755 (executable)
index 0000000..3e6a24b
--- /dev/null
@@ -0,0 +1,162 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2017 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib
+
+# DESCRIPTION:
+#       Verify that 'zfs mount -a' succeeds given a set of filesystems
+#       whose mountpoints have a parent/child relationship which is
+#       counter to the filesystem parent/child relationship.
+#
+# STRATEGY:
+#       1. Create zfs filesystems within the given pool.
+#       2. Unmount all the filesystems.
+#       3. Verify that the 'zfs mount -a' command succeeds
+#          and that all available ZFS filesystems are mounted.
+#       4. Verify that 'zfs mount' is identical to 'df -F zfs'.
+#
+
+verify_runnable "both"
+
+typeset -a filesystems
+
+function setup_all
+{
+       typeset path=${TEST_BASE_DIR%%/}/testroot$$/$TESTPOOL
+       typeset fscount=10
+
+       #
+       # Generate an array of filesystem names that represent a deep
+       # hierarchy as such:
+       #
+       # 0
+       # 0/1
+       # 0/1/2
+       # 0/1/2/3
+       # 0/1/2/3/4
+       # ...
+       #
+       fs=0
+       for ((i=0; i<$fscount; i++)); do
+               if [[ $i -gt 0 ]]; then
+                       fs=$fs/$i
+               fi
+               filesystems+=($fs)
+       done
+
+       # Create all of the above filesystems
+       for ((i=0; i<$fscount; i++)); do
+               fs=${filesystems[$i]}
+               setup_filesystem "$DISKS" "$TESTPOOL" "$fs" "$path/$i" ctr
+       done
+
+       zfs list -r $TESTPOOL
+
+       #
+       # Unmount all of the above so that we can set up our convoluted
+       # mount paths.
+       #
+       export __ZFS_POOL_RESTRICT="$TESTPOOL"
+       log_must zfs $unmountall
+       unset __ZFS_POOL_RESTRICT
+
+       #
+       # Configure the mount paths so that each filesystem's mountpoint
+       # lies beneath that of its child filesystem. We should end up with
+       # something like the following (modulo the number of filesystems):
+       #
+       # NAME                       MOUNTPOINT
+       # testpool                   /testpool
+       # testpool/0                 /testroot25416/testpool/0/1/2/3/4/5/6
+       # testpool/0/1               /testroot25416/testpool/0/1/2/3/4/5
+       # testpool/0/1/2             /testroot25416/testpool/0/1/2/3/4
+       # testpool/0/1/2/3           /testroot25416/testpool/0/1/2/3
+       # testpool/0/1/2/3/4         /testroot25416/testpool/0/1/2
+       # testpool/0/1/2/3/4/5       /testroot25416/testpool/0/1
+       # testpool/0/1/2/3/4/5/6     /testroot25416/testpool/0
+       #
+       for ((i=0; i<$fscount; i++)); do
+               fs=$TESTPOOL/${filesystems[$(($fscount - $i - 1))]}
+               mnt=$path/${filesystems[$i]}
+               zfs set mountpoint=$mnt $fs
+       done
+
+       zfs list -r $TESTPOOL
+
+       return 0
+}
+
+function cleanup_all
+{
+       export __ZFS_POOL_RESTRICT="$TESTPOOL"
+       log_must zfs $unmountall
+       unset __ZFS_POOL_RESTRICT
+
+       for fs in ${filesystems[@]}; do
+               cleanup_filesystem "$TESTPOOL" "$fs"
+       done
+       [[ -d ${TEST_BASE_DIR%%/}/testroot$$ ]] && \
+               rm -rf ${TEST_BASE_DIR%%/}/testroot$$
+}
+
+#
+# This function takes a single true/false argument. If true it will verify that
+# all file systems are mounted. If false it will verify that they are not
+# mounted.
+#
+function verify_all
+{
+       if $1; then
+               logfunc=log_must
+       else
+               logfunc=log_mustnot
+       fi
+
+       for fs in ${filesystems[@]}; do
+               $logfunc mounted "$TESTPOOL/$fs"
+       done
+
+       return 0
+}
+
+log_onexit cleanup_all
+
+log_must setup_all
+
+export __ZFS_POOL_RESTRICT="$TESTPOOL"
+log_must zfs $unmountall
+unset __ZFS_POOL_RESTRICT
+
+verify_all false
+
+export __ZFS_POOL_RESTRICT="$TESTPOOL"
+log_must zfs $mountall
+unset __ZFS_POOL_RESTRICT
+
+verify_all true
+
+log_note "Verify that 'zfs $mountcmd' will display " \
+       "all ZFS filesystems currently mounted."
+
+verify_mount_display
+
+log_pass "'zfs $mountall' succeeds as root, " \
+       "and all available ZFS filesystems are mounted."