From: Sebastien Roy Date: Mon, 5 Nov 2018 15:40:05 +0000 (-0700) Subject: OpenZFS 8115 - parallel zfs mount X-Git-Tag: zfs-0.8.0-rc3~65 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a10d50f999511d304f910852c7825c70c9c9e303;p=zfs OpenZFS 8115 - parallel zfs mount Porting Notes: * Use thread pools (tpool) API instead of introducing taskq interfaces to libzfs. * Use pthread_mutex_t for locks as mutex_t isn't available. * Ignore alternative libshare initialization since OpenZFS-7955 is not present on zfsonlinux. Authored by: Sebastien Roy Reviewed by: Matthew Ahrens Reviewed by: Pavel Zakharov Reviewed by: Brad Lewis Reviewed by: George Wilson Reviewed by: Paul Dagnelie Reviewed by: Prashanth Sreenivasa Authored by: Brian Behlendorf Approved by: Matt Ahrens Ported-by: Don Brady OpenZFS-issue: https://www.illumos.org/issues/8115 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/a3f0e2b569 Closes #8092 --- diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index 6e0a6d5bc..2b7fe9303 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -6059,7 +6059,12 @@ zfs_do_holds(int argc, char **argv) #define CHECK_SPINNER 30 #define SPINNER_TIME 3 /* seconds */ -#define MOUNT_TIME 5 /* seconds */ +#define MOUNT_TIME 1 /* seconds */ + +typedef struct get_all_state { + boolean_t ga_verbose; + get_all_cb_t *ga_cbp; +} get_all_state_t; static int get_one_dataset(zfs_handle_t *zhp, void *data) @@ -6068,10 +6073,10 @@ get_one_dataset(zfs_handle_t *zhp, void *data) static int spinval = 0; static int spincheck = 0; static time_t last_spin_time = (time_t)0; - get_all_cb_t *cbp = data; + get_all_state_t *state = data; zfs_type_t type = zfs_get_type(zhp); - if (cbp->cb_verbose) { + if (state->ga_verbose) { if (--spincheck < 0) { time_t now = time(NULL); if (last_spin_time + SPINNER_TIME < now) { @@ -6097,25 +6102,23 @@ get_one_dataset(zfs_handle_t *zhp, void *data) zfs_close(zhp); return (0); } - libzfs_add_handle(cbp, zhp); - assert(cbp->cb_used <= 
cbp->cb_alloc); + libzfs_add_handle(state->ga_cbp, zhp); + assert(state->ga_cbp->cb_used <= state->ga_cbp->cb_alloc); return (0); } static void -get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose) +get_all_datasets(get_all_cb_t *cbp, boolean_t verbose) { - get_all_cb_t cb = { 0 }; - cb.cb_verbose = verbose; - cb.cb_getone = get_one_dataset; + get_all_state_t state = { + .ga_verbose = verbose, + .ga_cbp = cbp + }; if (verbose) set_progress_header(gettext("Reading ZFS config")); - (void) zfs_iter_root(g_zfs, get_one_dataset, &cb); - - *dslist = cb.cb_handles; - *count = cb.cb_used; + (void) zfs_iter_root(g_zfs, get_one_dataset, &state); if (verbose) finish_progress(gettext("done.")); @@ -6126,8 +6129,19 @@ get_all_datasets(zfs_handle_t ***dslist, size_t *count, boolean_t verbose) * similar, we have a common function with an extra parameter to determine which * mode we are using. */ -#define OP_SHARE 0x1 -#define OP_MOUNT 0x2 +typedef enum { OP_SHARE, OP_MOUNT } share_mount_op_t; + +typedef struct share_mount_state { + share_mount_op_t sm_op; + boolean_t sm_verbose; + int sm_flags; + char *sm_options; + char *sm_proto; /* only valid for OP_SHARE */ + pthread_mutex_t sm_lock; /* protects the remaining fields */ + uint_t sm_total; /* number of filesystems to process */ + uint_t sm_done; /* number of filesystems processed */ + int sm_status; /* -1 if any of the share/mount operations failed */ +} share_mount_state_t; /* * Share or mount a dataset. @@ -6385,6 +6399,29 @@ report_mount_progress(int current, int total) update_progress(info); } +/* + * zfs_foreach_mountpoint() callback that mounts or shares one filesystem and + * updates the progress meter. 
+ */ +static int +share_mount_one_cb(zfs_handle_t *zhp, void *arg) +{ + share_mount_state_t *sms = arg; + int ret; + + ret = share_mount_one(zhp, sms->sm_op, sms->sm_flags, sms->sm_proto, + B_FALSE, sms->sm_options); + + pthread_mutex_lock(&sms->sm_lock); + if (ret != 0) + sms->sm_status = ret; + sms->sm_done++; + if (sms->sm_verbose) + report_mount_progress(sms->sm_done, sms->sm_total); + pthread_mutex_unlock(&sms->sm_lock); + return (ret); +} + static void append_options(char *mntopts, char *newopts) { @@ -6459,8 +6496,6 @@ share_mount(int op, int argc, char **argv) /* check number of arguments */ if (do_all) { - zfs_handle_t **dslist = NULL; - size_t i, count = 0; char *protocol = NULL; if (op == OP_SHARE && argc > 0) { @@ -6481,27 +6516,35 @@ share_mount(int op, int argc, char **argv) } start_progress_timer(); - get_all_datasets(&dslist, &count, verbose); + get_all_cb_t cb = { 0 }; + get_all_datasets(&cb, verbose); - if (count == 0) { + if (cb.cb_used == 0) { if (options != NULL) free(options); return (0); } - qsort(dslist, count, sizeof (void *), libzfs_dataset_cmp); - - for (i = 0; i < count; i++) { - if (verbose) - report_mount_progress(i, count); + share_mount_state_t share_mount_state = { 0 }; + share_mount_state.sm_op = op; + share_mount_state.sm_verbose = verbose; + share_mount_state.sm_flags = flags; + share_mount_state.sm_options = options; + share_mount_state.sm_proto = protocol; + share_mount_state.sm_total = cb.cb_used; + pthread_mutex_init(&share_mount_state.sm_lock, NULL); - if (share_mount_one(dslist[i], op, flags, protocol, - B_FALSE, options) != 0) - ret = 1; - zfs_close(dslist[i]); - } + /* + * libshare isn't mt-safe, so only do the operation in parallel + * if we're mounting. 
+ */ + zfs_foreach_mountpoint(g_zfs, cb.cb_handles, cb.cb_used, + share_mount_one_cb, &share_mount_state, op == OP_MOUNT); + ret = share_mount_state.sm_status; - free(dslist); + for (int i = 0; i < cb.cb_used; i++) + zfs_close(cb.cb_handles[i]); + free(cb.cb_handles); } else if (argc == 0) { struct mnttab entry; diff --git a/include/libzfs.h b/include/libzfs.h index d34658055..762d57bef 100644 --- a/include/libzfs.h +++ b/include/libzfs.h @@ -573,12 +573,11 @@ typedef struct get_all_cb { zfs_handle_t **cb_handles; size_t cb_alloc; size_t cb_used; - boolean_t cb_verbose; - int (*cb_getone)(zfs_handle_t *, void *); } get_all_cb_t; +void zfs_foreach_mountpoint(libzfs_handle_t *, zfs_handle_t **, size_t, + zfs_iter_f, void *, boolean_t); void libzfs_add_handle(get_all_cb_t *, zfs_handle_t *); -int libzfs_dataset_cmp(const void *, const void *); /* * Functions to create and destroy datasets. diff --git a/include/libzfs_impl.h b/include/libzfs_impl.h index 568103f4b..9a46b9f12 100644 --- a/include/libzfs_impl.h +++ b/include/libzfs_impl.h @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2015 by Delphix. All rights reserved. + * Copyright (c) 2011, 2017 by Delphix. All rights reserved. * Copyright (c) 2018 Datto Inc. */ @@ -60,6 +60,13 @@ struct libzfs_handle { void *libzfs_sharehdl; /* libshare handle */ uint_t libzfs_shareflags; boolean_t libzfs_mnttab_enable; + /* + * We need a lock to handle the case where parallel mount + * threads are populating the mnttab cache simultaneously. The + * lock only protects the integrity of the avl tree, and does + * not protect the contents of the mnttab entries themselves. 
+ */ + pthread_mutex_t libzfs_mnttab_cache_lock; avl_tree_t libzfs_mnttab_cache; int libzfs_pool_iter; char libzfs_chassis_id[256]; diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c index e79a936f9..237933c37 100644 --- a/lib/libzfs/libzfs_dataset.c +++ b/lib/libzfs/libzfs_dataset.c @@ -791,6 +791,7 @@ libzfs_mnttab_cache_compare(const void *arg1, const void *arg2) void libzfs_mnttab_init(libzfs_handle_t *hdl) { + pthread_mutex_init(&hdl->libzfs_mnttab_cache_lock, NULL); assert(avl_numnodes(&hdl->libzfs_mnttab_cache) == 0); avl_create(&hdl->libzfs_mnttab_cache, libzfs_mnttab_cache_compare, sizeof (mnttab_node_t), offsetof(mnttab_node_t, mtn_node)); @@ -849,6 +850,7 @@ libzfs_mnttab_fini(libzfs_handle_t *hdl) free(mtn); } avl_destroy(&hdl->libzfs_mnttab_cache); + (void) pthread_mutex_destroy(&hdl->libzfs_mnttab_cache_lock); } void @@ -863,7 +865,7 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname, { mnttab_node_t find; mnttab_node_t *mtn; - int error; + int ret = ENOENT; if (!hdl->libzfs_mnttab_enable) { struct mnttab srch = { 0 }; @@ -883,17 +885,24 @@ libzfs_mnttab_find(libzfs_handle_t *hdl, const char *fsname, return (ENOENT); } - if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) - if ((error = libzfs_mnttab_update(hdl)) != 0) + pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock); + if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) { + int error; + + if ((error = libzfs_mnttab_update(hdl)) != 0) { + pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); return (error); + } + } find.mtn_mt.mnt_special = (char *)fsname; mtn = avl_find(&hdl->libzfs_mnttab_cache, &find, NULL); if (mtn) { *entry = mtn->mtn_mt; - return (0); + ret = 0; } - return (ENOENT); + pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); + return (ret); } void @@ -902,14 +911,23 @@ libzfs_mnttab_add(libzfs_handle_t *hdl, const char *special, { mnttab_node_t *mtn; - if (avl_numnodes(&hdl->libzfs_mnttab_cache) == 0) - return; - mtn = zfs_alloc(hdl, sizeof 
(mnttab_node_t)); - mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special); - mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp); - mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS); - mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts); - avl_add(&hdl->libzfs_mnttab_cache, mtn); + pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock); + if (avl_numnodes(&hdl->libzfs_mnttab_cache) != 0) { + mtn = zfs_alloc(hdl, sizeof (mnttab_node_t)); + mtn->mtn_mt.mnt_special = zfs_strdup(hdl, special); + mtn->mtn_mt.mnt_mountp = zfs_strdup(hdl, mountp); + mtn->mtn_mt.mnt_fstype = zfs_strdup(hdl, MNTTYPE_ZFS); + mtn->mtn_mt.mnt_mntopts = zfs_strdup(hdl, mntopts); + /* + * Another thread may have already added this entry + * via libzfs_mnttab_update. If so we should skip it. + */ + if (avl_find(&hdl->libzfs_mnttab_cache, mtn, NULL) != NULL) + free(mtn); + else + avl_add(&hdl->libzfs_mnttab_cache, mtn); + } + pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); } void @@ -918,6 +936,7 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname) mnttab_node_t find; mnttab_node_t *ret; + pthread_mutex_lock(&hdl->libzfs_mnttab_cache_lock); find.mtn_mt.mnt_special = (char *)fsname; if ((ret = avl_find(&hdl->libzfs_mnttab_cache, (void *)&find, NULL)) != NULL) { @@ -928,6 +947,7 @@ libzfs_mnttab_remove(libzfs_handle_t *hdl, const char *fsname) free(ret->mtn_mt.mnt_mntopts); free(ret); } + pthread_mutex_unlock(&hdl->libzfs_mnttab_cache_lock); } int diff --git a/lib/libzfs/libzfs_mount.c b/lib/libzfs/libzfs_mount.c index 23e45d0d3..ef18bafab 100644 --- a/lib/libzfs/libzfs_mount.c +++ b/lib/libzfs/libzfs_mount.c @@ -22,7 +22,7 @@ /* * Copyright 2015 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, 2015 by Delphix. All rights reserved. + * Copyright (c) 2014, 2017 by Delphix. All rights reserved. * Copyright 2016 Igor Kozhukhov * Copyright 2017 RackTop Systems. * Copyright (c) 2018 Datto Inc. 
@@ -34,25 +34,25 @@ * they are used by mount and unmount and when changing a filesystem's * mountpoint. * - * zfs_is_mounted() - * zfs_mount() - * zfs_unmount() - * zfs_unmountall() + * zfs_is_mounted() + * zfs_mount() + * zfs_unmount() + * zfs_unmountall() * * This file also contains the functions used to manage sharing filesystems via * NFS and iSCSI: * - * zfs_is_shared() - * zfs_share() - * zfs_unshare() + * zfs_is_shared() + * zfs_share() + * zfs_unshare() * - * zfs_is_shared_nfs() - * zfs_is_shared_smb() - * zfs_share_proto() - * zfs_shareall(); - * zfs_unshare_nfs() - * zfs_unshare_smb() - * zfs_unshareall_nfs() + * zfs_is_shared_nfs() + * zfs_is_shared_smb() + * zfs_share_proto() + * zfs_shareall(); + * zfs_unshare_nfs() + * zfs_unshare_smb() + * zfs_unshareall_nfs() * zfs_unshareall_smb() * zfs_unshareall() * zfs_unshareall_bypath() @@ -60,8 +60,8 @@ * The following functions are available for pool consumers, and will * mount/unmount and share/unshare all datasets within pool: * - * zpool_enable_datasets() - * zpool_disable_datasets() + * zpool_enable_datasets() + * zpool_disable_datasets() */ #include @@ -84,11 +84,15 @@ #include #include "libzfs_impl.h" +#include #include #include #define MAXISALEN 257 /* based on sysinfo(2) man page */ +static int mount_tp_nthr = 512; /* tpool threads for multi-threaded mounting */ + +static void zfs_mount_task(void *); static int zfs_share_proto(zfs_handle_t *, zfs_share_proto_t *); zfs_share_type_t zfs_is_shared_proto(zfs_handle_t *, char **, zfs_share_proto_t); @@ -1146,25 +1150,32 @@ remove_mountpoint(zfs_handle_t *zhp) } } +/* + * Add the given zfs handle to the cb_handles array, dynamically reallocating + * the array if it is out of space. + */ void libzfs_add_handle(get_all_cb_t *cbp, zfs_handle_t *zhp) { if (cbp->cb_alloc == cbp->cb_used) { size_t newsz; - void *ptr; + zfs_handle_t **newhandles; - newsz = cbp->cb_alloc ? 
cbp->cb_alloc * 2 : 64; - ptr = zfs_realloc(zhp->zfs_hdl, - cbp->cb_handles, cbp->cb_alloc * sizeof (void *), - newsz * sizeof (void *)); - cbp->cb_handles = ptr; + newsz = cbp->cb_alloc != 0 ? cbp->cb_alloc * 2 : 64; + newhandles = zfs_realloc(zhp->zfs_hdl, + cbp->cb_handles, cbp->cb_alloc * sizeof (zfs_handle_t *), + newsz * sizeof (zfs_handle_t *)); + cbp->cb_handles = newhandles; cbp->cb_alloc = newsz; } cbp->cb_handles[cbp->cb_used++] = zhp; } +/* + * Recursive helper function used during file system enumeration + */ static int -mount_cb(zfs_handle_t *zhp, void *data) +zfs_iter_cb(zfs_handle_t *zhp, void *data) { get_all_cb_t *cbp = data; @@ -1196,112 +1207,351 @@ mount_cb(zfs_handle_t *zhp, void *data) } libzfs_add_handle(cbp, zhp); - if (zfs_iter_filesystems(zhp, mount_cb, cbp) != 0) { + if (zfs_iter_filesystems(zhp, zfs_iter_cb, cbp) != 0) { zfs_close(zhp); return (-1); } return (0); } +/* + * Sort comparator that compares two mountpoint paths. We sort these paths so + * that subdirectories immediately follow their parents. This means that we + * effectively treat the '/' character as the lowest value non-nul char. An + * example sorted list using this comparator would look like: + * + * /foo + * /foo/bar + * /foo/bar/baz + * /foo/baz + * /foo.bar + * + * The mounting code depends on this ordering to deterministically iterate + * over filesystems in order to spawn parallel mount tasks. 
+ */ int -libzfs_dataset_cmp(const void *a, const void *b) +mountpoint_cmp(const void *arga, const void *argb) { - zfs_handle_t **za = (zfs_handle_t **)a; - zfs_handle_t **zb = (zfs_handle_t **)b; + zfs_handle_t *const *zap = arga; + zfs_handle_t *za = *zap; + zfs_handle_t *const *zbp = argb; + zfs_handle_t *zb = *zbp; char mounta[MAXPATHLEN]; char mountb[MAXPATHLEN]; + const char *a = mounta; + const char *b = mountb; boolean_t gota, gotb; - if ((gota = (zfs_get_type(*za) == ZFS_TYPE_FILESYSTEM)) != 0) - verify(zfs_prop_get(*za, ZFS_PROP_MOUNTPOINT, mounta, + gota = (zfs_get_type(za) == ZFS_TYPE_FILESYSTEM); + if (gota) { + verify(zfs_prop_get(za, ZFS_PROP_MOUNTPOINT, mounta, sizeof (mounta), NULL, NULL, 0, B_FALSE) == 0); - if ((gotb = (zfs_get_type(*zb) == ZFS_TYPE_FILESYSTEM)) != 0) - verify(zfs_prop_get(*zb, ZFS_PROP_MOUNTPOINT, mountb, + } + gotb = (zfs_get_type(zb) == ZFS_TYPE_FILESYSTEM); + if (gotb) { + verify(zfs_prop_get(zb, ZFS_PROP_MOUNTPOINT, mountb, sizeof (mountb), NULL, NULL, 0, B_FALSE) == 0); + } - if (gota && gotb) - return (strcmp(mounta, mountb)); + if (gota && gotb) { + while (*a != '\0' && (*a == *b)) { + a++; + b++; + } + if (*a == *b) + return (0); + if (*a == '\0') + return (-1); + if (*b == '\0') + return (1); + if (*a == '/') + return (-1); + if (*b == '/') + return (1); + return (*a < *b ? -1 : *a > *b); + } if (gota) return (-1); if (gotb) return (1); - return (strcmp(zfs_get_name(*za), zfs_get_name(*zb))); + /* + * If neither filesystem has a mountpoint, revert to sorting by + * dataset name. + */ + return (strcmp(zfs_get_name(za), zfs_get_name(zb))); } /* - * Mount and share all datasets within the given pool. This assumes that no - * datasets within the pool are currently mounted. Because users can create - * complicated nested hierarchies of mountpoints, we first gather all the - * datasets and mountpoints within the pool, and sort them by mountpoint. 
Once - * we have the list of all filesystems, we iterate over them in order and mount - * and/or share each one. + * Return true if path2 is a child of path1. */ -#pragma weak zpool_mount_datasets = zpool_enable_datasets -int -zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) +static boolean_t +libzfs_path_contains(const char *path1, const char *path2) { - get_all_cb_t cb = { 0 }; - libzfs_handle_t *hdl = zhp->zpool_hdl; - zfs_handle_t *zfsp; - int i, ret = -1; - int *good; + return (strstr(path2, path1) == path2 && path2[strlen(path1)] == '/'); +} + +/* + * Given a mountpoint specified by idx in the handles array, find the first + * non-descendant of that mountpoint and return its index. Descendant paths + * start with the parent's path. This function relies on the ordering + * enforced by mountpoint_cmp(). + */ +static int +non_descendant_idx(zfs_handle_t **handles, size_t num_handles, int idx) +{ + char parent[ZFS_MAXPROPLEN]; + char child[ZFS_MAXPROPLEN]; + int i; + + verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, parent, + sizeof (parent), NULL, NULL, 0, B_FALSE) == 0); + + for (i = idx + 1; i < num_handles; i++) { + verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, child, + sizeof (child), NULL, NULL, 0, B_FALSE) == 0); + if (!libzfs_path_contains(parent, child)) + break; + } + return (i); +} + +typedef struct mnt_param { + libzfs_handle_t *mnt_hdl; + tpool_t *mnt_tp; + zfs_handle_t **mnt_zhps; /* filesystems to mount */ + size_t mnt_num_handles; + int mnt_idx; /* Index of selected entry to mount */ + zfs_iter_f mnt_func; + void *mnt_data; +} mnt_param_t; + +/* + * Allocate and populate the parameter struct for mount function, and + * schedule mounting of the entry selected by idx. 
+ */ +static void +zfs_dispatch_mount(libzfs_handle_t *hdl, zfs_handle_t **handles, + size_t num_handles, int idx, zfs_iter_f func, void *data, tpool_t *tp) +{ + mnt_param_t *mnt_param = zfs_alloc(hdl, sizeof (mnt_param_t)); + + mnt_param->mnt_hdl = hdl; + mnt_param->mnt_tp = tp; + mnt_param->mnt_zhps = handles; + mnt_param->mnt_num_handles = num_handles; + mnt_param->mnt_idx = idx; + mnt_param->mnt_func = func; + mnt_param->mnt_data = data; + + (void) tpool_dispatch(tp, zfs_mount_task, (void*)mnt_param); +} + +/* + * This is the structure used to keep state of mounting or sharing operations + * during a call to zpool_enable_datasets(). + */ +typedef struct mount_state { + /* + * ms_mntstatus is set to -1 if any mount fails. While multiple threads + * could update this variable concurrently, no synchronization is + * needed as it's only ever set to -1. + */ + int ms_mntstatus; + int ms_mntflags; + const char *ms_mntopts; +} mount_state_t; + +static int +zfs_mount_one(zfs_handle_t *zhp, void *arg) +{ + mount_state_t *ms = arg; + int ret = 0; /* - * Gather all non-snap datasets within the pool. + * don't attempt to mount encrypted datasets with + * unloaded keys */ - if ((zfsp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_DATASET)) == NULL) - goto out; + if (zfs_prop_get_int(zhp, ZFS_PROP_KEYSTATUS) == + ZFS_KEYSTATUS_UNAVAILABLE) + return (0); + + if (zfs_mount(zhp, ms->ms_mntopts, ms->ms_mntflags) != 0) + ret = ms->ms_mntstatus = -1; + return (ret); +} + +static int +zfs_share_one(zfs_handle_t *zhp, void *arg) +{ + mount_state_t *ms = arg; + int ret = 0; + + if (zfs_share(zhp) != 0) + ret = ms->ms_mntstatus = -1; + return (ret); +} + +/* + * Thread pool function to mount one file system. On completion, it finds and + * schedules its children to be mounted. This depends on the sorting done in + * zfs_foreach_mountpoint(). 
Note that the degenerate case (chain of entries + * each descending from the previous) will have no parallelism since we always + * have to wait for the parent to finish mounting before we can schedule + * its children. + */ +static void +zfs_mount_task(void *arg) +{ + mnt_param_t *mp = arg; + int idx = mp->mnt_idx; + zfs_handle_t **handles = mp->mnt_zhps; + size_t num_handles = mp->mnt_num_handles; + char mountpoint[ZFS_MAXPROPLEN]; + + verify(zfs_prop_get(handles[idx], ZFS_PROP_MOUNTPOINT, mountpoint, + sizeof (mountpoint), NULL, NULL, 0, B_FALSE) == 0); + + if (mp->mnt_func(handles[idx], mp->mnt_data) != 0) + return; - libzfs_add_handle(&cb, zfsp); - if (zfs_iter_filesystems(zfsp, mount_cb, &cb) != 0) - goto out; /* - * Sort the datasets by mountpoint. + * We dispatch tasks to mount filesystems with mountpoints underneath + * this one. We do this by dispatching the next filesystem with a + * descendant mountpoint of the one we just mounted, then skip all of + * its descendants, dispatch the next descendant mountpoint, and so on. + * The non_descendant_idx() function skips over filesystems that are + * descendants of the filesystem we just dispatched. */ - qsort(cb.cb_handles, cb.cb_used, sizeof (void *), - libzfs_dataset_cmp); + for (int i = idx + 1; i < num_handles; + i = non_descendant_idx(handles, num_handles, i)) { + char child[ZFS_MAXPROPLEN]; + verify(zfs_prop_get(handles[i], ZFS_PROP_MOUNTPOINT, + child, sizeof (child), NULL, NULL, 0, B_FALSE) == 0); + + if (!libzfs_path_contains(mountpoint, child)) + break; /* not a descendant, return */ + zfs_dispatch_mount(mp->mnt_hdl, handles, num_handles, i, + mp->mnt_func, mp->mnt_data, mp->mnt_tp); + } + free(mp); +} +/* + * Issue the func callback for each ZFS handle contained in the handles + * array. This function is used to mount all datasets, and so this function + * guarantees that filesystems for parent mountpoints are called before their + * children. 
As such, before issuing any callbacks, we first sort the array + * of handles by mountpoint. + * + * Callbacks are issued in one of two ways: + * + * 1. Sequentially: If the parallel argument is B_FALSE or the ZFS_SERIAL_MOUNT + * environment variable is set, then we issue callbacks sequentially. + * + * 2. In parallel: If the parallel argument is B_TRUE and the ZFS_SERIAL_MOUNT + * environment variable is not set, then we use a tpool to dispatch threads + * to mount filesystems in parallel. This function dispatches tasks to mount + * the filesystems at the top-level mountpoints, and these tasks in turn + * are responsible for recursively mounting filesystems in their children + * mountpoints. + */ +void +zfs_foreach_mountpoint(libzfs_handle_t *hdl, zfs_handle_t **handles, + size_t num_handles, zfs_iter_f func, void *data, boolean_t parallel) +{ /* - * And mount all the datasets, keeping track of which ones - * succeeded or failed. + * The ZFS_SERIAL_MOUNT environment variable is an undocumented + * variable that can be used as a convenience to do a/b comparison + * of serial vs. parallel mounting. */ - if ((good = zfs_alloc(zhp->zpool_hdl, - cb.cb_used * sizeof (int))) == NULL) - goto out; + boolean_t serial_mount = !parallel || + (getenv("ZFS_SERIAL_MOUNT") != NULL); - ret = 0; - for (i = 0; i < cb.cb_used; i++) { - /* - * don't attempt to mount encrypted datasets with - * unloaded keys - */ - if (zfs_prop_get_int(cb.cb_handles[i], ZFS_PROP_KEYSTATUS) == - ZFS_KEYSTATUS_UNAVAILABLE) - continue; + /* + * Sort the datasets by mountpoint. See mountpoint_cmp for details + * of how these are sorted. + */ + qsort(handles, num_handles, sizeof (zfs_handle_t *), mountpoint_cmp); - if (zfs_mount(cb.cb_handles[i], mntopts, flags) != 0) - ret = -1; - else - good[i] = 1; + if (serial_mount) { + for (int i = 0; i < num_handles; i++) { + func(handles[i], data); + } + return; } /* - * Then share all the ones that need to be shared. 
This needs - * to be a separate pass in order to avoid excessive reloading - * of the configuration. Good should never be NULL since - * zfs_alloc is supposed to exit if memory isn't available. + * Issue the callback function for each dataset using a parallel + * algorithm that uses a thread pool to manage threads. + */ + tpool_t *tp = tpool_create(1, mount_tp_nthr, 0, NULL); + + /* + * There may be multiple "top level" mountpoints outside of the pool's + * root mountpoint, e.g.: /foo /bar. Dispatch a mount task for each of + * these. */ - for (i = 0; i < cb.cb_used; i++) { - if (good[i] && zfs_share(cb.cb_handles[i]) != 0) - ret = -1; + for (int i = 0; i < num_handles; + i = non_descendant_idx(handles, num_handles, i)) { + zfs_dispatch_mount(hdl, handles, num_handles, i, func, data, + tp); } - free(good); + tpool_wait(tp); /* wait for all scheduled mounts to complete */ + tpool_destroy(tp); +} + +/* + * Mount and share all datasets within the given pool. This assumes that no + * datasets within the pool are currently mounted. + */ +#pragma weak zpool_mount_datasets = zpool_enable_datasets +int +zpool_enable_datasets(zpool_handle_t *zhp, const char *mntopts, int flags) +{ + get_all_cb_t cb = { 0 }; + mount_state_t ms = { 0 }; + zfs_handle_t *zfsp; + int ret = 0; + + if ((zfsp = zfs_open(zhp->zpool_hdl, zhp->zpool_name, + ZFS_TYPE_DATASET)) == NULL) + goto out; + + /* + * Gather all non-snapshot datasets within the pool. Start by adding + * the root filesystem for this pool to the list, and then iterate + * over all child filesystems. + */ + libzfs_add_handle(&cb, zfsp); + if (zfs_iter_filesystems(zfsp, zfs_iter_cb, &cb) != 0) + goto out; + + /* + * Mount all filesystems + */ + ms.ms_mntopts = mntopts; + ms.ms_mntflags = flags; + zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, + zfs_mount_one, &ms, B_TRUE); + if (ms.ms_mntstatus != 0) + ret = ms.ms_mntstatus; + + /* + * Share all filesystems that need to be shared. 
This needs to be + * a separate pass because libshare is not mt-safe, and so we need + * to share serially. + */ + ms.ms_mntstatus = 0; + zfs_foreach_mountpoint(zhp->zpool_hdl, cb.cb_handles, cb.cb_used, + zfs_share_one, &ms, B_FALSE); + if (ms.ms_mntstatus != 0) + ret = ms.ms_mntstatus; out: - for (i = 0; i < cb.cb_used; i++) + for (int i = 0; i < cb.cb_used; i++) zfs_close(cb.cb_handles[i]); free(cb.cb_handles); diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index e5826dd7a..225e9bc8b 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -181,7 +181,7 @@ tests = ['zfs_mount_001_pos', 'zfs_mount_002_pos', 'zfs_mount_003_pos', 'zfs_mount_007_pos', 'zfs_mount_008_pos', 'zfs_mount_009_neg', 'zfs_mount_010_neg', 'zfs_mount_011_neg', 'zfs_mount_012_neg', 'zfs_mount_all_001_pos', 'zfs_mount_encrypted', 'zfs_mount_remount', - 'zfs_multi_mount'] + 'zfs_multi_mount', 'zfs_mount_all_fail', 'zfs_mount_all_mountpoints'] tags = ['functional', 'cli_root', 'zfs_mount'] [tests/functional/cli_root/zfs_program] diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am index 05cbd8a77..b2de98934 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/Makefile.am @@ -14,8 +14,10 @@ dist_pkgdata_SCRIPTS = \ zfs_mount_010_neg.ksh \ zfs_mount_011_neg.ksh \ zfs_mount_012_neg.ksh \ - zfs_mount_encrypted.ksh \ zfs_mount_all_001_pos.ksh \ + zfs_mount_all_fail.ksh \ + zfs_mount_all_mountpoints.ksh \ + zfs_mount_encrypted.ksh \ zfs_mount_remount.ksh \ zfs_multi_mount.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib index 2a7cf6ef7..2afb9a547 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib +++ 
b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib @@ -25,7 +25,7 @@ # # -# Copyright (c) 2016 by Delphix. All rights reserved. +# Copyright (c) 2017 by Delphix. All rights reserved. # . $STF_SUITE/include/libtest.shlib @@ -84,14 +84,12 @@ function setup_filesystem #disklist #pool #fs #mntpoint #type #vdev fi case "$type" in - 'ctr') log_must zfs create $pool/$fs - log_must zfs set mountpoint=$mntpoint $pool/$fs + 'ctr') log_must zfs create -o mountpoint=$mntpoint $pool/$fs ;; 'vol') log_must zfs create -V $VOLSIZE $pool/$fs block_device_wait ;; - *) log_must zfs create $pool/$fs - log_must zfs set mountpoint=$mntpoint $pool/$fs + *) log_must zfs create -o mountpoint=$mntpoint $pool/$fs ;; esac diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh new file mode 100755 index 000000000..d7fcd20af --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_fail.ksh @@ -0,0 +1,96 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib + +# DESCRIPTION: +# Verify that if 'zfs mount -a' fails to mount one filesystem, +# the command fails with a non-zero error code, but all other +# filesystems are mounted. +# +# STRATEGY: +# 1. Create zfs filesystems +# 2. Unmount a leaf filesystem +# 3. 
Create a file in the above filesystem's mountpoint +# 4. Verify that 'zfs mount -a' fails to mount the above +# 5. Verify that all other filesystems were mounted +# + +verify_runnable "both" + +typeset -a filesystems +typeset path=${TEST_BASE_DIR%%/}/testroot$$/$TESTPOOL +typeset fscount=10 + +function setup_all +{ + # Create $fscount filesystems at the top level of $path + for ((i=0; i<$fscount; i++)); do + setup_filesystem "$DISKS" "$TESTPOOL" $i "$path/$i" ctr + done + + zfs list -r $TESTPOOL + + return 0 +} + +function cleanup_all +{ + export __ZFS_POOL_RESTRICT="$TESTPOOL" + log_must zfs $unmountall + unset __ZFS_POOL_RESTRICT + + [[ -d ${TEST_BASE_DIR%%/}/testroot$$ ]] && \ + rm -rf ${TEST_BASE_DIR%%/}/testroot$$ +} + +log_onexit cleanup_all + +log_must setup_all + +# +# Unmount all of the above so that we can create the stray file +# in one of the mountpoint directories. +# +export __ZFS_POOL_RESTRICT="$TESTPOOL" +log_must zfs $unmountall +unset __ZFS_POOL_RESTRICT + +# All of our filesystems should be unmounted at this point +for ((i=0; i<$fscount; i++)); do + log_mustnot mounted "$TESTPOOL/$i" +done + +# Create a stray file in one filesystem's mountpoint +touch $path/0/strayfile + +# Verify that zfs mount -a fails +export __ZFS_POOL_RESTRICT="$TESTPOOL" +log_mustnot zfs $mountall +unset __ZFS_POOL_RESTRICT + +# All filesystems except for "0" should be mounted +log_mustnot mounted "$TESTPOOL/0" +for ((i=1; i<$fscount; i++)); do + log_must mounted "$TESTPOOL/$i" +done + +log_pass "'zfs $mountall' failed as expected." 
diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh new file mode 100755 index 000000000..3e6a24bbc --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_mount/zfs_mount_all_mountpoints.ksh @@ -0,0 +1,162 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2017 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zfs_mount/zfs_mount.kshlib + +# DESCRIPTION: +# Verify that 'zfs mount -a' succeeds given a set of filesystems +# whose mountpoints have a parent/child relationship which is +# counter to the filesystem parent/child relationship. +# +# STRATEGY: +# 1. Create zfs filesystems within the given pool. +# 2. Unmount all the filesystems. +# 3. Verify that 'zfs mount -a' command succeeds, +# and all available ZFS filesystems are mounted. +# 4. Verify that 'zfs mount' is identical with 'df -F zfs' +# + +verify_runnable "both" + +typeset -a filesystems + +function setup_all +{ + typeset path=${TEST_BASE_DIR%%/}/testroot$$/$TESTPOOL + typeset fscount=10 + + # + # Generate an array of filesystem names that represent a deep + # hierarchy as such: + # + # 0 + # 0/1 + # 0/1/2 + # 0/1/2/3 + # 0/1/2/3/4 + # ... 
+ # + fs=0 + for ((i=0; i<$fscount; i++)); do + if [[ $i -gt 0 ]]; then + fs=$fs/$i + fi + filesystems+=($fs) + done + + # Create all of the above filesystems + for ((i=0; i<$fscount; i++)); do + fs=${filesystems[$i]} + setup_filesystem "$DISKS" "$TESTPOOL" "$fs" "$path/$i" ctr + done + + zfs list -r $TESTPOOL + + # + # Unmount all of the above so that we can setup our convoluted + # mount paths. + # + export __ZFS_POOL_RESTRICT="$TESTPOOL" + log_must zfs $unmountall + unset __ZFS_POOL_RESTRICT + + # + # Configure the mount paths so that each mountpoint is contained + # in a child filesystem. We should end up with something like the + # following structure (modulo the number of filesystems): + # + # NAME MOUNTPOINT + # testpool /testpool + # testpool/0 /testroot25416/testpool/0/1/2/3/4/5/6 + # testpool/0/1 /testroot25416/testpool/0/1/2/3/4/5 + # testpool/0/1/2 /testroot25416/testpool/0/1/2/3/4 + # testpool/0/1/2/3 /testroot25416/testpool/0/1/2/3 + # testpool/0/1/2/3/4 /testroot25416/testpool/0/1/2 + # testpool/0/1/2/3/4/5 /testroot25416/testpool/0/1 + # testpool/0/1/2/3/4/5/6 /testroot25416/testpool/0 + # + for ((i=0; i<$fscount; i++)); do + fs=$TESTPOOL/${filesystems[$(($fscount - $i - 1))]} + mnt=$path/${filesystems[$i]} + zfs set mountpoint=$mnt $fs + done + + zfs list -r $TESTPOOL + + return 0 +} + +function cleanup_all +{ + export __ZFS_POOL_RESTRICT="$TESTPOOL" + log_must zfs $unmountall + unset __ZFS_POOL_RESTRICT + + for fs in ${filesystems[@]}; do + cleanup_filesystem "$TESTPOOL" "$fs" + done + [[ -d ${TEST_BASE_DIR%%/}/testroot$$ ]] && \ + rm -rf ${TEST_BASE_DIR%%/}/testroot$$ +} + +# +# This function takes a single true/false argument. If true it will verify that +# all file systems are mounted. If false it will verify that they are not +# mounted. 
+# +function verify_all +{ + if $1; then + logfunc=log_must + else + logfunc=log_mustnot + fi + + for fs in ${filesystems[@]}; do + $logfunc mounted "$TESTPOOL/$fs" + done + + return 0 +} + +log_onexit cleanup_all + +log_must setup_all + +export __ZFS_POOL_RESTRICT="$TESTPOOL" +log_must zfs $unmountall +unset __ZFS_POOL_RESTRICT + +verify_all false + +export __ZFS_POOL_RESTRICT="$TESTPOOL" +log_must zfs $mountall +unset __ZFS_POOL_RESTRICT + +verify_all true + +log_note "Verify that 'zfs $mountcmd' will display " \ + "all ZFS filesystems currently mounted." + +verify_mount_display + +log_pass "'zfs $mountall' succeeds as root, " \ + "and all available ZFS filesystems are mounted."