granicus.if.org Git - zfs/commitdiff
Defer new resilvers until the current one ends
authorTom Caputi <tcaputi@datto.com>
Fri, 19 Oct 2018 04:06:18 +0000 (00:06 -0400)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Fri, 19 Oct 2018 04:06:18 +0000 (21:06 -0700)
Currently, if a resilver is triggered for any reason while an
existing one is running, zfs will immediately restart the existing
resilver from the beginning to include the new drive. This causes
problems for system administrators when a drive fails while another
is already resilvering. In this case, the optimal thing to do to
reduce risk of data loss is to wait for the current resilver to end
before immediately replacing the second failed drive, which allows
the system to operate with two incomplete drives for the minimum
amount of time.

This patch introduces the resilver_defer feature that essentially
does this for the admin without forcing them to wait and monitor
the resilver manually. The change requires an on-disk feature
since we must mark drives that are part of a deferred resilver in
the vdev config to ensure that we do not assume they are done
resilvering when an existing resilver completes.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: John Kennedy <john.kennedy@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: @mmaybee
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #7732

28 files changed:
cmd/zpool/zpool_main.c
configure.ac
include/sys/fs/zfs.h
include/sys/spa_impl.h
include/sys/vdev.h
include/sys/vdev_impl.h
include/zfeature_common.h
include/zfs_gitrev.h [new file with mode: 0644]
man/man5/zpool-features.5
man/man8/zpool.8
module/zcommon/zfeature_common.c
module/zfs/dsl_scan.c
module/zfs/spa.c
module/zfs/vdev.c
module/zfs/vdev_label.c
tests/runfiles/linux.run
tests/zfs-tests/tests/functional/cli_root/Makefile.am
tests/zfs-tests/tests/functional/cli_root/zpool_get/zpool_get.cfg
tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib
tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_004_pos.ksh
tests/zfs-tests/tests/functional/cli_root/zpool_reopen/zpool_reopen_005_pos.ksh
tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am [new file with mode: 0644]
tests/zfs-tests/tests/functional/cli_root/zpool_resilver/cleanup.ksh [new file with mode: 0755]
tests/zfs-tests/tests/functional/cli_root/zpool_resilver/setup.ksh [new file with mode: 0755]
tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg [new file with mode: 0644]
tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_bad_args.ksh [new file with mode: 0755]
tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh [new file with mode: 0755]
tests/zfs-tests/tests/functional/cli_root/zpool_scrub/zpool_scrub_offline_device.ksh

index 5af626558aa8c80a6c37c339be6c5c293a04b9bf..4845956e513148fa9646bdc8080c2c81e16607eb 100644 (file)
@@ -97,6 +97,7 @@ static int zpool_do_replace(int, char **);
 static int zpool_do_split(int, char **);
 
 static int zpool_do_scrub(int, char **);
+static int zpool_do_resilver(int, char **);
 
 static int zpool_do_import(int, char **);
 static int zpool_do_export(int, char **);
@@ -149,6 +150,7 @@ typedef enum {
        HELP_REPLACE,
        HELP_REMOVE,
        HELP_SCRUB,
+       HELP_RESILVER,
        HELP_STATUS,
        HELP_UPGRADE,
        HELP_EVENTS,
@@ -276,6 +278,7 @@ static zpool_command_t command_table[] = {
        { "split",      zpool_do_split,         HELP_SPLIT              },
        { NULL },
        { "scrub",      zpool_do_scrub,         HELP_SCRUB              },
+       { "resilver",   zpool_do_resilver,      HELP_RESILVER           },
        { NULL },
        { "import",     zpool_do_import,        HELP_IMPORT             },
        { "export",     zpool_do_export,        HELP_EXPORT             },
@@ -358,6 +361,8 @@ get_usage(zpool_help_t idx)
                return (gettext("\treopen [-n] <pool>\n"));
        case HELP_SCRUB:
                return (gettext("\tscrub [-s | -p] <pool> ...\n"));
+       case HELP_RESILVER:
+               return (gettext("\tresilver <pool> ...\n"));
        case HELP_STATUS:
                return (gettext("\tstatus [-c [script1,script2,...]] [-gLPvxD]"
                    "[-T d|u] [pool] ... \n"
@@ -1874,11 +1879,14 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
        (void) nvlist_lookup_uint64_array(root, ZPOOL_CONFIG_SCAN_STATS,
            (uint64_t **)&ps, &c);
 
-       if (ps != NULL && ps->pss_state == DSS_SCANNING &&
-           vs->vs_scan_processed != 0 && children == 0) {
-               (void) printf(gettext("  (%s)"),
-                   (ps->pss_func == POOL_SCAN_RESILVER) ?
-                   "resilvering" : "repairing");
+       if (ps != NULL && ps->pss_state == DSS_SCANNING && children == 0) {
+               if (vs->vs_scan_processed != 0) {
+                       (void) printf(gettext("  (%s)"),
+                           (ps->pss_func == POOL_SCAN_RESILVER) ?
+                           "resilvering" : "repairing");
+               } else if (vs->vs_resilver_deferred) {
+                       (void) printf(gettext("  (awaiting resilver)"));
+               }
        }
 
        if (cb->vcdl != NULL) {
@@ -6251,7 +6259,7 @@ scrub_callback(zpool_handle_t *zhp, void *data)
         * Ignore faulted pools.
         */
        if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
-               (void) fprintf(stderr, gettext("cannot scrub '%s': pool is "
+               (void) fprintf(stderr, gettext("cannot scan '%s': pool is "
                    "currently unavailable\n"), zpool_get_name(zhp));
                return (1);
        }
@@ -6319,6 +6327,44 @@ zpool_do_scrub(int argc, char **argv)
        return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb));
 }
 
+/*
+ * zpool resilver <pool> ...
+ *
+ *     Restarts any in-progress resilver
+ */
+int
+zpool_do_resilver(int argc, char **argv)
+{
+       int c;
+       scrub_cbdata_t cb;
+
+       cb.cb_type = POOL_SCAN_RESILVER;
+       cb.cb_scrub_cmd = POOL_SCRUB_NORMAL;
+       cb.cb_argc = argc;
+       cb.cb_argv = argv;
+
+       /* check options */
+       while ((c = getopt(argc, argv, "")) != -1) {
+               switch (c) {
+               case '?':
+                       (void) fprintf(stderr, gettext("invalid option '%c'\n"),
+                           optopt);
+                       usage(B_FALSE);
+               }
+       }
+
+       argc -= optind;
+       argv += optind;
+
+       if (argc < 1) {
+               (void) fprintf(stderr, gettext("missing pool name argument\n"));
+               usage(B_FALSE);
+       }
+
+       return (for_each_pool(argc, argv, B_TRUE, NULL, scrub_callback, &cb));
+}
+
+
 /*
  * Print out detailed scrub status.
  */
index 301258e7f7565b6766cc3407378dbdbf973441f4..59c8c5f7f37dc77e869ccec748092c2381fca894 100644 (file)
@@ -252,6 +252,7 @@ AC_CONFIG_FILES([
        tests/zfs-tests/tests/functional/cli_root/zpool_online/Makefile
        tests/zfs-tests/tests/functional/cli_root/zpool_remove/Makefile
        tests/zfs-tests/tests/functional/cli_root/zpool_reopen/Makefile
+       tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile
        tests/zfs-tests/tests/functional/cli_root/zpool_replace/Makefile
        tests/zfs-tests/tests/functional/cli_root/zpool_scrub/Makefile
        tests/zfs-tests/tests/functional/cli_root/zpool_set/Makefile
index 6bbf8434619ce929fe4ea81ff1e40484205d4186..4f0e13dfdc376ff01515081f34ad08f87e777535 100644 (file)
@@ -710,6 +710,7 @@ typedef struct zpool_load_policy {
 #define        ZPOOL_CONFIG_VDEV_TOP_ZAP       "com.delphix:vdev_zap_top"
 #define        ZPOOL_CONFIG_VDEV_LEAF_ZAP      "com.delphix:vdev_zap_leaf"
 #define        ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS  "com.delphix:has_per_vdev_zaps"
+#define        ZPOOL_CONFIG_RESILVER_DEFER     "com.datto:resilver_defer"
 #define        ZPOOL_CONFIG_CACHEFILE          "cachefile"     /* not stored on disk */
 #define        ZPOOL_CONFIG_MMP_STATE          "mmp_state"     /* not stored on disk */
 #define        ZPOOL_CONFIG_MMP_TXG            "mmp_txg"       /* not stored on disk */
@@ -988,6 +989,7 @@ typedef struct vdev_stat {
        uint64_t        vs_scan_processed;      /* scan processed bytes */
        uint64_t        vs_fragmentation;       /* device fragmentation */
        uint64_t        vs_checkpoint_space;    /* checkpoint-consumed space */
+       uint64_t        vs_resilver_deferred;   /* resilver deferred    */
 } vdev_stat_t;
 
 /*
index 9dbdcfcf5284477f15095ed3f9236bc8be714677..404aaa9ee37380594ad07c0a5bc7e63240b90d0e 100644 (file)
@@ -281,6 +281,13 @@ struct spa {
        uint64_t        spa_scan_pass_scrub_spent_paused; /* total paused */
        uint64_t        spa_scan_pass_exam;     /* examined bytes per pass */
        uint64_t        spa_scan_pass_issued;   /* issued bytes per pass */
+
+       /*
+        * We are in the middle of a resilver, and another resilver
+        * is needed once this one completes. This is set iff any
+        * vdev_resilver_deferred is set.
+        */
+       boolean_t       spa_resilver_deferred;
        kmutex_t        spa_async_lock;         /* protect async state */
        kthread_t       *spa_async_thread;      /* thread doing async task */
        int             spa_async_suspended;    /* async tasks suspended */
index b37b60bdd14d20f96f95b2c8068de7f886553b75..2091892b27dae9af49420a4227ad2de4bcbf0444 100644 (file)
@@ -149,6 +149,8 @@ extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
 extern void vdev_state_dirty(vdev_t *vd);
 extern void vdev_state_clean(vdev_t *vd);
 
+extern void vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd);
+
 typedef enum vdev_config_flag {
        VDEV_CONFIG_SPARE = 1 << 0,
        VDEV_CONFIG_L2CACHE = 1 << 1,
index d308685478756ed972f3a5baba767e6d785d2fd3..7b07fe6c1ad76eefc20ce0d8df9ef0b2f2037b29 100644 (file)
@@ -335,6 +335,7 @@ struct vdev {
        boolean_t       vdev_isspare;   /* was a hot spare              */
        boolean_t       vdev_isl2cache; /* was a l2cache device         */
        boolean_t       vdev_copy_uberblocks;  /* post expand copy uberblocks */
+       boolean_t       vdev_resilver_deferred;  /* resilver deferred */
        vdev_queue_t    vdev_queue;     /* I/O deadline schedule queue  */
        vdev_cache_t    vdev_cache;     /* physical block cache         */
        spa_aux_vdev_t  *vdev_aux;      /* for l2cache and spares vdevs */
index 3804d7b1abdda8d3e97350681c6b2ba7bde41741..089a7e29d11bbe166671d56b0faabb293aaf6b58 100644 (file)
@@ -65,6 +65,7 @@ typedef enum spa_feature {
        SPA_FEATURE_POOL_CHECKPOINT,
        SPA_FEATURE_SPACEMAP_V2,
        SPA_FEATURE_ALLOCATION_CLASSES,
+       SPA_FEATURE_RESILVER_DEFER,
        SPA_FEATURES
 } spa_feature_t;
 
diff --git a/include/zfs_gitrev.h b/include/zfs_gitrev.h
new file mode 100644 (file)
index 0000000..13a831c
--- /dev/null
@@ -0,0 +1 @@
+#define        ZFS_META_GITREV "unknown"
index 5cc4db45e0ed6c0490972e1edbe6995c5cd0329d..c1f6ee6e4e6d09c907d8ed405a6c4e817fa92803 100644 (file)
@@ -756,6 +756,27 @@ can also be triggered on filesystems via `zfs set version=current <pool/fs>`.
 The upgrade process runs in the background and may take a while to complete
 for the filesystems containing a large number of files.
 
+.RE
+.sp
+.ne 2
+.na
+\fB\fBresilver_defer\fR\fR
+.ad
+.RS 4n
+.TS
+l l .
+GUID   com.datto:resilver_defer
+READ\-ONLY COMPATIBLE  yes
+DEPENDENCIES   none
+.TE
+
+This feature allows zfs to postpone new resilvers if an existing one is already
+in progress. Without this feature, any new resilvers will cause the currently
+running one to be immediately restarted from the beginning.
+
+This feature becomes \fBactive\fR once a resilver has been deferred, and returns
+to being \fBenabled\fR when the deferred resilver begins.
+
 .RE
 
 .sp
index 821b1e4d6b54c4243d3fc2e85eceac743de4643c..8fe6b494be472a8770a73a23dce933675044ba92 100644 (file)
 .Oo Fl o Ar property Ns = Ns Ar value Oc
 .Ar pool Ar device Op Ar new_device
 .Nm
+.Cm resilver
+.Ar pool Ns ...
+.Nm
 .Cm scrub
 .Op Fl s | Fl p
 .Ar pool Ns ...
@@ -2069,6 +2072,14 @@ again.
 .El
 .It Xo
 .Nm
+.Cm resilver
+.Ar pool Ns ...
+.Xc
+Starts a resilver. If an existing resilver is already running it will be
+restarted from the beginning. Any drives that were scheduled for a deferred
+resilver will be added to the new one.
+.It Xo
+.Nm
 .Cm set
 .Ar property Ns = Ns Ar value
 .Ar pool
index a0ad108c50f1cdc59bb0c41062ede36335bd6f44..40ce01b947483249880deeb5e089aa3b65a24fc9 100644 (file)
@@ -445,6 +445,11 @@ zpool_feature_init(void)
            "Support for separate allocation classes.",
            ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL);
        }
+
+       zfeature_register(SPA_FEATURE_RESILVER_DEFER,
+           "com.datto:resilver_defer", "resilver_defer",
+           "Support for defering new resilvers when one is already running.",
+           ZFEATURE_FLAG_READONLY_COMPAT, ZFEATURE_TYPE_BOOLEAN, NULL);
 }
 
 #if defined(_KERNEL)
index b84c2aa45fd76855a102583dc1ba87761746284c..aff99f275efd9f59f5522002726347aa4c690a42 100644 (file)
@@ -175,6 +175,8 @@ enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
 /* max number of blocks to free in a single TXG */
 unsigned long zfs_async_block_max_blocks = 100000;
 
+int zfs_resilver_disable_defer = 0; /* set to disable resilver deferring */
+
 /*
  * We wait a few txgs after importing a pool to begin scanning so that
  * the import / mounting code isn't held up by scrub / resilver IO.
@@ -720,6 +722,11 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
        spa->spa_scrub_reopen = B_FALSE;
        (void) spa_vdev_state_exit(spa, NULL, 0);
 
+       if (func == POOL_SCAN_RESILVER) {
+               dsl_resilver_restart(spa->spa_dsl_pool, 0);
+               return (0);
+       }
+
        if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) {
                /* got scrub start cmd, resume paused scrub */
                int err = dsl_scrub_set_pause_resume(scn->scn_dp,
@@ -736,6 +743,41 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
            dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_EXTRA_RESERVED));
 }
 
+/*
+ * Sets the resilver defer flag to B_FALSE on all leaf devs under vd. Returns
+ * B_TRUE if we have devices that need to be resilvered and are available to
+ * accept resilver I/Os.
+ */
+static boolean_t
+dsl_scan_clear_deferred(vdev_t *vd, dmu_tx_t *tx)
+{
+       boolean_t resilver_needed = B_FALSE;
+       spa_t *spa = vd->vdev_spa;
+
+       for (int c = 0; c < vd->vdev_children; c++) {
+               resilver_needed |=
+                   dsl_scan_clear_deferred(vd->vdev_child[c], tx);
+       }
+
+       if (vd == spa->spa_root_vdev &&
+           spa_feature_is_active(spa, SPA_FEATURE_RESILVER_DEFER)) {
+               spa_feature_decr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
+               vdev_config_dirty(vd);
+               spa->spa_resilver_deferred = B_FALSE;
+               return (resilver_needed);
+       }
+
+       if (!vdev_is_concrete(vd) || vd->vdev_aux ||
+           !vd->vdev_ops->vdev_op_leaf)
+               return (resilver_needed);
+
+       if (vd->vdev_resilver_deferred)
+               vd->vdev_resilver_deferred = B_FALSE;
+
+       return (!vdev_is_dead(vd) && !vd->vdev_offline &&
+           vdev_resilver_needed(vd, NULL, NULL));
+}
+
 /* ARGSUSED */
 static void
 dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
@@ -835,6 +877,25 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
                 * Let the async thread assess this and handle the detach.
                 */
                spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
+
+               /*
+                * Clear any deferred_resilver flags in the config.
+                * If there are drives that need resilvering, kick
+                * off an asynchronous request to start resilver.
+                * dsl_scan_clear_deferred() may update the config
+                * before the resilver can restart. In the event of
+                * a crash during this period, the spa loading code
+                * will find the drives that need to be resilvered
+                * when the machine reboots and start the resilver then.
+                */
+               boolean_t resilver_needed =
+                   dsl_scan_clear_deferred(spa->spa_root_vdev, tx);
+               if (resilver_needed) {
+                       spa_history_log_internal(spa,
+                           "starting deferred resilver", tx,
+                           "errors=%llu", spa_get_errlog_size(spa));
+                       spa_async_request(spa, SPA_ASYNC_RESILVER);
+               }
        }
 
        scn->scn_phys.scn_end_time = gethrestime_sec();
@@ -2966,6 +3027,26 @@ dsl_scan_active(dsl_scan_t *scn)
        return (used != 0);
 }
 
+static boolean_t
+dsl_scan_check_deferred(vdev_t *vd)
+{
+       boolean_t need_resilver = B_FALSE;
+
+       for (int c = 0; c < vd->vdev_children; c++) {
+               need_resilver |=
+                   dsl_scan_check_deferred(vd->vdev_child[c]);
+       }
+
+       if (!vdev_is_concrete(vd) || vd->vdev_aux ||
+           !vd->vdev_ops->vdev_op_leaf)
+               return (need_resilver);
+
+       if (!vd->vdev_resilver_deferred)
+               need_resilver = B_TRUE;
+
+       return (need_resilver);
+}
+
 static boolean_t
 dsl_scan_need_resilver(spa_t *spa, const dva_t *dva, size_t psize,
     uint64_t phys_birth)
@@ -3013,6 +3094,13 @@ dsl_scan_need_resilver(spa_t *spa, const dva_t *dva, size_t psize,
        if (!vdev_dtl_need_resilver(vd, DVA_GET_OFFSET(dva), psize))
                return (B_FALSE);
 
+       /*
+        * Check that this top-level vdev has a device under it which
+        * is resilvering and is not deferred.
+        */
+       if (!dsl_scan_check_deferred(vd))
+               return (B_FALSE);
+
        return (B_TRUE);
 }
 
@@ -3173,12 +3261,19 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
        spa_t *spa = dp->dp_spa;
        state_sync_type_t sync_type = SYNC_OPTIONAL;
 
+       if (spa->spa_resilver_deferred &&
+           !spa_feature_is_active(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER))
+               spa_feature_incr(spa, SPA_FEATURE_RESILVER_DEFER, tx);
+
        /*
         * Check for scn_restart_txg before checking spa_load_state, so
         * that we can restart an old-style scan while the pool is being
-        * imported (see dsl_scan_init).
+        * imported (see dsl_scan_init). We also restart scans if there
+        * is a deferred resilver and the user has manually disabled
+        * deferred resilvers via the tunable.
         */
-       if (dsl_scan_restarting(scn, tx)) {
+       if (dsl_scan_restarting(scn, tx) ||
+           (spa->spa_resilver_deferred && zfs_resilver_disable_defer)) {
                pool_scan_func_t func = POOL_SCAN_SCRUB;
                dsl_scan_done(scn, B_FALSE, tx);
                if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
@@ -4000,4 +4095,8 @@ MODULE_PARM_DESC(zfs_scan_strict_mem_lim,
 module_param(zfs_scan_fill_weight, int, 0644);
 MODULE_PARM_DESC(zfs_scan_fill_weight,
        "Tunable to adjust bias towards more filled segments during scans");
+
+module_param(zfs_resilver_disable_defer, int, 0644);
+MODULE_PARM_DESC(zfs_resilver_disable_defer,
+       "Process all resilvers immediately");
 #endif
index fdce49c40c75d735bb1f92c424ed295bfdfb7142..3785981b7655476b092057bba113d8d3b64c5c1e 100644 (file)
@@ -6059,9 +6059,14 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
        /*
         * Schedule the resilver to restart in the future. We do this to
         * ensure that dmu_sync-ed blocks have been stitched into the
-        * respective datasets.
+        * respective datasets. We do not do this if resilvers have been
+        * deferred.
         */
-       dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
+       if (dsl_scan_resilvering(spa_get_dsl(spa)) &&
+           spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
+               vdev_set_deferred_resilver(spa, newvd);
+       else
+               dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
 
        if (spa->spa_bootfs)
                spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH);
@@ -6933,6 +6938,7 @@ static void
 spa_async_thread(void *arg)
 {
        spa_t *spa = (spa_t *)arg;
+       dsl_pool_t *dp = spa->spa_dsl_pool;
        int tasks;
 
        ASSERT(spa->spa_sync_on);
@@ -7008,8 +7014,10 @@ spa_async_thread(void *arg)
        /*
         * Kick off a resilver.
         */
-       if (tasks & SPA_ASYNC_RESILVER)
-               dsl_resilver_restart(spa->spa_dsl_pool, 0);
+       if (tasks & SPA_ASYNC_RESILVER &&
+           (!dsl_scan_resilvering(dp) ||
+           !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)))
+               dsl_resilver_restart(dp, 0);
 
        /*
         * Let the world know that we're done.
index 2c95626c4d78a07f134649fb7cd7afc79b3e6c71..1521acc40552568f4f676a6c92cdc232a735e169 100644 (file)
@@ -790,6 +790,9 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
                (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_RESILVER_TXG,
                    &vd->vdev_resilver_txg);
 
+               if (nvlist_exists(nv, ZPOOL_CONFIG_RESILVER_DEFER))
+                       vdev_set_deferred_resilver(spa, vd);
+
                /*
                 * In general, when importing a pool we want to ignore the
                 * persistent fault state, as the diagnosis made on another
@@ -1798,8 +1801,13 @@ vdev_open(vdev_t *vd)
         * since this would just restart the scrub we are already doing.
         */
        if (vd->vdev_ops->vdev_op_leaf && !spa->spa_scrub_reopen &&
-           vdev_resilver_needed(vd, NULL, NULL))
-               spa_async_request(spa, SPA_ASYNC_RESILVER);
+           vdev_resilver_needed(vd, NULL, NULL)) {
+               if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
+                   spa_feature_is_enabled(spa, SPA_FEATURE_RESILVER_DEFER))
+                       vdev_set_deferred_resilver(spa, vd);
+               else
+                       spa_async_request(spa, SPA_ASYNC_RESILVER);
+       }
 
        return (0);
 }
@@ -2488,6 +2496,9 @@ vdev_dtl_should_excise(vdev_t *vd)
        if (vd->vdev_state < VDEV_STATE_DEGRADED)
                return (B_FALSE);
 
+       if (vd->vdev_resilver_deferred)
+               return (B_FALSE);
+
        if (vd->vdev_resilver_txg == 0 ||
            range_tree_is_empty(vd->vdev_dtl[DTL_MISSING]))
                return (B_TRUE);
@@ -3618,8 +3629,14 @@ vdev_clear(spa_t *spa, vdev_t *vd)
                if (vd != rvd && vdev_writeable(vd->vdev_top))
                        vdev_state_dirty(vd->vdev_top);
 
-               if (vd->vdev_aux == NULL && !vdev_is_dead(vd))
-                       spa_async_request(spa, SPA_ASYNC_RESILVER);
+               if (vd->vdev_aux == NULL && !vdev_is_dead(vd)) {
+                       if (dsl_scan_resilvering(spa->spa_dsl_pool) &&
+                           spa_feature_is_enabled(spa,
+                           SPA_FEATURE_RESILVER_DEFER))
+                               vdev_set_deferred_resilver(spa, vd);
+                       else
+                               spa_async_request(spa, SPA_ASYNC_RESILVER);
+               }
 
                spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR);
        }
@@ -3840,6 +3857,8 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
                        vs->vs_fragmentation = (vd->vdev_mg != NULL) ?
                            vd->vdev_mg->mg_fragmentation : 0;
                }
+               if (vd->vdev_ops->vdev_op_leaf)
+                       vs->vs_resilver_deferred = vd->vdev_resilver_deferred;
        }
 
        ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_READER) != 0);
@@ -4578,6 +4597,14 @@ vdev_deadman(vdev_t *vd, char *tag)
        }
 }
 
+void
+vdev_set_deferred_resilver(spa_t *spa, vdev_t *vd)
+{
+       ASSERT(vd->vdev_ops->vdev_op_leaf);
+       vd->vdev_resilver_deferred = B_TRUE;
+       spa->spa_resilver_deferred = B_TRUE;
+}
+
 #if defined(_KERNEL)
 EXPORT_SYMBOL(vdev_fault);
 EXPORT_SYMBOL(vdev_degrade);
index 439ab7438d90be661e1d658973dabf17da57bb69..7e86e3a8b3d93fa0fd3014b66c738493523afc01 100644 (file)
@@ -524,6 +524,12 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
                        fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP,
                            vd->vdev_top_zap);
                }
+
+               if (vd->vdev_resilver_deferred) {
+                       ASSERT(vd->vdev_ops->vdev_op_leaf);
+                       ASSERT(spa->spa_resilver_deferred);
+                       fnvlist_add_boolean(nv, ZPOOL_CONFIG_RESILVER_DEFER);
+               }
        }
 
        if (getstats) {
index 613660df95d30046b3f37149e56dbb03f11323c1..4f556acde3b799227b3e463ed781e749ab58c662 100644 (file)
@@ -421,6 +421,10 @@ tags = ['functional', 'cli_root', 'zpool_reopen']
 tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift']
 tags = ['functional', 'cli_root', 'zpool_replace']
 
+[tests/functional/cli_root/zpool_resilver]
+tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart']
+tags = ['functional', 'cli_root', 'zpool_resilver']
+
 [tests/functional/cli_root/zpool_scrub]
 tests = ['zpool_scrub_001_neg', 'zpool_scrub_002_pos', 'zpool_scrub_003_pos',
     'zpool_scrub_004_pos', 'zpool_scrub_005_pos',
index 7a765a16037f9b034623555946f0aab512da55de..13ff889d81966d8da527f3f985ba531e5177b361 100644 (file)
@@ -52,6 +52,7 @@ SUBDIRS = \
        zpool_remove \
        zpool_reopen \
        zpool_replace \
+       zpool_resilver \
        zpool_scrub \
        zpool_set \
        zpool_split \
index 8907533c9e4ba090696e5491a9b485453f0a7c3e..48a32174fa36b1546749ad2902b276c98da94974 100644 (file)
@@ -87,5 +87,6 @@ if is_linux; then
            "feature@encryption"
            "feature@project_quota"
            "feature@allocation_classes"
+           "feature@resilver_defer"
        )
 fi
index 82860deb34488aa854b4712f3dca6c47d257bb31..075ad85e9f9626b080f0f896a02447940e5b7a30 100755 (executable)
@@ -115,3 +115,10 @@ function is_scan_restarted #pool
        zpool history -i $pool | grep -q "scan aborted, restarting"
        return $?
 }
+
+function is_deferred_scan_started #pool
+{
+       typeset pool=$1
+       zpool history -i $pool | grep -q "starting deferred resilver"
+       return $?
+}
index 30c389ce8414e16d32da2394333691fae53704c7..956ceebafbc61efbe1f203d1fd354601d750cdff 100755 (executable)
@@ -29,7 +29,7 @@
 # 4. Execute scrub.
 # 5. "Plug back" disk.
 # 6. Reopen a pool with an -n flag.
-# 7. Check if scrub scan is NOT replaced by resilver.
+# 7. Check if resilver was deferred.
 # 8. Check if trying to put device to offline fails because of no valid
 #    replicas.
 #
@@ -75,11 +75,12 @@ log_must check_state $TESTPOOL "$REMOVED_DISK_ID" "online"
 log_must zinject -c all
 # 7. Check if scrub scan is NOT replaced by resilver.
 log_must wait_for_scrub_end $TESTPOOL $MAXTIMEOUT
-log_mustnot is_scan_restarted $TESTPOOL
+log_must is_deferred_scan_started $TESTPOOL
 
 # 8. Check if trying to put device to offline fails because of no valid
 #    replicas.
-log_mustnot zpool offline $TESTPOOL $DISK2
+log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT
+log_must zpool offline $TESTPOOL $DISK2
 
 # clean up
 log_must zpool destroy $TESTPOOL
index 95029a8b6df340ea79859f064f776aadd79d1a23..fc298d01061e4c673d8e90d2da471feb2397532c 100755 (executable)
@@ -72,13 +72,13 @@ log_must zinject -d $REMOVED_DISK_ID -D25:1 $TESTPOOL
 log_must wait_for_resilver_start $TESTPOOL $MAXTIMEOUT
 
 # 6. Reopen a pool again with -n flag.
-zpool reopen -n $TESTPOOL
+log_must zpool reopen -n $TESTPOOL
 
 # 7. Wait until resilvering is finished and check if it was restarted.
 log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT
 # remove delay from disk
 log_must zinject -c all
-log_must is_scan_restarted $TESTPOOL
+log_mustnot is_scan_restarted $TESTPOOL
 
 # clean up
 log_must zpool destroy $TESTPOOL
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am
new file mode 100644 (file)
index 0000000..2cec533
--- /dev/null
@@ -0,0 +1,9 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_resilver
+dist_pkgdata_SCRIPTS = \
+       setup.ksh \
+       cleanup.ksh \
+       zpool_resilver_bad_args.ksh \
+       zpool_resilver_restart.ksh
+
+dist_pkgdata_DATA = \
+       zpool_resilver.cfg
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/cleanup.ksh
new file mode 100755 (executable)
index 0000000..c74e239
--- /dev/null
@@ -0,0 +1,33 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg
+
+verify_runnable "global"
+
+destroy_mirrors
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/setup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/setup.ksh
new file mode 100755 (executable)
index 0000000..48ceecd
--- /dev/null
@@ -0,0 +1,39 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Datto. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg
+
+verify_runnable "global"
+verify_disk_count "$DISKS" 3
+
+default_mirror_setup_noexit $DISK1 $DISK2 $DISK3
+
+mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
+
+# Create 256M of data
+log_must file_write -b 1048576 -c 256 -o create -d 0 -f $mntpnt/bigfile
+log_pass
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg
new file mode 100644 (file)
index 0000000..7d92984
--- /dev/null
@@ -0,0 +1,33 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Datto. All rights reserved.
+#
+
+export DISK1=$(echo $DISKS | nawk '{print $1}')
+export DISK2=$(echo $DISKS | nawk '{print $2}')
+export DISK3=$(echo $DISKS | nawk '{print $3}')
+
+export ZFS_SCAN_VDEV_LIMIT_SLOW=$((128*1024))
+export ZFS_SCAN_VDEV_LIMIT_DEFAULT=$((4*1024*1024))
+
+export MAXTIMEOUT=80
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_bad_args.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_bad_args.ksh
new file mode 100755 (executable)
index 0000000..9d973be
--- /dev/null
@@ -0,0 +1,58 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Datto. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# A badly formed parameter passed to 'zpool resilver' should
+# return an error.
+#
+# STRATEGY:
+# 1. Create an array containing bad 'zpool resilver' parameters.
+# 2. For each element, execute the sub-command.
+# 3. Verify it returns an error.
+#
+
+verify_runnable "global"
+
+set -A args "" "-?" "blah blah" "-%" "--?" "-*" "-=" \
+    "-a" "-b" "-c" "-d" "-e" "-f" "-g" "-h" "-i" "-j" "-k" "-l" \
+    "-m" "-n" "-o" "-p" "-q" "-r" "-s" "-t" "-u" "-v" "-w" "-x" "-y" "-z" \
+    "-A" "-B" "-C" "-D" "-E" "-F" "-G" "-H" "-I" "-J" "-K" "-L" \
+    "-M" "-N" "-O" "-P" "-Q" "-R" "-S" "-T" "-U" "-V" "-W" "-X" "-Y" "-Z"
+
+
+log_assert "Execute 'zpool resilver' using invalid parameters."
+
+typeset -i i=0
+while [[ $i -lt ${#args[*]} ]]; do
+       log_mustnot zpool resilver ${args[i]}
+
+       ((i = i + 1))
+done
+
+log_pass "Badly formed 'zpool resilver' parameters fail as expected."
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_restart.ksh
new file mode 100755 (executable)
index 0000000..de9e5ec
--- /dev/null
@@ -0,0 +1,95 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 Datto Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_resilver/zpool_resilver.cfg
+
+#
+# DESCRIPTION:
+#      "Verify 'zpool resilver' restarts in-progress resilvers"
+#
+# STRATEGY:
+#      1. Write some data and detach the first drive so it has resilver
+#         work to do
+#      2. Repeat the process with a second disk
+#      3. Reattach the drives, causing the second drive's resilver to be
+#         deferred
+#      4. Manually restart the resilver with all drives
+#
+# NOTES:
+#      Artificially limit the scrub speed by setting the zfs_scan_vdev_limit
+#      low and adding a 50ms zio delay in order to ensure that the resilver
+#      does not complete early.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+       log_must zinject -c all
+       log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
+       log_must rm -f $mntpnt/biggerfile1
+       log_must rm -f $mntpnt/biggerfile2
+}
+
+log_onexit cleanup
+
+log_assert "Verify 'zpool resilver' restarts in-progress resilvers"
+
+mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
+
+# 1. Write some data and detach the first drive so it has resilver work to do
+log_must file_write -b 524288 -c 1024 -o create -d 0 -f $mntpnt/biggerfile1
+log_must sync
+log_must zpool detach $TESTPOOL $DISK2
+
+# 2. Repeat the process with a second disk
+log_must file_write -b 524288 -c 1024 -o create -d 0 -f $mntpnt/biggerfile2
+log_must sync
+log_must zpool detach $TESTPOOL $DISK3
+
+# 3. Reattach the drives, causing the second drive's resilver to be deferred
+log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_SLOW
+
+log_must zpool attach $TESTPOOL $DISK1 $DISK2
+log_must zinject -d $DISK2 -D50:1 $TESTPOOL
+log_must is_pool_resilvering $TESTPOOL true
+
+log_must zpool attach $TESTPOOL $DISK1 $DISK3
+log_must zinject -d $DISK3 -D50:1 $TESTPOOL
+log_must is_pool_resilvering $TESTPOOL true
+
+# 4. Manually restart the resilver with all drives
+log_must zpool resilver $TESTPOOL
+log_must zinject -c all
+log_must set_tunable64 zfs_scan_vdev_limit $ZFS_SCAN_VDEV_LIMIT_DEFAULT
+log_must wait_for_resilver_end $TESTPOOL $MAXTIMEOUT
+log_must is_deferred_scan_started $TESTPOOL
+log_must check_state $TESTPOOL "$DISK2" "online"
+log_must check_state $TESTPOOL "$DISK3" "online"
+
+log_pass "Verified 'zpool resilver' restarts in-progress resilvers"
index 3bc798d1a9f8e777aee2e6758200853a36b5e706..fdf315dea5ed32fda06045f25f017fc031aa78a5 100755 (executable)
@@ -25,6 +25,7 @@
 #
 
 . $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_reopen/zpool_reopen.shlib
 . $STF_SUITE/tests/functional/cli_root/zpool_scrub/zpool_scrub.cfg
 
 #
@@ -95,6 +96,7 @@ DISK1="$TEST_BASE_DIR/zpool_disk1.dat"
 DISK2="$TEST_BASE_DIR/zpool_disk2.dat"
 DISK3="$TEST_BASE_DIR/zpool_disk3.dat"
 DISK4="$TEST_BASE_DIR/zpool_disk4.dat"
+RESILVER_TIMEOUT=40
 
 # 1. Create the pool
 log_must truncate -s $DEVSIZE $DISK1
@@ -117,6 +119,7 @@ zpool_scrub_sync $TESTPOOL
 # 5. Online the first device and offline the second device
 zpool_do_sync 'online' $TESTPOOL $DISK1
 zpool_do_sync 'offline' $TESTPOOL $DISK2
+log_must wait_for_resilver_end $TESTPOOL $RESILVER_TIMEOUT
 
 # 6. Scrub the pool again
 zpool_scrub_sync $TESTPOOL