OpenZFS 9102 - zfs should be able to initialize storage devices

author George Wilson <george.wilson@delphix.com>

Wed, 19 Dec 2018 14:54:59 +0000 (07:54 -0700)

committer Brian Behlendorf <behlendorf1@llnl.gov>

Mon, 7 Jan 2019 18:37:26 +0000 (10:37 -0800)
author George Wilson <george.wilson@delphix.com>
Wed, 19 Dec 2018 14:54:59 +0000 (07:54 -0700)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Mon, 7 Jan 2019 18:37:26 +0000 (10:37 -0800)
diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c

index 8a4ac35d55e495f4ea190db8e11630b7f35bb308..4e6a699c4f2b4f6e54ccc5906b56de9c03af09d4 100644 (file)
--- a/cmd/zpool/zpool_main.c
+++ b/cmd/zpool/zpool_main.c
@@ -97,6 +97,7 @@ static int zpool_do_detach(int, char **);
  static int zpool_do_replace(int, char **);
  static int zpool_do_split(int, char **);
  
+static int zpool_do_initialize(int, char **);
  static int zpool_do_scrub(int, char **);
  static int zpool_do_resilver(int, char **);
  
@@ -150,6 +151,7 @@ typedef enum {
         HELP_ONLINE,
         HELP_REPLACE,
         HELP_REMOVE,
+       HELP_INITIALIZE,
         HELP_SCRUB,
         HELP_RESILVER,
         HELP_STATUS,
@@ -278,6 +280,7 @@ static zpool_command_t command_table[] = {
         { "replace",    zpool_do_replace,       HELP_REPLACE            },
         { "split",      zpool_do_split,         HELP_SPLIT              },
         { NULL },
+       { "initialize", zpool_do_initialize,    HELP_INITIALIZE         },
         { "scrub",      zpool_do_scrub,         HELP_SCRUB              },
         { "resilver",   zpool_do_resilver,      HELP_RESILVER           },
         { NULL },
@@ -360,6 +363,8 @@ get_usage(zpool_help_t idx)
                 return (gettext("\tremove [-nps] <pool> <device> ...\n"));
         case HELP_REOPEN:
                 return (gettext("\treopen [-n] <pool>\n"));
+       case HELP_INITIALIZE:
+               return (gettext("\tinitialize [-cs] <pool> [<device> ...]\n"));
         case HELP_SCRUB:
                 return (gettext("\tscrub [-s | -p] <pool> ...\n"));
         case HELP_RESILVER:
@@ -393,6 +398,27 @@ get_usage(zpool_help_t idx)
         /* NOTREACHED */
  }
  
+static void
+zpool_collect_leaves(zpool_handle_t *zhp, nvlist_t *nvroot, nvlist_t *res)
+{
+       uint_t children = 0;
+       nvlist_t **child;
+       uint_t i;
+
+       (void) nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
+           &child, &children);
+
+       if (children == 0) {
+               char *path = zpool_vdev_name(g_zfs, zhp, nvroot, B_FALSE);
+               fnvlist_add_boolean(res, path);
+               free(path);
+               return;
+       }
+
+       for (i = 0; i < children; i++) {
+               zpool_collect_leaves(zhp, child[i], res);
+       }
+}
  
  /*
   * Callback routine that will print out a pool property value.
@@ -479,6 +505,97 @@ usage(boolean_t requested)
         exit(requested ? 0 : 2);
  }
  
+/*
+ * zpool initialize [-cs] <pool> [<vdev> ...]
+ * Initialize all unused blocks in the specified vdevs, or all vdevs in the pool
+ * if none specified.
+ *
+ *     -c      Cancel. Ends active initializing.
+ *     -s      Suspend. Initializing can then be restarted with no flags.
+ */
+int
+zpool_do_initialize(int argc, char **argv)
+{
+       int c;
+       char *poolname;
+       zpool_handle_t *zhp;
+       nvlist_t *vdevs;
+       int err = 0;
+
+       struct option long_options[] = {
+               {"cancel",      no_argument,            NULL, 'c'},
+               {"suspend",     no_argument,            NULL, 's'},
+               {0, 0, 0, 0}
+       };
+
+       pool_initialize_func_t cmd_type = POOL_INITIALIZE_DO;
+       while ((c = getopt_long(argc, argv, "cs", long_options, NULL)) != -1) {
+               switch (c) {
+               case 'c':
+                       if (cmd_type != POOL_INITIALIZE_DO) {
+                               (void) fprintf(stderr, gettext("-c cannot be "
+                                   "combined with other options\n"));
+                               usage(B_FALSE);
+                       }
+                       cmd_type = POOL_INITIALIZE_CANCEL;
+                       break;
+               case 's':
+                       if (cmd_type != POOL_INITIALIZE_DO) {
+                               (void) fprintf(stderr, gettext("-s cannot be "
+                                   "combined with other options\n"));
+                               usage(B_FALSE);
+                       }
+                       cmd_type = POOL_INITIALIZE_SUSPEND;
+                       break;
+               case '?':
+                       if (optopt != 0) {
+                               (void) fprintf(stderr,
+                                   gettext("invalid option '%c'\n"), optopt);
+                       } else {
+                               (void) fprintf(stderr,
+                                   gettext("invalid option '%s'\n"),
+                                   argv[optind - 1]);
+                       }
+                       usage(B_FALSE);
+               }
+       }
+
+       argc -= optind;
+       argv += optind;
+
+       if (argc < 1) {
+               (void) fprintf(stderr, gettext("missing pool name argument\n"));
+               usage(B_FALSE);
+               return (-1);
+       }
+
+       poolname = argv[0];
+       zhp = zpool_open(g_zfs, poolname);
+       if (zhp == NULL)
+               return (-1);
+
+       vdevs = fnvlist_alloc();
+       if (argc == 1) {
+               /* no individual leaf vdevs specified, so add them all */
+               nvlist_t *config = zpool_get_config(zhp, NULL);
+               nvlist_t *nvroot = fnvlist_lookup_nvlist(config,
+                   ZPOOL_CONFIG_VDEV_TREE);
+               zpool_collect_leaves(zhp, nvroot, vdevs);
+       } else {
+               int i;
+               for (i = 1; i < argc; i++) {
+                       fnvlist_add_boolean(vdevs, argv[i]);
+               }
+       }
+
+       err = zpool_initialize(zhp, cmd_type, vdevs);
+
+       fnvlist_free(vdevs);
+       zpool_close(zhp);
+
+       return (err);
+}
+
  /*
   * print a pool vdev config for dry runs
   */
@@ -1923,6 +2040,43 @@ print_status_config(zpool_handle_t *zhp, status_cbdata_t *cb, const char *name,
                 }
         }
  
+       if ((vs->vs_initialize_state == VDEV_INITIALIZE_ACTIVE ||
+           vs->vs_initialize_state == VDEV_INITIALIZE_SUSPENDED ||
+           vs->vs_initialize_state == VDEV_INITIALIZE_COMPLETE) &&
+           !vs->vs_scan_removing) {
+               char zbuf[1024];
+               char tbuf[256];
+               struct tm zaction_ts;
+
+               time_t t = vs->vs_initialize_action_time;
+               int initialize_pct = 100;
+               if (vs->vs_initialize_state != VDEV_INITIALIZE_COMPLETE) {
+                       initialize_pct = (vs->vs_initialize_bytes_done * 100 /
+                           (vs->vs_initialize_bytes_est + 1));
+               }
+
+               (void) localtime_r(&t, &zaction_ts);
+               (void) strftime(tbuf, sizeof (tbuf), "%c", &zaction_ts);
+
+               switch (vs->vs_initialize_state) {
+               case VDEV_INITIALIZE_SUSPENDED:
+                       (void) snprintf(zbuf, sizeof (zbuf),
+                           ", suspended, started at %s", tbuf);
+                       break;
+               case VDEV_INITIALIZE_ACTIVE:
+                       (void) snprintf(zbuf, sizeof (zbuf),
+                           ", started at %s", tbuf);
+                       break;
+               case VDEV_INITIALIZE_COMPLETE:
+                       (void) snprintf(zbuf, sizeof (zbuf),
+                           ", completed at %s", tbuf);
+                       break;
+               }
+
+               (void) printf(gettext("  (%d%% initialized%s)"),
+                   initialize_pct, zbuf);
+       }
+
         (void) printf("\n");
  
         for (c = 0; c < children; c++) {
diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c

index f9ba9b6d0e2664df9ee653122141d5c2c0fe77d8..385984f84dc81ddc77e6aacdc3953b9151b856a8 100644 (file)
--- a/cmd/ztest/ztest.c
+++ b/cmd/ztest/ztest.c
@@ -106,6 +106,7 @@
  #include <sys/zil_impl.h>
  #include <sys/vdev_impl.h>
  #include <sys/vdev_file.h>
+#include <sys/vdev_initialize.h>
  #include <sys/spa_impl.h>
  #include <sys/metaslab_impl.h>
  #include <sys/dsl_prop.h>
@@ -374,6 +375,7 @@ ztest_func_t ztest_spa_upgrade;
  ztest_func_t ztest_device_removal;
  ztest_func_t ztest_remap_blocks;
  ztest_func_t ztest_spa_checkpoint_create_discard;
+ztest_func_t ztest_initialize;
  ztest_func_t ztest_fletcher;
  ztest_func_t ztest_fletcher_incr;
  ztest_func_t ztest_verify_dnode_bt;
@@ -427,6 +429,7 @@ ztest_info_t ztest_info[] = {
         ZTI_INIT(ztest_device_removal, 1, &zopt_sometimes),
         ZTI_INIT(ztest_remap_blocks, 1, &zopt_sometimes),
         ZTI_INIT(ztest_spa_checkpoint_create_discard, 1, &zopt_rarely),
+       ZTI_INIT(ztest_initialize, 1, &zopt_sometimes),
         ZTI_INIT(ztest_fletcher, 1, &zopt_rarely),
         ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
         ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
@@ -6343,6 +6346,97 @@ ztest_get_zdb_bin(char *bin, int len)
         strcpy(bin, "zdb");
  }
  
+static vdev_t *
+ztest_random_concrete_vdev_leaf(vdev_t *vd)
+{
+       if (vd == NULL)
+               return (NULL);
+
+       if (vd->vdev_children == 0)
+               return (vd);
+
+       vdev_t *eligible[vd->vdev_children];
+       int eligible_idx = 0, i;
+       for (i = 0; i < vd->vdev_children; i++) {
+               vdev_t *cvd = vd->vdev_child[i];
+               if (cvd->vdev_top->vdev_removing)
+                       continue;
+               if (cvd->vdev_children > 0 ||
+                   (vdev_is_concrete(cvd) && !cvd->vdev_detached)) {
+                       eligible[eligible_idx++] = cvd;
+               }
+       }
+       VERIFY(eligible_idx > 0);
+
+       uint64_t child_no = ztest_random(eligible_idx);
+       return (ztest_random_concrete_vdev_leaf(eligible[child_no]));
+}
+
+/* ARGSUSED */
+void
+ztest_initialize(ztest_ds_t *zd, uint64_t id)
+{
+       spa_t *spa = ztest_spa;
+       int error = 0;
+
+       mutex_enter(&ztest_vdev_lock);
+
+       spa_config_enter(spa, SCL_VDEV, FTAG, RW_READER);
+
+       /* Random leaf vdev */
+       vdev_t *rand_vd = ztest_random_concrete_vdev_leaf(spa->spa_root_vdev);
+       if (rand_vd == NULL) {
+               spa_config_exit(spa, SCL_VDEV, FTAG);
+               mutex_exit(&ztest_vdev_lock);
+               return;
+       }
+
+       /*
+        * The random vdev we've selected may change as soon as we
+        * drop the spa_config_lock. We create local copies of things
+        * we're interested in.
+        */
+       uint64_t guid = rand_vd->vdev_guid;
+       char *path = strdup(rand_vd->vdev_path);
+       boolean_t active = rand_vd->vdev_initialize_thread != NULL;
+
+       zfs_dbgmsg("vd %p, guid %llu", rand_vd, guid);
+       spa_config_exit(spa, SCL_VDEV, FTAG);
+
+       uint64_t cmd = ztest_random(POOL_INITIALIZE_FUNCS);
+       error = spa_vdev_initialize(spa, guid, cmd);
+       switch (cmd) {
+       case POOL_INITIALIZE_CANCEL:
+               if (ztest_opts.zo_verbose >= 4) {
+                       (void) printf("Cancel initialize %s", path);
+                       if (!active)
+                               (void) printf(" failed (no initialize active)");
+                       (void) printf("\n");
+               }
+               break;
+       case POOL_INITIALIZE_DO:
+               if (ztest_opts.zo_verbose >= 4) {
+                       (void) printf("Start initialize %s", path);
+                       if (active && error == 0)
+                               (void) printf(" failed (already active)");
+                       else if (error != 0)
+                               (void) printf(" failed (error %d)", error);
+                       (void) printf("\n");
+               }
+               break;
+       case POOL_INITIALIZE_SUSPEND:
+               if (ztest_opts.zo_verbose >= 4) {
+                       (void) printf("Suspend initialize %s", path);
+                       if (!active)
+                               (void) printf(" failed (no initialize active)");
+                       (void) printf("\n");
+               }
+               break;
+       }
+       free(path);
+       mutex_exit(&ztest_vdev_lock);
+}
+
  /*
   * Verify pool integrity by running zdb.
   */
diff --git a/configure.ac b/configure.ac

index 028cae338750df2a452a9116cbbf78f076adadc6..be3e556c183f478a41bf892f01912a57d094eb11 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -247,6 +247,7 @@ AC_CONFIG_FILES([
         tests/zfs-tests/tests/functional/cli_root/zpool_history/Makefile
         tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile
         tests/zfs-tests/tests/functional/cli_root/zpool_import/blockfiles/Makefile
+       tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile
         tests/zfs-tests/tests/functional/cli_root/zpool_labelclear/Makefile
         tests/zfs-tests/tests/functional/cli_root/zpool/Makefile
         tests/zfs-tests/tests/functional/cli_root/zpool_offline/Makefile
diff --git a/include/libzfs.h b/include/libzfs.h

index c764e92dd50a00ac964c3640e64f56c4a066673f..85b0bc0ddb771303c62a4814dee6d3ad012649b4 100644 (file)
--- a/include/libzfs.h
+++ b/include/libzfs.h
@@ -139,6 +139,9 @@ typedef enum zfs_error {
         EZFS_DEVRM_IN_PROGRESS, /* a device is currently being removed */
         EZFS_VDEV_TOO_BIG,      /* a device is too big to be used */
         EZFS_IOC_NOTSUPPORTED,  /* operation not supported by zfs module */
+       EZFS_TOOMANY,           /* argument list too long */
+       EZFS_INITIALIZING,      /* currently initializing */
+       EZFS_NO_INITIALIZE,     /* no active initialize */
         EZFS_UNKNOWN
  } zfs_error_t;
  
@@ -253,6 +256,8 @@ typedef struct splitflags {
   * Functions to manipulate pool and vdev state
   */
  extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
+extern int zpool_initialize(zpool_handle_t *, pool_initialize_func_t,
+    nvlist_t *);
  extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *);
  extern int zpool_reguid(zpool_handle_t *);
  extern int zpool_reopen_one(zpool_handle_t *, void *);
diff --git a/include/libzfs_core.h b/include/libzfs_core.h

index f84270d7eb440df4c1e3ad4676f7f384f31a2a72..264ce3fa02b0cbfc7bd2db91e4057098adcdb7a8 100644 (file)
--- a/include/libzfs_core.h
+++ b/include/libzfs_core.h
@@ -62,6 +62,8 @@ int lzc_destroy_bookmarks(nvlist_t *, nvlist_t **);
  int lzc_load_key(const char *, boolean_t, uint8_t *, uint_t);
  int lzc_unload_key(const char *);
  int lzc_change_key(const char *, uint64_t, nvlist_t *, uint8_t *, uint_t);
+int lzc_initialize(const char *, pool_initialize_func_t, nvlist_t *,
+    nvlist_t **);
  
  int lzc_snaprange_space(const char *, const char *, uint64_t *);
  
diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am

index 8bf376998bf612b4b5bb6d3dcb007137ed61de84..e6c82d113ccfa312ffc4e4c298d96c6ed911148a 100644 (file)
--- a/include/sys/Makefile.am
+++ b/include/sys/Makefile.am
@@ -96,6 +96,7 @@ COMMON_H = \
         $(top_srcdir)/include/sys/vdev_impl.h \
         $(top_srcdir)/include/sys/vdev_indirect_births.h \
         $(top_srcdir)/include/sys/vdev_indirect_mapping.h \
+       $(top_srcdir)/include/sys/vdev_initialize.h \
         $(top_srcdir)/include/sys/vdev_raidz.h \
         $(top_srcdir)/include/sys/vdev_raidz_impl.h \
         $(top_srcdir)/include/sys/vdev_removal.h \
diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h

index 85512618cf6bffd96f8db8e9f115c257dfecf3e1..945853739b7cb70954f36bd1cc5657a2dcacaf70 100644 (file)
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -775,6 +775,13 @@ typedef struct zpool_load_policy {
  #define        VDEV_ALLOC_BIAS_SPECIAL         "special"
  #define        VDEV_ALLOC_BIAS_DEDUP           "dedup"
  
+#define        VDEV_LEAF_ZAP_INITIALIZE_LAST_OFFSET    \
+       "com.delphix:next_offset_to_initialize"
+#define        VDEV_LEAF_ZAP_INITIALIZE_STATE  \
+       "com.delphix:vdev_initialize_state"
+#define        VDEV_LEAF_ZAP_INITIALIZE_ACTION_TIME    \
+       "com.delphix:vdev_initialize_action_time"
+
  /*
   * This is needed in userland to report the minimum necessary device size.
   */
@@ -988,10 +995,15 @@ typedef struct vdev_stat {
         uint64_t        vs_read_errors;         /* read errors          */
         uint64_t        vs_write_errors;        /* write errors         */
         uint64_t        vs_checksum_errors;     /* checksum errors      */
+       uint64_t        vs_initialize_errors;   /* initializing errors  */
         uint64_t        vs_self_healed;         /* self-healed bytes    */
         uint64_t        vs_scan_removing;       /* removing?    */
         uint64_t        vs_scan_processed;      /* scan processed bytes */
         uint64_t        vs_fragmentation;       /* device fragmentation */
+       uint64_t        vs_initialize_bytes_done; /* bytes initialized */
+       uint64_t        vs_initialize_bytes_est; /* total bytes to initialize */
+       uint64_t        vs_initialize_state;    /* vdev_initializing_state_t */
+       uint64_t        vs_initialize_action_time; /* time_t */
         uint64_t        vs_checkpoint_space;    /* checkpoint-consumed space */
         uint64_t        vs_resilver_deferred;   /* resilver deferred    */
         uint64_t        vs_slow_ios;            /* slow IOs */
@@ -1023,7 +1035,6 @@ typedef struct vdev_stat_ex {
  #define        VDEV_L_HISTO_BUCKETS 37         /* Latency histo buckets */
  #define        VDEV_RQ_HISTO_BUCKETS 25        /* Request size histo buckets */
  
-
         /* Amount of time in ZIO queue (ns) */
         uint64_t vsx_queue_histo[ZIO_PRIORITY_NUM_QUEUEABLE]
             [VDEV_L_HISTO_BUCKETS];
@@ -1050,6 +1061,16 @@ typedef struct vdev_stat_ex {
  
  } vdev_stat_ex_t;
  
+/*
+ * Initialize functions.
+ */
+typedef enum pool_initialize_func {
+       POOL_INITIALIZE_DO,
+       POOL_INITIALIZE_CANCEL,
+       POOL_INITIALIZE_SUSPEND,
+       POOL_INITIALIZE_FUNCS
+} pool_initialize_func_t;
+
  /*
   * DDT statistics.  Note: all fields should be 64-bit because this
   * is passed between kernel and userland as an nvlist uint64 array.
@@ -1094,6 +1115,14 @@ typedef struct ddt_histogram {
  #define        ZVOL_PROP_NAME          "name"
  #define        ZVOL_DEFAULT_BLOCKSIZE  8192
  
+typedef enum {
+       VDEV_INITIALIZE_NONE,
+       VDEV_INITIALIZE_ACTIVE,
+       VDEV_INITIALIZE_CANCELED,
+       VDEV_INITIALIZE_SUSPENDED,
+       VDEV_INITIALIZE_COMPLETE
+} vdev_initializing_state_t;
+
  /*
   * /dev/zfs ioctl numbers.
   *
@@ -1184,6 +1213,7 @@ typedef enum zfs_ioc {
         ZFS_IOC_REMAP,                          /* 0x5a4c */
         ZFS_IOC_POOL_CHECKPOINT,                /* 0x5a4d */
         ZFS_IOC_POOL_DISCARD_CHECKPOINT,        /* 0x5a4e */
+       ZFS_IOC_POOL_INITIALIZE,                /* 0x5a4f */
  
         /*
          * Linux - 3/64 numbers reserved.
@@ -1277,6 +1307,12 @@ typedef enum {
   */
  #define        ZPOOL_HIDDEN_ARGS       "hidden_args"
  
+/*
+ * The following are names used when invoking ZFS_IOC_POOL_INITIALIZE.
+ */
+#define        ZPOOL_INITIALIZE_COMMAND        "initialize_command"
+#define        ZPOOL_INITIALIZE_VDEVS          "initialize_vdevs"
+
  /*
   * Flags for ZFS_IOC_VDEV_SET_STATE
   */
diff --git a/include/sys/metaslab_impl.h b/include/sys/metaslab_impl.h

index aa1c82a0258e32905b8ebae40f4ce1e6dc6971ee..3e32eace6d6ad8e261730076e74fe005a74578f5 100644 (file)
--- a/include/sys/metaslab_impl.h
+++ b/include/sys/metaslab_impl.h
@@ -68,7 +68,8 @@ typedef enum trace_alloc_type {
         TRACE_GROUP_FAILURE     = -5ULL,
         TRACE_ENOSPC            = -6ULL,
         TRACE_CONDENSING        = -7ULL,
-       TRACE_VDEV_ERROR        = -8ULL
+       TRACE_VDEV_ERROR        = -8ULL,
+       TRACE_INITIALIZING      = -9ULL
  } trace_alloc_type_t;
  
  #define        METASLAB_WEIGHT_PRIMARY         (1ULL << 63)
@@ -270,6 +271,11 @@ struct metaslab_group {
         uint64_t                mg_failed_allocations;
         uint64_t                mg_fragmentation;
         uint64_t                mg_histogram[RANGE_TREE_HISTOGRAM_SIZE];
+
+       int                     mg_ms_initializing;
+       boolean_t               mg_initialize_updating;
+       kmutex_t                mg_ms_initialize_lock;
+       kcondvar_t              mg_ms_initialize_cv;
  };
  
  /*
@@ -360,6 +366,8 @@ struct metaslab {
         boolean_t       ms_condense_wanted;
         uint64_t        ms_condense_checked_txg;
  
+       uint64_t        ms_initializing; /* leaves initializing this ms */
+
         /*
          * We must hold both ms_lock and ms_group->mg_lock in order to
          * modify ms_loaded.
diff --git a/include/sys/spa.h b/include/sys/spa.h

index 5dc27e3349be8473927a912cd8b3f0038d1dd6a0..4a66260ef902e2748470dbc976dbed0c9295cfa3 100644 (file)
--- a/include/sys/spa.h
+++ b/include/sys/spa.h
@@ -772,6 +772,7 @@ extern int spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps);
  #define        SPA_ASYNC_AUTOEXPAND    0x20
  #define        SPA_ASYNC_REMOVE_DONE   0x40
  #define        SPA_ASYNC_REMOVE_STOP   0x80
+#define        SPA_ASYNC_INITIALIZE_RESTART    0x100
  
  /*
   * Controls the behavior of spa_vdev_remove().
@@ -787,6 +788,7 @@ extern int spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid,
      int replace_done);
  extern int spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare);
  extern boolean_t spa_vdev_remove_active(spa_t *spa);
+extern int spa_vdev_initialize(spa_t *spa, uint64_t guid, uint64_t cmd_type);
  extern int spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath);
  extern int spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru);
  extern int spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h

index 6c13a548fb85c787cb7bc862e0fc15ce3483bf41..ae21e037ed3fd20e980c93b2217ffe440f537a08 100644 (file)
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -82,6 +82,12 @@ typedef void vdev_remap_cb_t(uint64_t inner_offset, vdev_t *vd,
      uint64_t offset, uint64_t size, void *arg);
  typedef void   vdev_remap_func_t(vdev_t *vd, uint64_t offset, uint64_t size,
      vdev_remap_cb_t callback, void *arg);
+/*
+ * Given a target vdev, translates the logical range "in" to the physical
+ * range "res"
+ */
+typedef void vdev_xlation_func_t(vdev_t *cvd, const range_seg_t *in,
+    range_seg_t *res);
  
  typedef const struct vdev_ops {
         vdev_open_func_t                *vdev_op_open;
@@ -94,6 +100,11 @@ typedef const struct vdev_ops {
         vdev_hold_func_t                *vdev_op_hold;
         vdev_rele_func_t                *vdev_op_rele;
         vdev_remap_func_t               *vdev_op_remap;
+       /*
+        * For translating ranges from non-leaf vdevs (e.g. raidz) to leaves.
+        * Used when initializing vdevs. Isn't used by leaf ops.
+        */
+       vdev_xlation_func_t             *vdev_op_xlate;
         char                            vdev_op_type[16];
         boolean_t                       vdev_op_leaf;
  } vdev_ops_t;
@@ -250,6 +261,24 @@ struct vdev {
         /* pool checkpoint related */
         space_map_t     *vdev_checkpoint_sm;    /* contains reserved blocks */
  
+       boolean_t       vdev_initialize_exit_wanted;
+       vdev_initializing_state_t       vdev_initialize_state;
+       kthread_t       *vdev_initialize_thread;
+       /* Protects vdev_initialize_thread and vdev_initialize_state. */
+       kmutex_t        vdev_initialize_lock;
+       kcondvar_t      vdev_initialize_cv;
+       uint64_t        vdev_initialize_offset[TXG_SIZE];
+       uint64_t        vdev_initialize_last_offset;
+       range_tree_t    *vdev_initialize_tree;  /* valid while initializing */
+       uint64_t        vdev_initialize_bytes_est;
+       uint64_t        vdev_initialize_bytes_done;
+       time_t          vdev_initialize_action_time;    /* start and end time */
+
+       /* for limiting outstanding I/Os */
+       kmutex_t        vdev_initialize_io_lock;
+       kcondvar_t      vdev_initialize_io_cv;
+       uint64_t        vdev_initialize_inflight;
+
         /*
          * Values stored in the config for an indirect or removing vdev.
          */
@@ -478,6 +507,8 @@ extern vdev_ops_t vdev_indirect_ops;
  /*
   * Common size functions
   */
+extern void vdev_default_xlate(vdev_t *vd, const range_seg_t *in,
+    range_seg_t *out);
  extern uint64_t vdev_default_asize(vdev_t *vd, uint64_t psize);
  extern uint64_t vdev_get_min_asize(vdev_t *vd);
  extern void vdev_set_min_asize(vdev_t *vd);
diff --git a/include/sys/vdev_initialize.h b/include/sys/vdev_initialize.h

new file mode 100644 (file)

index 0000000..db4b057
--- /dev/null
+++ b/include/sys/vdev_initialize.h
@@ -0,0 +1,46 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+
+#ifndef _SYS_VDEV_INITIALIZE_H
+#define        _SYS_VDEV_INITIALIZE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void vdev_initialize(vdev_t *vd);
+extern void vdev_initialize_stop(vdev_t *vd,
+    vdev_initializing_state_t tgt_state);
+extern void vdev_initialize_stop_all(vdev_t *vd,
+    vdev_initializing_state_t tgt_state);
+extern void vdev_initialize_restart(vdev_t *vd);
+extern void vdev_xlate(vdev_t *vd, const range_seg_t *logical_rs,
+    range_seg_t *physical_rs);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VDEV_INITIALIZE_H */
diff --git a/include/sys/zio_priority.h b/include/sys/zio_priority.h

index c2cc8b2d5975c991441243f492b5b0d11a4a3eae..d8e6a1745969fd199f8bd4b691fddae40b5655ad 100644 (file)
--- a/include/sys/zio_priority.h
+++ b/include/sys/zio_priority.h
@@ -13,7 +13,7 @@
   * CDDL HEADER END
   */
  /*
- * Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
   */
  #ifndef        _ZIO_PRIORITY_H
  #define        _ZIO_PRIORITY_H
@@ -29,6 +29,7 @@ typedef enum zio_priority {
         ZIO_PRIORITY_ASYNC_WRITE,       /* spa_sync() */
         ZIO_PRIORITY_SCRUB,             /* asynchronous scrub/resilver reads */
         ZIO_PRIORITY_REMOVAL,           /* reads/writes for vdev removal */
+       ZIO_PRIORITY_INITIALIZING,      /* initializing I/O */
         ZIO_PRIORITY_NUM_QUEUEABLE,
         ZIO_PRIORITY_NOW,               /* non-queued i/os (e.g. free) */
  } zio_priority_t;
diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c

index bc320e516752e844f7d7642fd46085bfa442f0ef..f799471e435128e43ba16cdcac07afa723df7ed1 100644 (file)
--- a/lib/libzfs/libzfs_pool.c
+++ b/lib/libzfs/libzfs_pool.c
@@ -2092,6 +2092,100 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
         return (ret);
  }
  
+static int
+xlate_init_err(int err)
+{
+       switch (err) {
+       case ENODEV:
+               return (EZFS_NODEVICE);
+       case EINVAL:
+       case EROFS:
+               return (EZFS_BADDEV);
+       case EBUSY:
+               return (EZFS_INITIALIZING);
+       case ESRCH:
+               return (EZFS_NO_INITIALIZE);
+       }
+       return (err);
+}
+
+/*
+ * Begin, suspend, or cancel the initialization (initializing of all free
+ * blocks) for the given vdevs in the given pool.
+ */
+int
+zpool_initialize(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
+    nvlist_t *vds)
+{
+       char msg[1024];
+       libzfs_handle_t *hdl = zhp->zpool_hdl;
+
+       nvlist_t *errlist;
+
+       /* translate vdev names to guids */
+       nvlist_t *vdev_guids = fnvlist_alloc();
+       nvlist_t *guids_to_paths = fnvlist_alloc();
+       boolean_t spare, cache;
+       nvlist_t *tgt;
+       nvpair_t *elem;
+
+       for (elem = nvlist_next_nvpair(vds, NULL); elem != NULL;
+           elem = nvlist_next_nvpair(vds, elem)) {
+               char *vd_path = nvpair_name(elem);
+               tgt = zpool_find_vdev(zhp, vd_path, &spare, &cache, NULL);
+
+               if ((tgt == NULL) || cache || spare) {
+                       (void) snprintf(msg, sizeof (msg),
+                           dgettext(TEXT_DOMAIN, "cannot initialize '%s'"),
+                           vd_path);
+                       int err = (tgt == NULL) ? EZFS_NODEVICE :
+                           (spare ? EZFS_ISSPARE : EZFS_ISL2CACHE);
+                       fnvlist_free(vdev_guids);
+                       fnvlist_free(guids_to_paths);
+                       return (zfs_error(hdl, err, msg));
+               }
+
+               uint64_t guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
+               fnvlist_add_uint64(vdev_guids, vd_path, guid);
+
+               (void) snprintf(msg, sizeof (msg), "%llu", (u_longlong_t)guid);
+               fnvlist_add_string(guids_to_paths, msg, vd_path);
+       }
+
+       int err = lzc_initialize(zhp->zpool_name, cmd_type, vdev_guids,
+           &errlist);
+       fnvlist_free(vdev_guids);
+
+       if (err == 0) {
+               fnvlist_free(guids_to_paths);
+               return (0);
+       }
+
+       nvlist_t *vd_errlist = NULL;
+       if (errlist != NULL) {
+               vd_errlist = fnvlist_lookup_nvlist(errlist,
+                   ZPOOL_INITIALIZE_VDEVS);
+       }
+
+       (void) snprintf(msg, sizeof (msg),
+           dgettext(TEXT_DOMAIN, "operation failed"));
+
+       for (elem = nvlist_next_nvpair(vd_errlist, NULL); elem != NULL;
+           elem = nvlist_next_nvpair(vd_errlist, elem)) {
+               int64_t vd_error = xlate_init_err(fnvpair_value_int64(elem));
+               char *path = fnvlist_lookup_string(guids_to_paths,
+                   nvpair_name(elem));
+               (void) zfs_error_fmt(hdl, vd_error, "cannot initialize '%s'",
+                   path);
+       }
+
+       fnvlist_free(guids_to_paths);
+       if (vd_errlist != NULL)
+               return (-1);
+
+       return (zpool_standard_error(hdl, err, msg));
+}
+
  /*
   * Scan the pool.
   */
diff --git a/lib/libzfs/libzfs_util.c b/lib/libzfs/libzfs_util.c

index 776d887a27761a3d1f0c2061f90d3c715539e665..d7401cdf408a0379d9e5499ccf14142889c2a787 100644 (file)
--- a/lib/libzfs/libzfs_util.c
+++ b/lib/libzfs/libzfs_util.c
@@ -283,6 +283,13 @@ libzfs_error_description(libzfs_handle_t *hdl)
                     "different host"));
         case EZFS_CRYPTOFAILED:
                 return (dgettext(TEXT_DOMAIN, "encryption failure"));
+       case EZFS_TOOMANY:
+               return (dgettext(TEXT_DOMAIN, "argument list too long"));
+       case EZFS_INITIALIZING:
+               return (dgettext(TEXT_DOMAIN, "currently initializing"));
+       case EZFS_NO_INITIALIZE:
+               return (dgettext(TEXT_DOMAIN, "there is no active "
+                   "initialization"));
         case EZFS_UNKNOWN:
                 return (dgettext(TEXT_DOMAIN, "unknown error"));
         default:
diff --git a/lib/libzfs_core/libzfs_core.c b/lib/libzfs_core/libzfs_core.c

index 1baec04a4077f7231b2fff15124996fb75f3209a..524a637e4a38e8eafed2dff8f855d95cea0841d0 100644 (file)
--- a/lib/libzfs_core/libzfs_core.c
+++ b/lib/libzfs_core/libzfs_core.c
@@ -1397,3 +1397,40 @@ lzc_reopen(const char *pool_name, boolean_t scrub_restart)
         nvlist_free(args);
         return (error);
  }
+
+/*
+ * Changes initializing state.
+ *
+ * vdevs should be a list of (<key>, guid) where guid is a uint64 vdev GUID.
+ * The key is ignored.
+ *
+ * If there are errors related to vdev arguments, per-vdev errors are returned
+ * in an nvlist with the key "vdevs". Each error is a (guid, errno) pair where
+ * guid is stringified with PRIu64, and errno is one of the following as
+ * an int64_t:
+ *     - ENODEV if the device was not found
+ *     - EINVAL if the device is not a leaf or is not concrete (e.g. missing)
+ *     - EROFS if the device is not writeable
+ *     - EBUSY start requested but the device is already being initialized
+ *     - ESRCH cancel/suspend requested but device is not being initialized
+ *
+ * If the errlist is empty, then return value will be:
+ *     - EINVAL if one or more arguments was invalid
+ *     - Other spa_open failures
+ *     - 0 if the operation succeeded
+ */
+int
+lzc_initialize(const char *poolname, pool_initialize_func_t cmd_type,
+    nvlist_t *vdevs, nvlist_t **errlist)
+{
+       int error;
+       nvlist_t *args = fnvlist_alloc();
+       fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, (uint64_t)cmd_type);
+       fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, vdevs);
+
+       error = lzc_ioctl(ZFS_IOC_POOL_INITIALIZE, poolname, args, errlist);
+
+       fnvlist_free(args);
+
+       return (error);
+}
diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am

index efc44b27ebb77b2f26c7a0434c9be22deaced20f..e13bb0f58d39d072ca31b1512f4236400e940503 100644 (file)
--- a/lib/libzpool/Makefile.am
+++ b/lib/libzpool/Makefile.am
@@ -113,6 +113,7 @@ KERNEL_C = \
         vdev_indirect_births.c \
         vdev_indirect.c \
         vdev_indirect_mapping.c \
+       vdev_initialize.c \
         vdev_label.c \
         vdev_mirror.c \
         vdev_missing.c \
diff --git a/man/man5/zfs-module-parameters.5 b/man/man5/zfs-module-parameters.5

index 55115c2661c473462f731156223347850b46b5ba..1dbf865f435f0c1e991321aa328afe44c90193e6 100644 (file)
--- a/man/man5/zfs-module-parameters.5
+++ b/man/man5/zfs-module-parameters.5
@@ -1372,6 +1372,30 @@ further increasing latency.
  Default value: \fB2\fR.
  .RE
  
+.sp
+.ne 2
+.na
+\fBzfs_vdev_initializing_max_active\fR (int)
+.ad
+.RS 12n
+Maximum initializing I/Os active to each device.
+See the section "ZFS I/O SCHEDULER".
+.sp
+Default value: \fB1\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fBzfs_vdev_initializing_min_active\fR (int)
+.ad
+.RS 12n
+Minimum initializing I/Os active to each device.
+See the section "ZFS I/O SCHEDULER".
+.sp
+Default value: \fB1\fR.
+.RE
+
  .sp
  .ne 2
  .na
@@ -1385,6 +1409,30 @@ queue's min_active.  See the section "ZFS I/O SCHEDULER".
  Default value: \fB1,000\fR.
  .RE
  
+.sp
+.ne 2
+.na
+\fBzfs_vdev_removal_max_active\fR (int)
+.ad
+.RS 12n
+Maximum removal I/Os active to each device.
+See the section "ZFS I/O SCHEDULER".
+.sp
+Default value: \fB2\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fBzfs_vdev_removal_min_active\fR (int)
+.ad
+.RS 12n
+Minimum removal I/Os active to each device.
+See the section "ZFS I/O SCHEDULER".
+.sp
+Default value: \fB1\fR.
+.RE
+
  .sp
  .ne 2
  .na
@@ -1612,6 +1660,17 @@ dataset being written to had the property setting \fBlogbias=throughput\fR.
  Default value: \fB32,768\fR.
  .RE
  
+.sp
+.ne 2
+.na
+\fBzfs_initialize_value\fR (ulong)
+.ad
+.RS 12n
+Pattern written to vdev free space by \fBzpool initialize\fR.
+.sp
+Default value: \fB16,045,690,984,833,335,022\fR (0xdeadbeefdeadbeee).
+.RE
+
  .sp
  .ne 2
  .na
diff --git a/man/man8/zpool.8 b/man/man8/zpool.8

index 83e50bd0140f4e0918708cca3cbf78485a8437da..a05d4d1f32afaa5badaaa46da6684d4a5aba16bb 100644 (file)
--- a/man/man8/zpool.8
+++ b/man/man8/zpool.8
@@ -115,6 +115,11 @@
  .Ar pool Ns | Ns Ar id
  .Op Ar newpool Oo Fl t Oc
  .Nm
+.Cm initialize
+.Op Fl cs
+.Ar pool
+.Op Ar device Ns ...
+.Nm
  .Cm iostat
  .Op Oo Oo Fl c Ar SCRIPT Oc Oo Fl lq Oc Oc Ns | Ns Fl rw
  .Op Fl T Sy u Ns | Ns Sy d
@@ -1597,6 +1602,32 @@ Will also set -o cachefile=none when not explicitly specified.
  .El
  .It Xo
  .Nm
+.Cm initialize
+.Op Fl cs
+.Ar pool
+.Op Ar device Ns ...
+.Xc
+Begins initializing by writing to all unallocated regions on the specified
+devices, or all eligible devices in the pool if no individual devices are
+specified.
+Only leaf data or log devices may be initialized.
+.Bl -tag -width Ds
+.It Fl c, -cancel
+Cancel initializing on the specified devices, or all eligible devices if none
+are specified.
+If one or more target devices are invalid or are not currently being
+initialized, the command will fail and no cancellation will occur on any device.
+.It Fl s, -suspend
+Suspend initializing on the specified devices, or all eligible devices if none
+are specified.
+If one or more target devices are invalid or are not currently being
+initialized, the command will fail and no suspension will occur on any device.
+Initializing can then be resumed by running
+.Nm zpool Cm initialize
+with no flags on the relevant target devices.
+.El
+.It Xo
+.Nm
  .Cm iostat
  .Op Oo Oo Fl c Ar SCRIPT Oc Oo Fl lq Oc Oc Ns | Ns Fl rw
  .Op Fl T Sy u Ns | Ns Sy d
diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in

index a243f51d86bdee84ff59f1c87ae98568a96fe7e2..193bdc5105fe84b99e99ddd526002707e3442402 100644 (file)
--- a/module/zfs/Makefile.in
+++ b/module/zfs/Makefile.in
@@ -89,6 +89,7 @@ $(MODULE)-objs += vdev_file.o
  $(MODULE)-objs += vdev_indirect.o
  $(MODULE)-objs += vdev_indirect_births.o
  $(MODULE)-objs += vdev_indirect_mapping.o
+$(MODULE)-objs += vdev_initialize.o
  $(MODULE)-objs += vdev_label.o
  $(MODULE)-objs += vdev_mirror.o
  $(MODULE)-objs += vdev_missing.o
diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c

index 4b5baf6a69908ffbc079fdd15d659c5cc3e4c222..71688b4206e55dce57f34b45a9786abcd0c70993 100644 (file)
--- a/module/zfs/metaslab.c
+++ b/module/zfs/metaslab.c
@@ -635,6 +635,8 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd, int allocators)
  
         mg = kmem_zalloc(sizeof (metaslab_group_t), KM_SLEEP);
         mutex_init(&mg->mg_lock, NULL, MUTEX_DEFAULT, NULL);
+       mutex_init(&mg->mg_ms_initialize_lock, NULL, MUTEX_DEFAULT, NULL);
+       cv_init(&mg->mg_ms_initialize_cv, NULL, CV_DEFAULT, NULL);
         mg->mg_primaries = kmem_zalloc(allocators * sizeof (metaslab_t *),
             KM_SLEEP);
         mg->mg_secondaries = kmem_zalloc(allocators * sizeof (metaslab_t *),
@@ -681,6 +683,8 @@ metaslab_group_destroy(metaslab_group_t *mg)
         kmem_free(mg->mg_secondaries, mg->mg_allocators *
             sizeof (metaslab_t *));
         mutex_destroy(&mg->mg_lock);
+       mutex_destroy(&mg->mg_ms_initialize_lock);
+       cv_destroy(&mg->mg_ms_initialize_cv);
  
         for (int i = 0; i < mg->mg_allocators; i++) {
                 zfs_refcount_destroy(&mg->mg_alloc_queue_depth[i]);
@@ -1502,6 +1506,7 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object, uint64_t txg,
         mutex_init(&ms->ms_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&ms->ms_sync_lock, NULL, MUTEX_DEFAULT, NULL);
         cv_init(&ms->ms_load_cv, NULL, CV_DEFAULT, NULL);
+
         ms->ms_id = id;
         ms->ms_start = id << vd->vdev_ms_shift;
         ms->ms_size = 1ULL << vd->vdev_ms_shift;
@@ -2686,6 +2691,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
          * from it in 'metaslab_unload_delay' txgs, then unload it.
          */
         if (msp->ms_loaded &&
+           msp->ms_initializing == 0 &&
             msp->ms_selected_txg + metaslab_unload_delay < txg) {
  
                 for (int t = 1; t < TXG_CONCURRENT_STATES; t++) {
@@ -2967,6 +2973,7 @@ metaslab_block_alloc(metaslab_t *msp, uint64_t size, uint64_t txg)
         metaslab_class_t *mc = msp->ms_group->mg_class;
  
         VERIFY(!msp->ms_condensing);
+       VERIFY0(msp->ms_initializing);
  
         start = mc->mc_ops->msop_alloc(msp, size);
         if (start != -1ULL) {
@@ -3027,9 +3034,10 @@ find_valid_metaslab(metaslab_group_t *mg, uint64_t activation_weight,
                 }
  
                 /*
-                * If the selected metaslab is condensing, skip it.
+                * If the selected metaslab is condensing or being
+                * initialized, skip it.
                  */
-               if (msp->ms_condensing)
+               if (msp->ms_condensing || msp->ms_initializing > 0)
                         continue;
  
                 *was_active = msp->ms_allocator != -1;
@@ -3190,7 +3198,9 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
                 /*
                  * If this metaslab is currently condensing then pick again as
                  * we can't manipulate this metaslab until it's committed
-                * to disk.
+                * to disk. If this metaslab is being initialized, we shouldn't
+                * allocate from it since the allocated region might be
+                * overwritten after allocation.
                  */
                 if (msp->ms_condensing) {
                         metaslab_trace_add(zal, mg, msp, asize, d,
@@ -3199,6 +3209,13 @@ metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
                             ~METASLAB_ACTIVE_MASK);
                         mutex_exit(&msp->ms_lock);
                         continue;
+               } else if (msp->ms_initializing > 0) {
+                       metaslab_trace_add(zal, mg, msp, asize, d,
+                           TRACE_INITIALIZING, allocator);
+                       metaslab_passivate(msp, msp->ms_weight &
+                           ~METASLAB_ACTIVE_MASK);
+                       mutex_exit(&msp->ms_lock);
+                       continue;
                 }
  
                 offset = metaslab_block_alloc(msp, asize, txg);
diff --git a/module/zfs/spa.c b/module/zfs/spa.c

index f0683b0b84612521a6467765dcf64322b832c8f7..622be75f9454995d19b1b56ce0bf15628b051872 100644 (file)
--- a/module/zfs/spa.c
+++ b/module/zfs/spa.c
@@ -56,6 +56,7 @@
  #include <sys/vdev_removal.h>
  #include <sys/vdev_indirect_mapping.h>
  #include <sys/vdev_indirect_births.h>
+#include <sys/vdev_initialize.h>
  #include <sys/vdev_disk.h>
  #include <sys/metaslab.h>
  #include <sys/metaslab_impl.h>
@@ -434,8 +435,9 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
  
                                 dp = spa_get_dsl(spa);
                                 dsl_pool_config_enter(dp, FTAG);
-                               if ((err = dsl_dataset_hold_obj(dp,
-                                   za.za_first_integer, FTAG, &ds))) {
+                               err = dsl_dataset_hold_obj(dp,
+                                   za.za_first_integer, FTAG, &ds);
+                               if (err != 0) {
                                         dsl_pool_config_exit(dp, FTAG);
                                         break;
                                 }
@@ -601,7 +603,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
                                 }
  
                                 error = dmu_objset_hold(strval, FTAG, &os);
-                               if (error)
+                               if (error != 0)
                                         break;
  
                                 /*
@@ -1218,8 +1220,10 @@ spa_activate(spa_t *spa, int mode)
                 spa_create_zio_taskqs(spa);
         }
  
-       for (size_t i = 0; i < TXG_SIZE; i++)
-               spa->spa_txg_zio[i] = zio_root(spa, NULL, NULL, 0);
+       for (size_t i = 0; i < TXG_SIZE; i++) {
+               spa->spa_txg_zio[i] = zio_root(spa, NULL, NULL,
+                   ZIO_FLAG_CANFAIL);
+       }
  
         list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
             offsetof(vdev_t, vdev_config_dirty_node));
@@ -1437,6 +1441,11 @@ spa_unload(spa_t *spa)
          */
         spa_async_suspend(spa);
  
+       if (spa->spa_root_vdev) {
+               vdev_initialize_stop_all(spa->spa_root_vdev,
+                   VDEV_INITIALIZE_ACTIVE);
+       }
+
         /*
          * Stop syncing.
          */
@@ -1452,10 +1461,10 @@ spa_unload(spa_t *spa)
          * calling taskq_wait(mg_taskq).
          */
         if (spa->spa_root_vdev != NULL) {
-               spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+               spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
                 for (int c = 0; c < spa->spa_root_vdev->vdev_children; c++)
                         vdev_metaslab_fini(spa->spa_root_vdev->vdev_child[c]);
-               spa_config_exit(spa, SCL_ALL, FTAG);
+               spa_config_exit(spa, SCL_ALL, spa);
         }
  
         if (spa->spa_mmp.mmp_thread)
@@ -1492,7 +1501,7 @@ spa_unload(spa_t *spa)
  
         bpobj_close(&spa->spa_deferred_bpobj);
  
-       spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+       spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
  
         /*
          * Close all vdevs.
@@ -1554,7 +1563,7 @@ spa_unload(spa_t *spa)
                 spa->spa_comment = NULL;
         }
  
-       spa_config_exit(spa, SCL_ALL, FTAG);
+       spa_config_exit(spa, SCL_ALL, spa);
  }
  
  /*
@@ -4246,6 +4255,9 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
                  */
                 dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
  
+               spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+               vdev_initialize_restart(spa->spa_root_vdev);
+               spa_config_exit(spa, SCL_CONFIG, FTAG);
         }
  
         spa_load_note(spa, "LOADED");
@@ -5653,6 +5665,18 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
                         return (SET_ERROR(EXDEV));
                 }
  
+               /*
+                * We're about to export or destroy this pool. Make sure
+                * we stop all initialization activity here before we
+                * set the spa_final_txg. This will ensure that all
+                * dirty data resulting from the initialization is
+                * committed to disk before we unload the pool.
+                */
+               if (spa->spa_root_vdev != NULL) {
+                       vdev_initialize_stop_all(spa->spa_root_vdev,
+                           VDEV_INITIALIZE_ACTIVE);
+               }
+
                 /*
                  * We want this to be reflected on every label,
                  * so mark them all dirty.  spa_unload() will do the
@@ -6357,6 +6381,86 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
         return (error);
  }
  
+int
+spa_vdev_initialize(spa_t *spa, uint64_t guid, uint64_t cmd_type)
+{
+       /*
+        * We hold the namespace lock through the whole function
+        * to prevent any changes to the pool while we're starting or
+        * stopping initialization. The config and state locks are held so that
+        * we can properly assess the vdev state before we commit to
+        * the initializing operation.
+        */
+       mutex_enter(&spa_namespace_lock);
+       spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
+
+       /* Look up vdev and ensure it's a leaf. */
+       vdev_t *vd = spa_lookup_by_guid(spa, guid, B_FALSE);
+       if (vd == NULL || vd->vdev_detached) {
+               spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+               mutex_exit(&spa_namespace_lock);
+               return (SET_ERROR(ENODEV));
+       } else if (!vd->vdev_ops->vdev_op_leaf || !vdev_is_concrete(vd)) {
+               spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+               mutex_exit(&spa_namespace_lock);
+               return (SET_ERROR(EINVAL));
+       } else if (!vdev_writeable(vd)) {
+               spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+               mutex_exit(&spa_namespace_lock);
+               return (SET_ERROR(EROFS));
+       }
+       mutex_enter(&vd->vdev_initialize_lock);
+       spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
+
+       /*
+        * When we activate an initialize action we check to see
+        * if the vdev_initialize_thread is NULL. We do this instead
+        * of using the vdev_initialize_state since there might be
+        * a previous initialization process which has completed but
+        * the thread has not exited.
+        */
+       if (cmd_type == POOL_INITIALIZE_DO &&
+           (vd->vdev_initialize_thread != NULL ||
+           vd->vdev_top->vdev_removing)) {
+               mutex_exit(&vd->vdev_initialize_lock);
+               mutex_exit(&spa_namespace_lock);
+               return (SET_ERROR(EBUSY));
+       } else if (cmd_type == POOL_INITIALIZE_CANCEL &&
+           (vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE &&
+           vd->vdev_initialize_state != VDEV_INITIALIZE_SUSPENDED)) {
+               mutex_exit(&vd->vdev_initialize_lock);
+               mutex_exit(&spa_namespace_lock);
+               return (SET_ERROR(ESRCH));
+       } else if (cmd_type == POOL_INITIALIZE_SUSPEND &&
+           vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE) {
+               mutex_exit(&vd->vdev_initialize_lock);
+               mutex_exit(&spa_namespace_lock);
+               return (SET_ERROR(ESRCH));
+       }
+
+       switch (cmd_type) {
+       case POOL_INITIALIZE_DO:
+               vdev_initialize(vd);
+               break;
+       case POOL_INITIALIZE_CANCEL:
+               vdev_initialize_stop(vd, VDEV_INITIALIZE_CANCELED);
+               break;
+       case POOL_INITIALIZE_SUSPEND:
+               vdev_initialize_stop(vd, VDEV_INITIALIZE_SUSPENDED);
+               break;
+       default:
+               panic("invalid cmd_type %llu", (unsigned long long)cmd_type);
+       }
+       mutex_exit(&vd->vdev_initialize_lock);
+
+       /* Sync out the initializing state */
+       txg_wait_synced(spa->spa_dsl_pool, 0);
+       mutex_exit(&spa_namespace_lock);
+
+       return (0);
+}
+
+
  /*
   * Split a set of devices from their mirrors, and create a new pool from them.
   */
@@ -6565,6 +6669,19 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
         spa_activate(newspa, spa_mode_global);
         spa_async_suspend(newspa);
  
+       for (c = 0; c < children; c++) {
+               if (vml[c] != NULL) {
+                       /*
+                        * Temporarily stop the initializing activity. We set
+                        * the state to ACTIVE so that we know to resume
+                        * the initializing once the split has completed.
+                        */
+                       mutex_enter(&vml[c]->vdev_initialize_lock);
+                       vdev_initialize_stop(vml[c], VDEV_INITIALIZE_ACTIVE);
+                       mutex_exit(&vml[c]->vdev_initialize_lock);
+               }
+       }
+
         newspa->spa_config_source = SPA_CONFIG_SRC_SPLIT;
  
         /* create the new pool from the disks of the original pool */
@@ -6652,6 +6769,10 @@ out:
                 if (vml[c] != NULL)
                         vml[c]->vdev_offline = B_FALSE;
         }
+
+       /* restart initializing disks as necessary */
+       spa_async_request(spa, SPA_ASYNC_INITIALIZE_RESTART);
+
         vdev_reopen(spa->spa_root_vdev);
  
         nvlist_free(spa->spa_config_splitting);
@@ -7025,6 +7146,14 @@ spa_async_thread(void *arg)
             !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_RESILVER_DEFER)))
                 dsl_resilver_restart(dp, 0);
  
+       if (tasks & SPA_ASYNC_INITIALIZE_RESTART) {
+               mutex_enter(&spa_namespace_lock);
+               spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+               vdev_initialize_restart(spa->spa_root_vdev);
+               spa_config_exit(spa, SCL_CONFIG, FTAG);
+               mutex_exit(&spa_namespace_lock);
+       }
+
         /*
          * Let the world know that we're done.
          */
@@ -7677,8 +7806,9 @@ spa_sync(spa_t *spa, uint64_t txg)
          * Wait for i/os issued in open context that need to complete
          * before this txg syncs.
          */
-       VERIFY0(zio_wait(spa->spa_txg_zio[txg & TXG_MASK]));
-       spa->spa_txg_zio[txg & TXG_MASK] = zio_root(spa, NULL, NULL, 0);
+       (void) zio_wait(spa->spa_txg_zio[txg & TXG_MASK]);
+       spa->spa_txg_zio[txg & TXG_MASK] = zio_root(spa, NULL, NULL,
+           ZIO_FLAG_CANFAIL);
  
         /*
          * Lock out configuration changes.
@@ -7983,7 +8113,8 @@ spa_sync(spa_t *spa, uint64_t txg)
         /*
          * Update usable space statistics.
          */
-       while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg))))
+       while ((vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
+           != NULL)
                 vdev_sync_done(vd, txg);
  
         spa_update_dspace(spa);
diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c

index a3ac70f07ae28e035cafaeae4ab95bbd388d15b1..dfac92d458d3ea9f5a24d642aa47e05da3fe89e3 100644 (file)
--- a/module/zfs/spa_misc.c
+++ b/module/zfs/spa_misc.c
@@ -38,6 +38,7 @@
  #include <sys/zap.h>
  #include <sys/zil.h>
  #include <sys/vdev_impl.h>
+#include <sys/vdev_initialize.h>
  #include <sys/vdev_file.h>
  #include <sys/vdev_raidz.h>
  #include <sys/metaslab.h>
@@ -1194,6 +1195,12 @@ spa_vdev_config_exit(spa_t *spa, vdev_t *vd, uint64_t txg, int error, char *tag)
  
         if (vd != NULL) {
                 ASSERT(!vd->vdev_detached || vd->vdev_dtl_sm == NULL);
+               if (vd->vdev_ops->vdev_op_leaf) {
+                       mutex_enter(&vd->vdev_initialize_lock);
+                       vdev_initialize_stop(vd, VDEV_INITIALIZE_CANCELED);
+                       mutex_exit(&vd->vdev_initialize_lock);
+               }
+
                 spa_config_enter(spa, SCL_ALL, spa, RW_WRITER);
                 vdev_free(vd);
                 spa_config_exit(spa, SCL_ALL, spa);
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c

index 8273e79079f147c249725a6557ca0beb2116c575..f808f8ee78b93ec2627688b1ab8133b06c94f29a 100644 (file)
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -50,6 +50,7 @@
  #include <sys/zil.h>
  #include <sys/dsl_scan.h>
  #include <sys/abd.h>
+#include <sys/vdev_initialize.h>
  #include <sys/zvol.h>
  #include <sys/zfs_ratelimit.h>
  
@@ -212,6 +213,14 @@ vdev_getops(const char *type)
         return (ops);
  }
  
+/* ARGSUSED */
+void
+vdev_default_xlate(vdev_t *vd, const range_seg_t *in, range_seg_t *res)
+{
+       res->rs_start = in->rs_start;
+       res->rs_end = in->rs_end;
+}
+
  /*
   * Derive the enumerated alloction bias from string input.
   * String origin is either the per-vdev zap or zpool(1M).
@@ -526,6 +535,10 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
         mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&vd->vdev_queue_lock, NULL, MUTEX_DEFAULT, NULL);
         mutex_init(&vd->vdev_scan_io_queue_lock, NULL, MUTEX_DEFAULT, NULL);
+       mutex_init(&vd->vdev_initialize_lock, NULL, MUTEX_DEFAULT, NULL);
+       mutex_init(&vd->vdev_initialize_io_lock, NULL, MUTEX_DEFAULT, NULL);
+       cv_init(&vd->vdev_initialize_cv, NULL, CV_DEFAULT, NULL);
+       cv_init(&vd->vdev_initialize_io_cv, NULL, CV_DEFAULT, NULL);
  
         for (int t = 0; t < DTL_TYPES; t++) {
                 vd->vdev_dtl[t] = range_tree_create(NULL, NULL);
@@ -850,6 +863,7 @@ void
  vdev_free(vdev_t *vd)
  {
         spa_t *spa = vd->vdev_spa;
+       ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
  
         /*
          * Scan queues are normally destroyed at the end of a scan. If the
@@ -880,6 +894,7 @@ vdev_free(vdev_t *vd)
  
         ASSERT(vd->vdev_child == NULL);
         ASSERT(vd->vdev_guid_sum == vd->vdev_guid);
+       ASSERT(vd->vdev_initialize_thread == NULL);
  
         /*
          * Discard allocation state.
@@ -957,6 +972,10 @@ vdev_free(vdev_t *vd)
         mutex_destroy(&vd->vdev_stat_lock);
         mutex_destroy(&vd->vdev_probe_lock);
         mutex_destroy(&vd->vdev_scan_io_queue_lock);
+       mutex_destroy(&vd->vdev_initialize_lock);
+       mutex_destroy(&vd->vdev_initialize_io_lock);
+       cv_destroy(&vd->vdev_initialize_io_cv);
+       cv_destroy(&vd->vdev_initialize_cv);
  
         zfs_ratelimit_fini(&vd->vdev_delay_rl);
         zfs_ratelimit_fini(&vd->vdev_checksum_rl);
@@ -3207,7 +3226,8 @@ vdev_sync_done(vdev_t *vd, uint64_t txg)
  
         ASSERT(vdev_is_concrete(vd));
  
-       while ((msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg))))
+       while ((msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)))
+           != NULL)
                 metaslab_sync_done(msp, txg);
  
         if (reassess)
@@ -3458,6 +3478,15 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
                 spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
         }
  
+       /* Restart initializing if necessary */
+       mutex_enter(&vd->vdev_initialize_lock);
+       if (vdev_writeable(vd) &&
+           vd->vdev_initialize_thread == NULL &&
+           vd->vdev_initialize_state == VDEV_INITIALIZE_ACTIVE) {
+               (void) vdev_initialize(vd);
+       }
+       mutex_exit(&vd->vdev_initialize_lock);
+
         if (wasoffline ||
             (oldstate < VDEV_STATE_DEGRADED &&
             vd->vdev_state >= VDEV_STATE_DEGRADED))
@@ -3848,9 +3877,22 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
                 vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
                 vs->vs_state = vd->vdev_state;
                 vs->vs_rsize = vdev_get_min_asize(vd);
-               if (vd->vdev_ops->vdev_op_leaf)
+               if (vd->vdev_ops->vdev_op_leaf) {
                         vs->vs_rsize += VDEV_LABEL_START_SIZE +
                             VDEV_LABEL_END_SIZE;
+                       /*
+                        * Report initializing progress. Since we don't
+                        * have the initializing locks held, this is only
+                        * an estimate (although a fairly accurate one).
+                        */
+                       vs->vs_initialize_bytes_done =
+                           vd->vdev_initialize_bytes_done;
+                       vs->vs_initialize_bytes_est =
+                           vd->vdev_initialize_bytes_est;
+                       vs->vs_initialize_state = vd->vdev_initialize_state;
+                       vs->vs_initialize_action_time =
+                           vd->vdev_initialize_action_time;
+               }
                 /*
                  * Report expandable space on top-level, non-auxillary devices
                  * only. The expandable space is reported in terms of metaslab
diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c

index 9c44ba12a36ad188bf11e303b65ed573ab227360..d13f365dd055d3e72d257d195a1cd5cea72f34d6 100644 (file)
--- a/module/zfs/vdev_disk.c
+++ b/module/zfs/vdev_disk.c
@@ -890,6 +890,7 @@ vdev_ops_t vdev_disk_ops = {
         vdev_disk_hold,
         vdev_disk_rele,
         NULL,
+       vdev_default_xlate,
         VDEV_TYPE_DISK,         /* name of this vdev type */
         B_TRUE                  /* leaf vdev */
  };
diff --git a/module/zfs/vdev_file.c b/module/zfs/vdev_file.c

index bd7e0bc2e39201a18ebb1dd99ba10aff9d4bb156..3551898e0781430f20d8c9ffd43d7069c6dfe163 100644 (file)
--- a/module/zfs/vdev_file.c
+++ b/module/zfs/vdev_file.c
@@ -20,7 +20,7 @@
   */
  /*
   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
   */
  
  #include <sys/zfs_context.h>
@@ -254,6 +254,7 @@ vdev_ops_t vdev_file_ops = {
         vdev_file_hold,
         vdev_file_rele,
         NULL,
+       vdev_default_xlate,
         VDEV_TYPE_FILE,         /* name of this vdev type */
         B_TRUE                  /* leaf vdev */
  };
@@ -289,6 +290,7 @@ vdev_ops_t vdev_disk_ops = {
         vdev_file_hold,
         vdev_file_rele,
         NULL,
+       vdev_default_xlate,
         VDEV_TYPE_DISK,         /* name of this vdev type */
         B_TRUE                  /* leaf vdev */
  };
diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c

index 724457df465ba0f76fad25494c6f3df0de11389f..070d1b8d94dccb9967000be02ef16bfb5cff1307 100644 (file)
--- a/module/zfs/vdev_indirect.c
+++ b/module/zfs/vdev_indirect.c
@@ -1857,6 +1857,7 @@ vdev_ops_t vdev_indirect_ops = {
         NULL,
         NULL,
         vdev_indirect_remap,
+       NULL,
         VDEV_TYPE_INDIRECT,     /* name of this vdev type */
         B_FALSE                 /* leaf vdev */
  };
diff --git a/module/zfs/vdev_initialize.c b/module/zfs/vdev_initialize.c

new file mode 100644 (file)

index 0000000..fcd2c76
--- /dev/null
+++ b/module/zfs/vdev_initialize.c
@@ -0,0 +1,819 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/txg.h>
+#include <sys/vdev_impl.h>
+#include <sys/refcount.h>
+#include <sys/metaslab_impl.h>
+#include <sys/dsl_synctask.h>
+#include <sys/zap.h>
+#include <sys/dmu_tx.h>
+
+/*
+ * Maximum number of metaslabs per group that can be initialized
+ * simultaneously.
+ */
+int max_initialize_ms = 3;
+
+/*
+ * Value that is written to disk during initialization.
+ */
+#ifdef _ILP32
+unsigned long zfs_initialize_value = 0xdeadbeefUL;
+#else
+unsigned long zfs_initialize_value = 0xdeadbeefdeadbeeeULL;
+#endif
+
+/* maximum number of I/Os outstanding per leaf vdev */
+int zfs_initialize_limit = 1;
+
+/* size of initializing writes; default 1MiB, see zfs_remove_max_segment */
+uint64_t zfs_initialize_chunk_size = 1024 * 1024;
+
+static boolean_t
+vdev_initialize_should_stop(vdev_t *vd)
+{
+       return (vd->vdev_initialize_exit_wanted || !vdev_writeable(vd) ||
+           vd->vdev_detached || vd->vdev_top->vdev_removing);
+}
+
+static void
+vdev_initialize_zap_update_sync(void *arg, dmu_tx_t *tx)
+{
+       /*
+        * We pass in the guid instead of the vdev_t since the vdev may
+        * have been freed prior to the sync task being processed. This
+        * happens when a vdev is detached as we call spa_config_vdev_exit(),
+        * stop the initializing thread, schedule the sync task, and free
+        * the vdev. Later when the scheduled sync task is invoked, it would
+        * find that the vdev has been freed.
+        */
+       uint64_t guid = *(uint64_t *)arg;
+       uint64_t txg = dmu_tx_get_txg(tx);
+       kmem_free(arg, sizeof (uint64_t));
+
+       vdev_t *vd = spa_lookup_by_guid(tx->tx_pool->dp_spa, guid, B_FALSE);
+       if (vd == NULL || vd->vdev_top->vdev_removing || !vdev_is_concrete(vd))
+               return;
+
+       uint64_t last_offset = vd->vdev_initialize_offset[txg & TXG_MASK];
+       vd->vdev_initialize_offset[txg & TXG_MASK] = 0;
+
+       VERIFY(vd->vdev_leaf_zap != 0);
+
+       objset_t *mos = vd->vdev_spa->spa_meta_objset;
+
+       if (last_offset > 0) {
+               vd->vdev_initialize_last_offset = last_offset;
+               VERIFY0(zap_update(mos, vd->vdev_leaf_zap,
+                   VDEV_LEAF_ZAP_INITIALIZE_LAST_OFFSET,
+                   sizeof (last_offset), 1, &last_offset, tx));
+       }
+       if (vd->vdev_initialize_action_time > 0) {
+               uint64_t val = (uint64_t)vd->vdev_initialize_action_time;
+               VERIFY0(zap_update(mos, vd->vdev_leaf_zap,
+                   VDEV_LEAF_ZAP_INITIALIZE_ACTION_TIME, sizeof (val),
+                   1, &val, tx));
+       }
+
+       uint64_t initialize_state = vd->vdev_initialize_state;
+       VERIFY0(zap_update(mos, vd->vdev_leaf_zap,
+           VDEV_LEAF_ZAP_INITIALIZE_STATE, sizeof (initialize_state), 1,
+           &initialize_state, tx));
+}
+
+static void
+vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state)
+{
+       ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
+       spa_t *spa = vd->vdev_spa;
+
+       if (new_state == vd->vdev_initialize_state)
+               return;
+
+       /*
+        * Copy the vd's guid, this will be freed by the sync task.
+        */
+       uint64_t *guid = kmem_zalloc(sizeof (uint64_t), KM_SLEEP);
+       *guid = vd->vdev_guid;
+
+       /*
+        * When leaving the suspended state, preserve the original start time.
+        */
+       if (vd->vdev_initialize_state != VDEV_INITIALIZE_SUSPENDED) {
+               vd->vdev_initialize_action_time = gethrestime_sec();
+       }
+       vd->vdev_initialize_state = new_state;
+
+       dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
+       VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
+       dsl_sync_task_nowait(spa_get_dsl(spa), vdev_initialize_zap_update_sync,
+           guid, 2, ZFS_SPACE_CHECK_RESERVED, tx);
+
+       switch (new_state) {
+       case VDEV_INITIALIZE_ACTIVE:
+               spa_history_log_internal(spa, "initialize", tx,
+                   "vdev=%s activated", vd->vdev_path);
+               break;
+       case VDEV_INITIALIZE_SUSPENDED:
+               spa_history_log_internal(spa, "initialize", tx,
+                   "vdev=%s suspended", vd->vdev_path);
+               break;
+       case VDEV_INITIALIZE_CANCELED:
+               spa_history_log_internal(spa, "initialize", tx,
+                   "vdev=%s canceled", vd->vdev_path);
+               break;
+       case VDEV_INITIALIZE_COMPLETE:
+               spa_history_log_internal(spa, "initialize", tx,
+                   "vdev=%s complete", vd->vdev_path);
+               break;
+       default:
+               panic("invalid state %llu", (unsigned long long)new_state);
+       }
+
+       dmu_tx_commit(tx);
+}
+
+static void
+vdev_initialize_cb(zio_t *zio)
+{
+       vdev_t *vd = zio->io_vd;
+       mutex_enter(&vd->vdev_initialize_io_lock);
+       if (zio->io_error == ENXIO && !vdev_writeable(vd)) {
+               /*
+                * The I/O failed because the vdev was unavailable; roll the
+                * last offset back. (This works because spa_sync waits on
+                * spa_txg_zio before it runs sync tasks.)
+                */
+               uint64_t *off =
+                   &vd->vdev_initialize_offset[zio->io_txg & TXG_MASK];
+               *off = MIN(*off, zio->io_offset);
+       } else {
+               /*
+                * Since initializing is best-effort, we ignore I/O errors and
+                * rely on vdev_probe to determine if the errors are more
+                * critical.
+                */
+               if (zio->io_error != 0)
+                       vd->vdev_stat.vs_initialize_errors++;
+
+               vd->vdev_initialize_bytes_done += zio->io_orig_size;
+       }
+       ASSERT3U(vd->vdev_initialize_inflight, >, 0);
+       vd->vdev_initialize_inflight--;
+       cv_broadcast(&vd->vdev_initialize_io_cv);
+       mutex_exit(&vd->vdev_initialize_io_lock);
+
+       spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
+}
+
+/* Takes care of physical writing and limiting # of concurrent ZIOs. */
+static int
+vdev_initialize_write(vdev_t *vd, uint64_t start, uint64_t size, abd_t *data)
+{
+       spa_t *spa = vd->vdev_spa;
+
+       /* Limit inflight initializing I/Os */
+       mutex_enter(&vd->vdev_initialize_io_lock);
+       while (vd->vdev_initialize_inflight >= zfs_initialize_limit) {
+               cv_wait(&vd->vdev_initialize_io_cv,
+                   &vd->vdev_initialize_io_lock);
+       }
+       vd->vdev_initialize_inflight++;
+       mutex_exit(&vd->vdev_initialize_io_lock);
+
+       dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
+       VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
+       uint64_t txg = dmu_tx_get_txg(tx);
+
+       spa_config_enter(spa, SCL_STATE_ALL, vd, RW_READER);
+       mutex_enter(&vd->vdev_initialize_lock);
+
+       if (vd->vdev_initialize_offset[txg & TXG_MASK] == 0) {
+               uint64_t *guid = kmem_zalloc(sizeof (uint64_t), KM_SLEEP);
+               *guid = vd->vdev_guid;
+
+               /* This is the first write of this txg. */
+               dsl_sync_task_nowait(spa_get_dsl(spa),
+                   vdev_initialize_zap_update_sync, guid, 2,
+                   ZFS_SPACE_CHECK_RESERVED, tx);
+       }
+
+       /*
+        * We know the vdev struct will still be around since all
+        * consumers of vdev_free must stop the initialization first.
+        */
+       if (vdev_initialize_should_stop(vd)) {
+               mutex_enter(&vd->vdev_initialize_io_lock);
+               ASSERT3U(vd->vdev_initialize_inflight, >, 0);
+               vd->vdev_initialize_inflight--;
+               mutex_exit(&vd->vdev_initialize_io_lock);
+               spa_config_exit(vd->vdev_spa, SCL_STATE_ALL, vd);
+               mutex_exit(&vd->vdev_initialize_lock);
+               dmu_tx_commit(tx);
+               return (SET_ERROR(EINTR));
+       }
+       mutex_exit(&vd->vdev_initialize_lock);
+
+       vd->vdev_initialize_offset[txg & TXG_MASK] = start + size;
+       zio_nowait(zio_write_phys(spa->spa_txg_zio[txg & TXG_MASK], vd, start,
+           size, data, ZIO_CHECKSUM_OFF, vdev_initialize_cb, NULL,
+           ZIO_PRIORITY_INITIALIZING, ZIO_FLAG_CANFAIL, B_FALSE));
+       /* vdev_initialize_cb releases SCL_STATE_ALL */
+
+       dmu_tx_commit(tx);
+
+       return (0);
+}
+
+/*
+ * Translate a logical range to the physical range for the specified vdev_t.
+ * This function is initially called with a leaf vdev and will walk each
+ * parent vdev until it reaches a top-level vdev. Once the top-level is
+ * reached the physical range is initialized and the recursive function
+ * begins to unwind. As it unwinds it calls the parent's vdev specific
+ * translation function to do the real conversion.
+ */
+void
+vdev_xlate(vdev_t *vd, const range_seg_t *logical_rs, range_seg_t *physical_rs)
+{
+       /*
+        * Walk up the vdev tree
+        */
+       if (vd != vd->vdev_top) {
+               vdev_xlate(vd->vdev_parent, logical_rs, physical_rs);
+       } else {
+               /*
+                * We've reached the top-level vdev, initialize the
+                * physical range to the logical range and start to
+                * unwind.
+                */
+               physical_rs->rs_start = logical_rs->rs_start;
+               physical_rs->rs_end = logical_rs->rs_end;
+               return;
+       }
+
+       vdev_t *pvd = vd->vdev_parent;
+       ASSERT3P(pvd, !=, NULL);
+       ASSERT3P(pvd->vdev_ops->vdev_op_xlate, !=, NULL);
+
+       /*
+        * As this recursive function unwinds, translate the logical
+        * range into its physical components by calling the
+        * vdev specific translate function.
+        */
+       range_seg_t intermediate = { { { 0, 0 } } };
+       pvd->vdev_ops->vdev_op_xlate(vd, physical_rs, &intermediate);
+
+       physical_rs->rs_start = intermediate.rs_start;
+       physical_rs->rs_end = intermediate.rs_end;
+}
+
+/*
+ * Callback to fill each ABD chunk with zfs_initialize_value. len must be
+ * divisible by sizeof (uint64_t), and buf must be 8-byte aligned. The ABD
+ * allocation will guarantee these for us.
+ */
+/* ARGSUSED */
+static int
+vdev_initialize_block_fill(void *buf, size_t len, void *unused)
+{
+       ASSERT0(len % sizeof (uint64_t));
+#ifdef _ILP32
+       for (uint64_t i = 0; i < len; i += sizeof (uint32_t)) {
+               *(uint32_t *)((char *)(buf) + i) = zfs_initialize_value;
+       }
+#else
+       for (uint64_t i = 0; i < len; i += sizeof (uint64_t)) {
+               *(uint64_t *)((char *)(buf) + i) = zfs_initialize_value;
+       }
+#endif
+       return (0);
+}
+
+static abd_t *
+vdev_initialize_block_alloc(void)
+{
+       /* Allocate ABD for filler data */
+       abd_t *data = abd_alloc_for_io(zfs_initialize_chunk_size, B_FALSE);
+
+       ASSERT0(zfs_initialize_chunk_size % sizeof (uint64_t));
+       (void) abd_iterate_func(data, 0, zfs_initialize_chunk_size,
+           vdev_initialize_block_fill, NULL);
+
+       return (data);
+}
+
+static void
+vdev_initialize_block_free(abd_t *data)
+{
+       abd_free(data);
+}
+
+static int
+vdev_initialize_ranges(vdev_t *vd, abd_t *data)
+{
+       avl_tree_t *rt = &vd->vdev_initialize_tree->rt_root;
+
+       for (range_seg_t *rs = avl_first(rt); rs != NULL;
+           rs = AVL_NEXT(rt, rs)) {
+               uint64_t size = rs->rs_end - rs->rs_start;
+
+               /* Split range into legally-sized physical chunks */
+               uint64_t writes_required =
+                   ((size - 1) / zfs_initialize_chunk_size) + 1;
+
+               for (uint64_t w = 0; w < writes_required; w++) {
+                       int error;
+
+                       error = vdev_initialize_write(vd,
+                           VDEV_LABEL_START_SIZE + rs->rs_start +
+                           (w * zfs_initialize_chunk_size),
+                           MIN(size - (w * zfs_initialize_chunk_size),
+                           zfs_initialize_chunk_size), data);
+                       if (error != 0)
+                               return (error);
+               }
+       }
+       return (0);
+}
+
+static void
+vdev_initialize_ms_load(metaslab_t *msp)
+{
+       ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+       metaslab_load_wait(msp);
+       if (!msp->ms_loaded)
+               VERIFY0(metaslab_load(msp));
+}
+
+static void
+vdev_initialize_mg_wait(metaslab_group_t *mg)
+{
+       ASSERT(MUTEX_HELD(&mg->mg_ms_initialize_lock));
+       while (mg->mg_initialize_updating) {
+               cv_wait(&mg->mg_ms_initialize_cv, &mg->mg_ms_initialize_lock);
+       }
+}
+
+static void
+vdev_initialize_mg_mark(metaslab_group_t *mg)
+{
+       ASSERT(MUTEX_HELD(&mg->mg_ms_initialize_lock));
+       ASSERT(mg->mg_initialize_updating);
+
+       while (mg->mg_ms_initializing >= max_initialize_ms) {
+               cv_wait(&mg->mg_ms_initialize_cv, &mg->mg_ms_initialize_lock);
+       }
+       mg->mg_ms_initializing++;
+       ASSERT3U(mg->mg_ms_initializing, <=, max_initialize_ms);
+}
+
+/*
+ * Mark the metaslab as being initialized to prevent any allocations
+ * on this metaslab. We must also track how many metaslabs are currently
+ * being initialized within a metaslab group and limit them to prevent
+ * allocation failures from occurring because all metaslabs are being
+ * initialized.
+ */
+static void
+vdev_initialize_ms_mark(metaslab_t *msp)
+{
+       ASSERT(!MUTEX_HELD(&msp->ms_lock));
+       metaslab_group_t *mg = msp->ms_group;
+
+       mutex_enter(&mg->mg_ms_initialize_lock);
+
+       /*
+        * To keep an accurate count of how many threads are initializing
+        * a specific metaslab group, we only allow one thread to mark
+        * the metaslab group at a time. This ensures that the value of
+        * ms_initializing will be accurate when we decide to mark a metaslab
+        * group as being initialized. To do this we force all other threads
+        * to wait till the metaslab group's mg_initialize_updating flag is no
+        * longer set.
+        */
+       vdev_initialize_mg_wait(mg);
+       mg->mg_initialize_updating = B_TRUE;
+       if (msp->ms_initializing == 0) {
+               vdev_initialize_mg_mark(mg);
+       }
+       mutex_enter(&msp->ms_lock);
+       msp->ms_initializing++;
+       mutex_exit(&msp->ms_lock);
+
+       mg->mg_initialize_updating = B_FALSE;
+       cv_broadcast(&mg->mg_ms_initialize_cv);
+       mutex_exit(&mg->mg_ms_initialize_lock);
+}
+
+static void
+vdev_initialize_ms_unmark(metaslab_t *msp)
+{
+       ASSERT(!MUTEX_HELD(&msp->ms_lock));
+       metaslab_group_t *mg = msp->ms_group;
+       mutex_enter(&mg->mg_ms_initialize_lock);
+       mutex_enter(&msp->ms_lock);
+       if (--msp->ms_initializing == 0) {
+               mg->mg_ms_initializing--;
+               cv_broadcast(&mg->mg_ms_initialize_cv);
+       }
+       mutex_exit(&msp->ms_lock);
+       mutex_exit(&mg->mg_ms_initialize_lock);
+}
+
+static void
+vdev_initialize_calculate_progress(vdev_t *vd)
+{
+       ASSERT(spa_config_held(vd->vdev_spa, SCL_CONFIG, RW_READER) ||
+           spa_config_held(vd->vdev_spa, SCL_CONFIG, RW_WRITER));
+       ASSERT(vd->vdev_leaf_zap != 0);
+
+       vd->vdev_initialize_bytes_est = 0;
+       vd->vdev_initialize_bytes_done = 0;
+
+       for (uint64_t i = 0; i < vd->vdev_top->vdev_ms_count; i++) {
+               metaslab_t *msp = vd->vdev_top->vdev_ms[i];
+               mutex_enter(&msp->ms_lock);
+
+               uint64_t ms_free = msp->ms_size -
+                   space_map_allocated(msp->ms_sm);
+
+               if (vd->vdev_top->vdev_ops == &vdev_raidz_ops)
+                       ms_free /= vd->vdev_top->vdev_children;
+
+               /*
+                * Convert the metaslab range to a physical range
+                * on our vdev. We use this to determine if we are
+                * in the middle of this metaslab range.
+                */
+               range_seg_t logical_rs, physical_rs;
+               logical_rs.rs_start = msp->ms_start;
+               logical_rs.rs_end = msp->ms_start + msp->ms_size;
+               vdev_xlate(vd, &logical_rs, &physical_rs);
+
+               if (vd->vdev_initialize_last_offset <= physical_rs.rs_start) {
+                       vd->vdev_initialize_bytes_est += ms_free;
+                       mutex_exit(&msp->ms_lock);
+                       continue;
+               } else if (vd->vdev_initialize_last_offset >
+                   physical_rs.rs_end) {
+                       vd->vdev_initialize_bytes_done += ms_free;
+                       vd->vdev_initialize_bytes_est += ms_free;
+                       mutex_exit(&msp->ms_lock);
+                       continue;
+               }
+
+               /*
+                * If we get here, we're in the middle of initializing this
+                * metaslab. Load it and walk the free tree for more accurate
+                * progress estimation.
+                */
+               vdev_initialize_ms_load(msp);
+
+               for (range_seg_t *rs = avl_first(&msp->ms_allocatable->rt_root);
+                   rs; rs = AVL_NEXT(&msp->ms_allocatable->rt_root, rs)) {
+                       logical_rs.rs_start = rs->rs_start;
+                       logical_rs.rs_end = rs->rs_end;
+                       vdev_xlate(vd, &logical_rs, &physical_rs);
+
+                       uint64_t size = physical_rs.rs_end -
+                           physical_rs.rs_start;
+                       vd->vdev_initialize_bytes_est += size;
+                       if (vd->vdev_initialize_last_offset >
+                           physical_rs.rs_end) {
+                               vd->vdev_initialize_bytes_done += size;
+                       } else if (vd->vdev_initialize_last_offset >
+                           physical_rs.rs_start &&
+                           vd->vdev_initialize_last_offset <
+                           physical_rs.rs_end) {
+                               vd->vdev_initialize_bytes_done +=
+                                   vd->vdev_initialize_last_offset -
+                                   physical_rs.rs_start;
+                       }
+               }
+               mutex_exit(&msp->ms_lock);
+       }
+}
+
+static int
+vdev_initialize_load(vdev_t *vd)
+{
+       int err = 0;
+       ASSERT(spa_config_held(vd->vdev_spa, SCL_CONFIG, RW_READER) ||
+           spa_config_held(vd->vdev_spa, SCL_CONFIG, RW_WRITER));
+       ASSERT(vd->vdev_leaf_zap != 0);
+
+       if (vd->vdev_initialize_state == VDEV_INITIALIZE_ACTIVE ||
+           vd->vdev_initialize_state == VDEV_INITIALIZE_SUSPENDED) {
+               err = zap_lookup(vd->vdev_spa->spa_meta_objset,
+                   vd->vdev_leaf_zap, VDEV_LEAF_ZAP_INITIALIZE_LAST_OFFSET,
+                   sizeof (vd->vdev_initialize_last_offset), 1,
+                   &vd->vdev_initialize_last_offset);
+               if (err == ENOENT) {
+                       vd->vdev_initialize_last_offset = 0;
+                       err = 0;
+               }
+       }
+
+       vdev_initialize_calculate_progress(vd);
+       return (err);
+}
+
+
+/*
+ * Convert the logical range into a physical range and add it to our
+ * avl tree.
+ */
+void
+vdev_initialize_range_add(void *arg, uint64_t start, uint64_t size)
+{
+       vdev_t *vd = arg;
+       range_seg_t logical_rs, physical_rs;
+       logical_rs.rs_start = start;
+       logical_rs.rs_end = start + size;
+
+       ASSERT(vd->vdev_ops->vdev_op_leaf);
+       vdev_xlate(vd, &logical_rs, &physical_rs);
+
+       IMPLY(vd->vdev_top == vd,
+           logical_rs.rs_start == physical_rs.rs_start);
+       IMPLY(vd->vdev_top == vd,
+           logical_rs.rs_end == physical_rs.rs_end);
+
+       /* Only add segments that we have not visited yet */
+       if (physical_rs.rs_end <= vd->vdev_initialize_last_offset)
+               return;
+
+       /* Pick up where we left off mid-range. */
+       if (vd->vdev_initialize_last_offset > physical_rs.rs_start) {
+               zfs_dbgmsg("range write: vd %s changed (%llu, %llu) to "
+                   "(%llu, %llu)", vd->vdev_path,
+                   (u_longlong_t)physical_rs.rs_start,
+                   (u_longlong_t)physical_rs.rs_end,
+                   (u_longlong_t)vd->vdev_initialize_last_offset,
+                   (u_longlong_t)physical_rs.rs_end);
+               ASSERT3U(physical_rs.rs_end, >,
+                   vd->vdev_initialize_last_offset);
+               physical_rs.rs_start = vd->vdev_initialize_last_offset;
+       }
+       ASSERT3U(physical_rs.rs_end, >=, physical_rs.rs_start);
+
+       /*
+        * With raidz, it's possible that the logical range does not live on
+        * this leaf vdev. We only add the physical range to this vdev's tree
+        * if it has a length greater than 0.
+        */
+       if (physical_rs.rs_end > physical_rs.rs_start) {
+               range_tree_add(vd->vdev_initialize_tree, physical_rs.rs_start,
+                   physical_rs.rs_end - physical_rs.rs_start);
+       } else {
+               ASSERT3U(physical_rs.rs_end, ==, physical_rs.rs_start);
+       }
+}
+
+static void
+vdev_initialize_thread(void *arg)
+{
+       vdev_t *vd = arg;
+       spa_t *spa = vd->vdev_spa;
+       int error = 0;
+       uint64_t ms_count = 0;
+
+       ASSERT(vdev_is_concrete(vd));
+       spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
+       vd->vdev_initialize_last_offset = 0;
+       VERIFY0(vdev_initialize_load(vd));
+
+       abd_t *deadbeef = vdev_initialize_block_alloc();
+
+       vd->vdev_initialize_tree = range_tree_create(NULL, NULL);
+
+       for (uint64_t i = 0; !vd->vdev_detached &&
+           i < vd->vdev_top->vdev_ms_count; i++) {
+               metaslab_t *msp = vd->vdev_top->vdev_ms[i];
+
+               /*
+                * If we've expanded the top-level vdev or it's our
+                * first pass, calculate our progress.
+                */
+               if (vd->vdev_top->vdev_ms_count != ms_count) {
+                       vdev_initialize_calculate_progress(vd);
+                       ms_count = vd->vdev_top->vdev_ms_count;
+               }
+
+               vdev_initialize_ms_mark(msp);
+               mutex_enter(&msp->ms_lock);
+               vdev_initialize_ms_load(msp);
+
+               range_tree_walk(msp->ms_allocatable, vdev_initialize_range_add,
+                   vd);
+               mutex_exit(&msp->ms_lock);
+
+               spa_config_exit(spa, SCL_CONFIG, FTAG);
+               error = vdev_initialize_ranges(vd, deadbeef);
+               vdev_initialize_ms_unmark(msp);
+               spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
+
+               range_tree_vacate(vd->vdev_initialize_tree, NULL, NULL);
+               if (error != 0)
+                       break;
+       }
+
+       spa_config_exit(spa, SCL_CONFIG, FTAG);
+       mutex_enter(&vd->vdev_initialize_io_lock);
+       while (vd->vdev_initialize_inflight > 0) {
+               cv_wait(&vd->vdev_initialize_io_cv,
+                   &vd->vdev_initialize_io_lock);
+       }
+       mutex_exit(&vd->vdev_initialize_io_lock);
+
+       range_tree_destroy(vd->vdev_initialize_tree);
+       vdev_initialize_block_free(deadbeef);
+       vd->vdev_initialize_tree = NULL;
+
+       mutex_enter(&vd->vdev_initialize_lock);
+       if (!vd->vdev_initialize_exit_wanted && vdev_writeable(vd)) {
+               vdev_initialize_change_state(vd, VDEV_INITIALIZE_COMPLETE);
+       }
+       ASSERT(vd->vdev_initialize_thread != NULL ||
+           vd->vdev_initialize_inflight == 0);
+
+       /*
+        * Drop the vdev_initialize_lock while we sync out the
+        * txg since it's possible that a device might be trying to
+        * come online and must check to see if it needs to restart an
+        * initialization. That thread will be holding the spa_config_lock
+        * which would prevent the txg_wait_synced from completing.
+        */
+       mutex_exit(&vd->vdev_initialize_lock);
+       txg_wait_synced(spa_get_dsl(spa), 0);
+       mutex_enter(&vd->vdev_initialize_lock);
+
+       vd->vdev_initialize_thread = NULL;
+       cv_broadcast(&vd->vdev_initialize_cv);
+       mutex_exit(&vd->vdev_initialize_lock);
+}
+
+/*
+ * Starts initializing a device. Caller must hold vdev_initialize_lock.
+ * Device must be a leaf and not already be initializing.
+ */
+void
+vdev_initialize(vdev_t *vd)
+{
+       ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
+       ASSERT(vd->vdev_ops->vdev_op_leaf);
+       ASSERT(vdev_is_concrete(vd));
+       ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
+       ASSERT(!vd->vdev_detached);
+       ASSERT(!vd->vdev_initialize_exit_wanted);
+       ASSERT(!vd->vdev_top->vdev_removing);
+
+       vdev_initialize_change_state(vd, VDEV_INITIALIZE_ACTIVE);
+       vd->vdev_initialize_thread = thread_create(NULL, 0,
+           vdev_initialize_thread, vd, 0, &p0, TS_RUN, maxclsyspri);
+}
+
+/*
+ * Stop initializing a device, with the resultant initializing state being
+ * tgt_state. Blocks until the initializing thread has exited.
+ * Caller must hold vdev_initialize_lock and must not be writing to the spa
+ * config, as the initializing thread may try to enter the config as a reader
+ * before exiting.
+ */
+void
+vdev_initialize_stop(vdev_t *vd, vdev_initializing_state_t tgt_state)
+{
+       ASSERTV(spa_t *spa = vd->vdev_spa);
+       ASSERT(!spa_config_held(spa, SCL_CONFIG | SCL_STATE, RW_WRITER));
+
+       ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
+       ASSERT(vd->vdev_ops->vdev_op_leaf);
+       ASSERT(vdev_is_concrete(vd));
+
+       /*
+        * Allow cancel requests to proceed even if the initialize thread
+        * has stopped.
+        */
+       if (vd->vdev_initialize_thread == NULL &&
+           tgt_state != VDEV_INITIALIZE_CANCELED) {
+               return;
+       }
+
+       vdev_initialize_change_state(vd, tgt_state);
+       vd->vdev_initialize_exit_wanted = B_TRUE;
+       while (vd->vdev_initialize_thread != NULL)
+               cv_wait(&vd->vdev_initialize_cv, &vd->vdev_initialize_lock);
+
+       ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
+       vd->vdev_initialize_exit_wanted = B_FALSE;
+}
+
+static void
+vdev_initialize_stop_all_impl(vdev_t *vd, vdev_initializing_state_t tgt_state)
+{
+       if (vd->vdev_ops->vdev_op_leaf && vdev_is_concrete(vd)) {
+               mutex_enter(&vd->vdev_initialize_lock);
+               vdev_initialize_stop(vd, tgt_state);
+               mutex_exit(&vd->vdev_initialize_lock);
+               return;
+       }
+
+       for (uint64_t i = 0; i < vd->vdev_children; i++) {
+               vdev_initialize_stop_all_impl(vd->vdev_child[i], tgt_state);
+       }
+}
+
+/*
+ * Convenience function to stop initializing of a vdev tree and set all
+ * initialize thread pointers to NULL.
+ */
+void
+vdev_initialize_stop_all(vdev_t *vd, vdev_initializing_state_t tgt_state)
+{
+       vdev_initialize_stop_all_impl(vd, tgt_state);
+
+       if (vd->vdev_spa->spa_sync_on) {
+               /* Make sure that our state has been synced to disk */
+               txg_wait_synced(spa_get_dsl(vd->vdev_spa), 0);
+       }
+}
+
+void
+vdev_initialize_restart(vdev_t *vd)
+{
+       ASSERT(MUTEX_HELD(&spa_namespace_lock));
+       ASSERT(!spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER));
+
+       if (vd->vdev_leaf_zap != 0) {
+               mutex_enter(&vd->vdev_initialize_lock);
+               uint64_t initialize_state = VDEV_INITIALIZE_NONE;
+               int err = zap_lookup(vd->vdev_spa->spa_meta_objset,
+                   vd->vdev_leaf_zap, VDEV_LEAF_ZAP_INITIALIZE_STATE,
+                   sizeof (initialize_state), 1, &initialize_state);
+               ASSERT(err == 0 || err == ENOENT);
+               vd->vdev_initialize_state = initialize_state;
+
+               uint64_t timestamp = 0;
+               err = zap_lookup(vd->vdev_spa->spa_meta_objset,
+                   vd->vdev_leaf_zap, VDEV_LEAF_ZAP_INITIALIZE_ACTION_TIME,
+                   sizeof (timestamp), 1, &timestamp);
+               ASSERT(err == 0 || err == ENOENT);
+               vd->vdev_initialize_action_time = (time_t)timestamp;
+
+               if (vd->vdev_initialize_state == VDEV_INITIALIZE_SUSPENDED ||
+                   vd->vdev_offline) {
+                       /* load progress for reporting, but don't resume */
+                       VERIFY0(vdev_initialize_load(vd));
+               } else if (vd->vdev_initialize_state ==
+                   VDEV_INITIALIZE_ACTIVE && vdev_writeable(vd)) {
+                       vdev_initialize(vd);
+               }
+
+               mutex_exit(&vd->vdev_initialize_lock);
+       }
+
+       for (uint64_t i = 0; i < vd->vdev_children; i++) {
+               vdev_initialize_restart(vd->vdev_child[i]);
+       }
+}
+
+#if defined(_KERNEL)
+EXPORT_SYMBOL(vdev_initialize_restart);
+EXPORT_SYMBOL(vdev_xlate);
+EXPORT_SYMBOL(vdev_initialize_stop_all);
+EXPORT_SYMBOL(vdev_initialize);
+EXPORT_SYMBOL(vdev_initialize_stop);
+
+/* CSTYLED */
+module_param(zfs_initialize_value, ulong, 0644);
+MODULE_PARM_DESC(zfs_initialize_value,
+       "Value written during zpool initialize");
+#endif
diff --git a/module/zfs/vdev_mirror.c b/module/zfs/vdev_mirror.c

index 65357d841805917876d023afe444cf9a118e5809..b45c05db28a45ff95a963350ad214b896670178a 100644 (file)
--- a/module/zfs/vdev_mirror.c
+++ b/module/zfs/vdev_mirror.c
@@ -740,6 +740,7 @@ vdev_ops_t vdev_mirror_ops = {
         NULL,
         NULL,
         NULL,
+       vdev_default_xlate,
         VDEV_TYPE_MIRROR,       /* name of this vdev type */
         B_FALSE                 /* not a leaf vdev */
  };
@@ -755,6 +756,7 @@ vdev_ops_t vdev_replacing_ops = {
         NULL,
         NULL,
         NULL,
+       vdev_default_xlate,
         VDEV_TYPE_REPLACING,    /* name of this vdev type */
         B_FALSE                 /* not a leaf vdev */
  };
@@ -770,6 +772,7 @@ vdev_ops_t vdev_spare_ops = {
         NULL,
         NULL,
         NULL,
+       vdev_default_xlate,
         VDEV_TYPE_SPARE,        /* name of this vdev type */
         B_FALSE                 /* not a leaf vdev */
  };
diff --git a/module/zfs/vdev_missing.c b/module/zfs/vdev_missing.c

index b1c039f167f26086ee8c8f33456ae1b16db9bde0..d85993bff05246c241e46558fa33f0c4e0760071 100644 (file)
--- a/module/zfs/vdev_missing.c
+++ b/module/zfs/vdev_missing.c
@@ -24,7 +24,7 @@
   */
  
  /*
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
   */
  
  /*
@@ -90,6 +90,7 @@ vdev_ops_t vdev_missing_ops = {
         NULL,
         NULL,
         NULL,
+       NULL,
         VDEV_TYPE_MISSING,      /* name of this vdev type */
         B_TRUE                  /* leaf vdev */
  };
@@ -105,6 +106,7 @@ vdev_ops_t vdev_hole_ops = {
         NULL,
         NULL,
         NULL,
+       NULL,
         VDEV_TYPE_HOLE,         /* name of this vdev type */
         B_TRUE                  /* leaf vdev */
  };
diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c

index 89cdf7d81099708cc726cc24744dfe2037e6da37..939699cb837349f159867e7d032d05cfa68aac14 100644 (file)
--- a/module/zfs/vdev_queue.c
+++ b/module/zfs/vdev_queue.c
@@ -154,6 +154,8 @@ uint32_t zfs_vdev_scrub_min_active = 1;
  uint32_t zfs_vdev_scrub_max_active = 2;
  uint32_t zfs_vdev_removal_min_active = 1;
  uint32_t zfs_vdev_removal_max_active = 2;
+uint32_t zfs_vdev_initializing_min_active = 1;
+uint32_t zfs_vdev_initializing_max_active = 1;
  
  /*
   * When the pool has less than zfs_vdev_async_write_active_min_dirty_percent
@@ -261,6 +263,8 @@ vdev_queue_class_min_active(zio_priority_t p)
                 return (zfs_vdev_scrub_min_active);
         case ZIO_PRIORITY_REMOVAL:
                 return (zfs_vdev_removal_min_active);
+       case ZIO_PRIORITY_INITIALIZING:
+               return (zfs_vdev_initializing_min_active);
         default:
                 panic("invalid priority %u", p);
                 return (0);
@@ -331,6 +335,8 @@ vdev_queue_class_max_active(spa_t *spa, zio_priority_t p)
                 return (zfs_vdev_scrub_max_active);
         case ZIO_PRIORITY_REMOVAL:
                 return (zfs_vdev_removal_max_active);
+       case ZIO_PRIORITY_INITIALIZING:
+               return (zfs_vdev_initializing_max_active);
         default:
                 panic("invalid priority %u", p);
                 return (0);
@@ -718,8 +724,8 @@ again:
         }
  
         /*
-        * For LBA-ordered queues (async / scrub), issue the i/o which follows
-        * the most recently issued i/o in LBA (offset) order.
+        * For LBA-ordered queues (async / scrub / initializing), issue the
+        * i/o which follows the most recently issued i/o in LBA (offset) order.
          *
          * For FIFO queues (sync), issue the i/o with the lowest timestamp.
          */
@@ -775,13 +781,15 @@ vdev_queue_io(zio_t *zio)
                 if (zio->io_priority != ZIO_PRIORITY_SYNC_READ &&
                     zio->io_priority != ZIO_PRIORITY_ASYNC_READ &&
                     zio->io_priority != ZIO_PRIORITY_SCRUB &&
-                   zio->io_priority != ZIO_PRIORITY_REMOVAL)
+                   zio->io_priority != ZIO_PRIORITY_REMOVAL &&
+                   zio->io_priority != ZIO_PRIORITY_INITIALIZING)
                         zio->io_priority = ZIO_PRIORITY_ASYNC_READ;
         } else {
                 ASSERT(zio->io_type == ZIO_TYPE_WRITE);
                 if (zio->io_priority != ZIO_PRIORITY_SYNC_WRITE &&
                     zio->io_priority != ZIO_PRIORITY_ASYNC_WRITE &&
-                   zio->io_priority != ZIO_PRIORITY_REMOVAL)
+                   zio->io_priority != ZIO_PRIORITY_REMOVAL &&
+                   zio->io_priority != ZIO_PRIORITY_INITIALIZING)
                         zio->io_priority = ZIO_PRIORITY_ASYNC_WRITE;
         }
  
@@ -938,11 +946,29 @@ module_param(zfs_vdev_async_write_min_active, int, 0644);
  MODULE_PARM_DESC(zfs_vdev_async_write_min_active,
         "Min active async write I/Os per vdev");
  
+module_param(zfs_vdev_initializing_max_active, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_initializing_max_active,
+       "Max active initializing I/Os per vdev");
+
+module_param(zfs_vdev_initializing_min_active, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_initializing_min_active,
+       "Min active initializing I/Os per vdev");
+
+module_param(zfs_vdev_removal_max_active, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_removal_max_active,
+       "Max active removal I/Os per vdev");
+
+module_param(zfs_vdev_removal_min_active, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_removal_min_active,
+       "Min active removal I/Os per vdev");
+
  module_param(zfs_vdev_scrub_max_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_scrub_max_active, "Max active scrub I/Os per vdev");
+MODULE_PARM_DESC(zfs_vdev_scrub_max_active,
+       "Max active scrub I/Os per vdev");
  
  module_param(zfs_vdev_scrub_min_active, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_scrub_min_active, "Min active scrub I/Os per vdev");
+MODULE_PARM_DESC(zfs_vdev_scrub_min_active,
+       "Min active scrub I/Os per vdev");
  
  module_param(zfs_vdev_sync_read_max_active, int, 0644);
  MODULE_PARM_DESC(zfs_vdev_sync_read_max_active,
diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c

index a21baf9c264b69720b3973c044ca6d3eb3e2e11f..d10d89f3eca797304b8e7dfa181af0e598f8dec9 100644 (file)
--- a/module/zfs/vdev_raidz.c
+++ b/module/zfs/vdev_raidz.c
@@ -36,6 +36,10 @@
  #include <sys/vdev_raidz.h>
  #include <sys/vdev_raidz_impl.h>
  
+#ifdef ZFS_DEBUG
+#include <sys/vdev_initialize.h>       /* vdev_xlate testing */
+#endif
+
  /*
   * Virtual device vector for RAID-Z.
   *
@@ -1627,6 +1631,39 @@ vdev_raidz_child_done(zio_t *zio)
         rc->rc_skipped = 0;
  }
  
+static void
+vdev_raidz_io_verify(zio_t *zio, raidz_map_t *rm, int col)
+{
+#ifdef ZFS_DEBUG
+       /* Debug-only check of vdev_xlate() against raidz map column 'col'. */
+       raidz_col_t *rc = &rm->rm_col[col];
+       vdev_t *raidvd = zio->io_vd;
+       vdev_t *tvd = raidvd->vdev_top;
+       vdev_t *child = raidvd->vdev_child[rc->rc_devidx];
+
+       /* Logical extent: io_offset plus vdev_raidz_asize() of the io_size. */
+       range_seg_t lrs, physical_rs;
+       lrs.rs_start = zio->io_offset;
+       lrs.rs_end = zio->io_offset + vdev_raidz_asize(raidvd, zio->io_size);
+
+       vdev_xlate(child, &lrs, &physical_rs);
+       ASSERT3U(rc->rc_offset, ==, physical_rs.rs_start);
+       ASSERT3U(rc->rc_offset, <, physical_rs.rs_end);
+
+       /*
+        * rs_end cannot simply be asserted equal to rc_offset + rc_size:
+        * an optional I/O at the end, not accounted for in rc_size, may
+        * extend the translated range by one sector (1 << ashift).
+        */
+       if (physical_rs.rs_end > rc->rc_offset + rc->rc_size) {
+               ASSERT3U(physical_rs.rs_end, ==, rc->rc_offset +
+                   rc->rc_size + (1 << tvd->vdev_ashift));
+       } else {
+               ASSERT3U(physical_rs.rs_end, ==, rc->rc_offset + rc->rc_size);
+       }
+#endif
+}
+
  /*
   * Start an IO operation on a RAIDZ VDev
   *
@@ -1665,6 +1702,12 @@ vdev_raidz_io_start(zio_t *zio)
                 for (c = 0; c < rm->rm_cols; c++) {
                         rc = &rm->rm_col[c];
                         cvd = vd->vdev_child[rc->rc_devidx];
+
+                       /*
+                        * Verify physical to logical translation.
+                        */
+                       vdev_raidz_io_verify(zio, rm, c);
+
                         zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
                             rc->rc_offset, rc->rc_abd, rc->rc_size,
                             zio->io_type, zio->io_priority, 0,
@@ -2323,6 +2366,37 @@ vdev_raidz_need_resilver(vdev_t *vd, uint64_t offset, size_t psize)
         return (B_FALSE);
  }
  
+static void
+vdev_raidz_xlate(vdev_t *cvd, const range_seg_t *in, range_seg_t *res)
+{
+       /*
+        * Translate a logical raidz range into child cvd's range: a sector
+        * index b becomes the count of rows r with r * ncols + col < b.
+        */
+       vdev_t *raidvd = cvd->vdev_parent;
+       ASSERT(raidvd->vdev_ops == &vdev_raidz_ops);
+
+       uint64_t ashift = raidvd->vdev_top->vdev_ashift;
+       uint64_t ncols = raidvd->vdev_children;
+       uint64_t col = cvd->vdev_id;
+
+       /* both ends of the input range must be sector-aligned */
+       ASSERT0(in->rs_start % (1 << ashift));
+       ASSERT0(in->rs_end % (1 << ashift));
+       uint64_t first = in->rs_start >> ashift;
+       uint64_t last = in->rs_end >> ashift;
+
+       /* Indexes at or below col yield row 0, which also avoids underflow. */
+       uint64_t row_lo = (first > col) ? ((first - col - 1) / ncols) + 1 : 0;
+       uint64_t row_hi = (last > col) ? ((last - col - 1) / ncols) + 1 : 0;
+
+       res->rs_start = row_lo << ashift;
+       res->rs_end = row_hi << ashift;
+
+       ASSERT3U(res->rs_start, <=, in->rs_start);
+       ASSERT3U(res->rs_end - res->rs_start, <=, in->rs_end - in->rs_start);
+}
+
  vdev_ops_t vdev_raidz_ops = {
         vdev_raidz_open,
         vdev_raidz_close,
@@ -2334,6 +2408,7 @@ vdev_ops_t vdev_raidz_ops = {
         NULL,
         NULL,
         NULL,
+       vdev_raidz_xlate,
         VDEV_TYPE_RAIDZ,        /* name of this vdev type */
         B_FALSE                 /* not a leaf vdev */
  };
diff --git a/module/zfs/vdev_removal.c b/module/zfs/vdev_removal.c

index a706bc2a425a186d50dbd3dcc2e6dc9c8f61b3c9..d0824aa843f7891c0d0d9d1a3e4d9cf36357f70b 100644 (file)
--- a/module/zfs/vdev_removal.c
+++ b/module/zfs/vdev_removal.c
@@ -44,6 +44,7 @@
  #include <sys/vdev_indirect_births.h>
  #include <sys/vdev_indirect_mapping.h>
  #include <sys/abd.h>
+#include <sys/vdev_initialize.h>
  #include <sys/trace_vdev.h>
  
  /*
@@ -1186,6 +1187,7 @@ vdev_remove_complete(spa_t *spa)
         txg_wait_synced(spa->spa_dsl_pool, 0);
         txg = spa_vdev_enter(spa);
         vdev_t *vd = vdev_lookup_top(spa, spa->spa_vdev_removal->svr_vdev_id);
+       ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
  
         sysevent_t *ev = spa_event_create(spa, vd, NULL,
             ESC_ZFS_VDEV_REMOVE_DEV);
@@ -1896,6 +1898,9 @@ spa_vdev_remove_log(vdev_t *vd, uint64_t *txg)
  
         spa_vdev_config_exit(spa, NULL, *txg, 0, FTAG);
  
+       /* Stop initializing */
+       (void) vdev_initialize_stop_all(vd, VDEV_INITIALIZE_CANCELED);
+
         *txg = spa_vdev_config_enter(spa);
  
         sysevent_t *ev = spa_event_create(spa, vd, NULL,
@@ -2072,6 +2077,13 @@ spa_vdev_remove_top(vdev_t *vd, uint64_t *txg)
          */
         error = spa_reset_logs(spa);
  
+       /*
+        * We stop any initializing that is currently in progress but leave
+        * the state as "active". This will allow the initializing to resume
+        * if the removal is canceled sometime later.
+        */
+       vdev_initialize_stop_all(vd, VDEV_INITIALIZE_ACTIVE);
+
         *txg = spa_vdev_config_enter(spa);
  
         /*
@@ -2083,6 +2095,7 @@ spa_vdev_remove_top(vdev_t *vd, uint64_t *txg)
  
         if (error != 0) {
                 metaslab_group_activate(mg);
+               spa_async_request(spa, SPA_ASYNC_INITIALIZE_RESTART);
                 return (error);
         }
  
diff --git a/module/zfs/vdev_root.c b/module/zfs/vdev_root.c

index 9f86cbfa41341f15be40b1146e1b5b8f5f610fa5..e40b7ce8e4e8f3913e8e0567599c5d500bd8c311 100644 (file)
--- a/module/zfs/vdev_root.c
+++ b/module/zfs/vdev_root.c
@@ -24,7 +24,7 @@
   */
  
  /*
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
   */
  
  #include <sys/zfs_context.h>
@@ -150,6 +150,7 @@ vdev_ops_t vdev_root_ops = {
         NULL,
         NULL,
         NULL,
+       NULL,
         VDEV_TYPE_ROOT,         /* name of this vdev type */
         B_FALSE                 /* not a leaf vdev */
  };
diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c

index a71da28374729760eeeba52ae8b12bf783593786..3c36502d8599e321830b0591c9c919f6ca198d82 100644 (file)
--- a/module/zfs/zfs_ioctl.c
+++ b/module/zfs/zfs_ioctl.c
@@ -202,6 +202,8 @@
  #include <sys/zio_checksum.h>
  #include <sys/vdev_removal.h>
  #include <sys/zfs_sysfs.h>
+#include <sys/vdev_impl.h>
+#include <sys/vdev_initialize.h>
  
  #include <linux/miscdevice.h>
  #include <linux/slab.h>
@@ -3842,6 +3844,85 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
         return (err);
  }
  
+/*
+ * innvl: {
+ *     vdevs: {
+ *         guid 1, guid 2, ...
+ *     },
+ *     func: POOL_INITIALIZE_{CANCEL|DO|SUSPEND}
+ * }
+ *
+ * outnvl: {
+ *     [func: EINVAL (if provided command type didn't make sense)],
+ *     [vdevs: {
+ *         guid1: errno, (see function body for possible errnos)
+ *         ...
+ *     }]
+ * }
+ *
+ */
+static const zfs_ioc_key_t zfs_keys_pool_initialize[] = {
+       {ZPOOL_INITIALIZE_COMMAND,      DATA_TYPE_UINT64,       0}, /* func */
+       {ZPOOL_INITIALIZE_VDEVS,        DATA_TYPE_NVLIST,       0}  /* vdevs */
+};
+
+static int
+zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+       spa_t *spa;
+       uint64_t cmd_type;
+       nvlist_t *vdev_guids;
+
+       int error = spa_open(poolname, &spa, FTAG);
+       if (error != 0)
+               return (error);
+
+       /* Both keys are required and func must be a known command. */
+       if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
+           &cmd_type) != 0 ||
+           nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
+           &vdev_guids) != 0 ||
+           (cmd_type != POOL_INITIALIZE_CANCEL &&
+           cmd_type != POOL_INITIALIZE_DO &&
+           cmd_type != POOL_INITIALIZE_SUSPEND)) {
+               spa_close(spa, FTAG);
+               return (SET_ERROR(EINVAL));
+       }
+
+       /*
+        * The guid list is caller-supplied; fnvpair_value_uint64() asserts
+        * on a type mismatch, so reject non-uint64 entries before any work.
+        */
+       for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
+           pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
+               if (nvpair_type(pair) != DATA_TYPE_UINT64) {
+                       spa_close(spa, FTAG);
+                       return (SET_ERROR(EINVAL));
+               }
+       }
+
+       /* Apply the command per vdev; record each failure under its guid. */
+       nvlist_t *errlist = fnvlist_alloc();
+       int total_errors = 0;
+       for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
+           pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
+               uint64_t vdev_guid = fnvpair_value_uint64(pair);
+               error = spa_vdev_initialize(spa, vdev_guid, cmd_type);
+               if (error != 0) {
+                       char guid_as_str[MAXNAMELEN];
+                       (void) snprintf(guid_as_str, sizeof (guid_as_str),
+                           "%llu", (unsigned long long)vdev_guid);
+                       fnvlist_add_int64(errlist, guid_as_str, error);
+                       total_errors++;
+               }
+       }
+       if (fnvlist_size(errlist) > 0)
+               fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS, errlist);
+       fnvlist_free(errlist);
+       spa_close(spa, FTAG);
+       return (total_errors > 0 ? EINVAL : 0);
+}
+
  /*
   * fsname is name of dataset to rollback (to most recent snapshot)
   *
@@ -6453,6 +6534,11 @@ zfs_ioctl_init(void)
             zfs_keys_pool_discard_checkpoint,
             ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));
  
+       zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
+           zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
+           POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
+           zfs_keys_pool_initialize, ARRAY_SIZE(zfs_keys_pool_initialize));
+
         /* IOCTLS that use the legacy function signature */
  
         zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
diff --git a/module/zfs/zfs_sysfs.c b/module/zfs/zfs_sysfs.c

index b17c91f6527c09a962940fcbf5bd3046fd047090..87c4ac117c14199784583cace61a813f323243b5 100644 (file)
--- a/module/zfs/zfs_sysfs.c
+++ b/module/zfs/zfs_sysfs.c
@@ -358,6 +358,7 @@ pool_property_show(struct kobject *kobj, struct attribute *attr, char *buf)
   */
  static const char *zfs_features[]  = {
         /* --> Add new kernel features here (post ZoL 0.8.0) */
+       "vdev_initialize"
  };
  
  #define        ZFS_FEATURE_COUNT       ARRAY_SIZE(zfs_features)
diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run

index 4463bfa1e71f9d7ae7f88e29983cf6271f75513c..38f040126a82757ed242292f75208e085ab8e33d 100644 (file)
--- a/tests/runfiles/linux.run
+++ b/tests/runfiles/linux.run
@@ -397,6 +397,21 @@ pre =
  post =
  tags = ['functional', 'cli_root', 'zpool_labelclear']
  
+[tests/functional/cli_root/zpool_initialize]
+tests = ['zpool_initialize_attach_detach_add_remove',
+    'zpool_initialize_import_export',
+    'zpool_initialize_offline_export_import_online',
+    'zpool_initialize_online_offline',
+    'zpool_initialize_split',
+    'zpool_initialize_start_and_cancel_neg',
+    'zpool_initialize_start_and_cancel_pos',
+    'zpool_initialize_suspend_resume',
+    'zpool_initialize_unsupported_vdevs',
+    'zpool_initialize_verify_checksums',
+    'zpool_initialize_verify_initialized']
+pre =
+tags = ['functional', 'cli_root', 'zpool_initialize']
+
  [tests/functional/cli_root/zpool_offline]
  tests = ['zpool_offline_001_pos', 'zpool_offline_002_neg',
      'zpool_offline_003_pos']
diff --git a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c

index 88a18d8f0f6a288db756d6a4b03e3b0286b14562..61ac2feccac36d7bbd9e2a17d751f8c49d55636e 100644 (file)
--- a/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c
+++ b/tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c
@@ -642,6 +642,22 @@ test_unload_key(const char *dataset)
         IOC_INPUT_TEST(ZFS_IOC_UNLOAD_KEY, dataset, NULL, NULL, EACCES);
  }
  
+static void
+test_vdev_initialize(const char *pool)
+{
+       /* A single made-up guid entry stands in for the vdev list. */
+       nvlist_t *guids = fnvlist_alloc();
+       fnvlist_add_uint64(guids, "path", 0xdeadbeefdeadbeef);
+
+       nvlist_t *args = fnvlist_alloc();
+       fnvlist_add_uint64(args, ZPOOL_INITIALIZE_COMMAND, POOL_INITIALIZE_DO);
+       fnvlist_add_nvlist(args, ZPOOL_INITIALIZE_VDEVS, guids);
+
+       IOC_INPUT_TEST(ZFS_IOC_POOL_INITIALIZE, pool, args, NULL, EINVAL);
+       nvlist_free(guids);
+       nvlist_free(args);
+}
+
  static int
  zfs_destroy(const char *dataset)
  {
@@ -732,6 +748,8 @@ zfs_ioc_input_tests(const char *pool)
         test_change_key(dataset);
         test_unload_key(dataset);
  
+       test_vdev_initialize(pool);
+
         /*
          * cleanup
          */
@@ -869,6 +887,7 @@ validate_ioc_values(void)
             ZFS_IOC_BASE + 76 == ZFS_IOC_REMAP &&
             ZFS_IOC_BASE + 77 == ZFS_IOC_POOL_CHECKPOINT &&
             ZFS_IOC_BASE + 78 == ZFS_IOC_POOL_DISCARD_CHECKPOINT &&
+           ZFS_IOC_BASE + 79 == ZFS_IOC_POOL_INITIALIZE &&
             LINUX_IOC_BASE + 1 == ZFS_IOC_EVENTS_NEXT &&
             LINUX_IOC_BASE + 2 == ZFS_IOC_EVENTS_CLEAR &&
             LINUX_IOC_BASE + 3 == ZFS_IOC_EVENTS_SEEK);
diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg

index 8ced03e9301a5c56f9888c5fbc2960604d35fa05..78ba2488d460dc655da9a7adff65bbfbbda662fc 100644 (file)
--- a/tests/zfs-tests/include/commands.cfg
+++ b/tests/zfs-tests/include/commands.cfg
@@ -82,6 +82,7 @@ export SYSTEM_FILES='arp
      mv
      net
      nproc
+    od
      openssl
      parted
      pax
diff --git a/tests/zfs-tests/tests/functional/cli_root/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/Makefile.am

index 13ff889d81966d8da527f3f985ba531e5177b361..625cf8579f821d23f4f230fc5183b0b39ed1ca6a 100644 (file)
--- a/tests/zfs-tests/tests/functional/cli_root/Makefile.am
+++ b/tests/zfs-tests/tests/functional/cli_root/Makefile.am
@@ -46,6 +46,7 @@ SUBDIRS = \
         zpool_get \
         zpool_history \
         zpool_import \
+       zpool_initialize \
         zpool_labelclear \
         zpool_offline \
         zpool_online \
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_clear/zpool_clear_001_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_clear/zpool_clear_001_pos.ksh

index 0cd9b595958b5ba188d1e4456b6325f371680f95..79ceaabd0dd07faf3fe1ce044a0c1e2bc5917209 100755 (executable)
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_clear/zpool_clear_001_pos.ksh
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_clear/zpool_clear_001_pos.ksh
@@ -150,7 +150,7 @@ function do_testing #<clear type> <vdevs>
  
         #
         # Make errors to the testing pool by overwrite the vdev device with
-       # /usr/bin/dd command. We do not want to have a full overwrite. That
+       # dd command. We do not want to have a full overwrite. That
         # may cause the system panic. So, we should skip the vdev label space.
         #
         (( i = $RANDOM % 3 ))
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am

new file mode 100644 (file)

index 0000000..a0a0e0b
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am
@@ -0,0 +1,18 @@
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cli_root/zpool_initialize
+dist_pkgdata_SCRIPTS = \
+       cleanup.ksh \
+       zpool_initialize_attach_detach_add_remove.ksh \
+       zpool_initialize_import_export.ksh \
+       zpool_initialize_offline_export_import_online.ksh \
+       zpool_initialize_online_offline.ksh \
+       zpool_initialize_split.ksh \
+       zpool_initialize_start_and_cancel_neg.ksh \
+       zpool_initialize_start_and_cancel_pos.ksh \
+       zpool_initialize_suspend_resume.ksh \
+       zpool_initialize_unsupported_vdevs.ksh \
+       zpool_initialize_verify_checksums.ksh \
+       zpool_initialize_verify_initialized.ksh
+
+# The shared library is sourced by the tests, not executed: install as data.
+dist_pkgdata_DATA = \
+       zpool_initialize.kshlib
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/cleanup.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/cleanup.ksh

new file mode 100755 (executable)

index 0000000..3c7dbd3
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/cleanup.ksh
@@ -0,0 +1,31 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+verify_runnable "global"
+
+default_cleanup
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib

new file mode 100644 (file)

index 0000000..0f4e7f0
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
@@ -0,0 +1,43 @@
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+
+function initialize_prog_line # pool disk
+{
+        # Print the status lines for <disk> that contain "initialized".
+        typeset pool="$1" disk="$2"
+        zpool status "$pool" | grep "$disk" | grep "initialized"
+}
+
+function initialize_progress # pool disk -> N from "(N% initialized"
+{
+        initialize_prog_line "$1" "$2" |
+            sed -e 's/.*(\([0-9]\{1,\}\)% initialized.*/\1/g'
+}
+
+function cleanup # destroy -f $TESTPOOL/$TESTPOOL1 if poolexists says so
+{
+        if poolexists $TESTPOOL; then
+                log_must zpool destroy -f $TESTPOOL
+        fi
+
+       if poolexists $TESTPOOL1; then
+               log_must zpool destroy -f $TESTPOOL1
+       fi
+}
+log_onexit cleanup
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_attach_detach_add_remove.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_attach_detach_add_remove.ksh

new file mode 100755 (executable)

index 0000000..2a69502
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_attach_detach_add_remove.ksh
@@ -0,0 +1,68 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
+
+#
+# DESCRIPTION:
+# Detaching/attaching, adding/removing data devices works with initializing.
+#
+# STRATEGY:
+# 1. Create a single-disk pool.
+# 2. Start initializing.
+# 3. Attach a second disk, ensure initializing continues.
+# 4. Detach the second disk, ensure initializing continues.
+# 5. Add a second disk, ensure initializing continues.
+# 6. Remove the first disk, ensure initializing stops.
+#
+
+DISK1="$(echo $DISKS | cut -d' ' -f1)"    # first entry of $DISKS
+DISK2="$(echo $DISKS | cut -d' ' -f2)"    # second entry of $DISKS
+
+log_must zpool create -f $TESTPOOL $DISK1
+
+log_must zpool initialize $TESTPOOL $DISK1
+progress="$(initialize_progress $TESTPOOL $DISK1)"
+[[ -z "$progress" ]] && log_fail "Initializing did not start"
+# Step 3: the percentage read after the attach must not be lower than before.
+log_must zpool attach $TESTPOOL $DISK1 $DISK2
+new_progress="$(initialize_progress $TESTPOOL $DISK1)"
+[[ "$progress" -le "$new_progress" ]] || \
+        log_fail "Lost initializing progress on demotion to child vdev"
+progress="$new_progress"
+# Step 4: same comparison after the second disk is detached again.
+log_must zpool detach $TESTPOOL $DISK2
+new_progress="$(initialize_progress $TESTPOOL $DISK1)"
+[[ "$progress" -le "$new_progress" ]] || \
+        log_fail "Lost initializing progress on promotion to top vdev"
+progress="$new_progress"
+# Steps 5-6: once removal starts, DISK1 must show no "initialized" status line.
+log_must zpool add $TESTPOOL $DISK2
+log_must zpool remove $TESTPOOL $DISK1
+[[ -z "$(initialize_prog_line $TESTPOOL $DISK1)" ]] || \
+        log_fail "Initializing continued after initiating removal"
+
+log_pass "Initializing worked as expected across attach/detach and add/remove"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_import_export.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_import_export.ksh

new file mode 100755 (executable)

index 0000000..386d2a5
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_import_export.ksh
@@ -0,0 +1,78 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
+
+#
+# DESCRIPTION:
+# Initializing automatically resumes across import/export.
+#
+# STRATEGY:
+# 1. Create a one-disk pool.
+# 2. Start initializing and verify that initializing is active.
+# 3. Export the pool.
+# 4. Import the pool.
+# 5. Verify that initializing resumes and progress does not regress.
+# 6. Suspend initializing.
+# 7. Repeat steps 3-4.
+# 8. Verify that progress does not regress but initializing is still suspended.
+#
+
+DISK1=${DISKS%% *}    # first entry of $DISKS
+
+log_must zpool create -f $TESTPOOL $DISK1
+log_must zpool initialize $TESTPOOL
+# NOTE(review): presumably lets some progress accumulate first -- confirm.
+sleep 2
+
+progress="$(initialize_progress $TESTPOOL $DISK1)"
+[[ -z "$progress" ]] && log_fail "Initializing did not start"
+# Steps 3-5: progress must survive export/import and not read "suspended".
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+
+new_progress="$(initialize_progress $TESTPOOL $DISK1)"
+[[ -z "$new_progress" ]] && log_fail "Initializing did not restart after import"
+[[ "$progress" -le "$new_progress" ]] || \
+    log_fail "Initializing lost progress after import"
+log_mustnot eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended"
+# Steps 6-8: suspend (-s); the "...ed at <date>)" text must survive re-import.
+log_must zpool initialize -s $TESTPOOL $DISK1
+action_date="$(initialize_prog_line $TESTPOOL $DISK1 | \
+    sed 's/.*ed at \(.*\)).*/\1/g')"
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+new_action_date=$(initialize_prog_line $TESTPOOL $DISK1 | \
+    sed 's/.*ed at \(.*\)).*/\1/g')
+[[ "$action_date" != "$new_action_date" ]] && \
+    log_fail "Initializing action date did not persist across export/import"
+# Progress must not regress and the line must now contain "suspended".
+[[ "$new_progress" -le "$(initialize_progress $TESTPOOL $DISK1)" ]] || \
+        log_fail "Initializing lost progress after import"
+
+log_must eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended"
+
+log_pass "Initializing retains state as expected across export/import"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh

new file mode 100755 (executable)

index 0000000..dedd466
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh
@@ -0,0 +1,66 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
+
+#
+# DESCRIPTION:
+# Miscellaneous complex sequences of operations function as expected.
+#
+# STRATEGY:
+# 1. Create a pool with a two-way mirror.
+# 2. Start initializing, offline, export, import, online and verify that
+#    initializing state is preserved / initializing behaves as expected
+#    at each step.
+#
+
+DISK1="$(echo $DISKS | cut -d' ' -f1)"    # first entry of $DISKS
+DISK2="$(echo $DISKS | cut -d' ' -f2)"    # second entry of $DISKS
+
+log_must zpool create -f $TESTPOOL mirror $DISK1 $DISK2
+# Initialize DISK1, offline it: progress is shown and is not "suspended".
+log_must zpool initialize $TESTPOOL $DISK1
+log_must zpool offline $TESTPOOL $DISK1
+progress="$(initialize_progress $TESTPOOL $DISK1)"
+[[ -z "$progress" ]] && log_fail "Initializing did not start"
+log_mustnot eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended"
+# After export/import: same checks, and progress not below the first reading.
+log_must zpool export $TESTPOOL
+log_must zpool import $TESTPOOL
+
+new_progress="$(initialize_progress $TESTPOOL $DISK1)"
+[[ -z "$new_progress" ]] && log_fail "Initializing did not start after import"
+[[ "$new_progress" -ge "$progress" ]] || \
+    log_fail "Initializing lost progress after import"
+log_mustnot eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended"
+# After online: compared with the pre-export reading ($progress) again.
+log_must zpool online $TESTPOOL $DISK1
+new_progress="$(initialize_progress $TESTPOOL $DISK1)"
+[[ "$new_progress" -ge "$progress" ]] || \
+    log_fail "Initializing lost progress after online"
+
+log_pass "Initializing behaves as expected at each step of:" \
+    "initialize + offline + export + import + online"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh

new file mode 100755 (executable)

index 0000000..55bd318
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh
@@ -0,0 +1,74 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
+
+#
+# DESCRIPTION:
+# Initializing automatically resumes across offline/online.
+#
+# STRATEGY:
+# 1. Create a pool with a two-way mirror.
+# 2. Start initializing one of the disks and verify that initializing is active.
+# 3. Offline the disk.
+# 4. Online the disk.
+# 5. Verify that initializing resumes and progress does not regress.
+# 6. Suspend initializing.
+# 7. Repeat steps 3-4 and verify that initializing does not resume.
+#
+
+DISK1=${DISKS%% *}
+DISK2="$(echo $DISKS | cut -d' ' -f2)"
+
+log_must zpool create -f $TESTPOOL mirror $DISK1 $DISK2
+log_must zpool initialize $TESTPOOL $DISK1
+
+log_must zpool offline $TESTPOOL $DISK1
+prog_offline="$(initialize_progress $TESTPOOL $DISK1)"
+[[ -n "$prog_offline" ]] || log_fail "Initializing did not start"
+
+# After onlining: progress is reported, has not regressed, is not suspended.
+log_must zpool online $TESTPOOL $DISK1
+prog_online="$(initialize_progress $TESTPOOL $DISK1)"
+[[ -n "$prog_online" ]] || \
+    log_fail "Initializing did not restart after onlining"
+[[ "$prog_online" -ge "$prog_offline" ]] || \
+    log_fail "Initializing lost progress after onlining"
+log_mustnot eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended"
+
+# Suspend, bounce the disk; the "...ed at <date>" text and state must persist.
+log_must zpool initialize -s $TESTPOOL $DISK1
+date_before="$(initialize_prog_line $TESTPOOL $DISK1 | \
+    sed 's/.*ed at \(.*\)).*/\1/g')"
+log_must zpool offline $TESTPOOL $DISK1
+log_must zpool online $TESTPOOL $DISK1
+date_after="$(initialize_prog_line $TESTPOOL $DISK1 | \
+    sed 's/.*ed at \(.*\)).*/\1/g')"
+[[ "$date_before" = "$date_after" ]] || \
+    log_fail "Initializing action date did not persist across offline/online"
+log_must eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended"
+
+log_pass "Initializing performs as expected across offline/online"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_split.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_split.ksh

new file mode 100755 (executable)

index 0000000..69b27c2
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_split.ksh
@@ -0,0 +1,64 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
+
+#
+# DESCRIPTION:
+# Initializing state is preserved across zpool split.
+#
+# STRATEGY:
+# 1. Create a pool with a two-way mirror.
+# 2. Start initializing both devices.
+# 3. Split the pool. Ensure initializing continues on the original.
+# 4. Import the new pool. Ensure initializing resumes on it.
+#
+
+DISK1="$(echo $DISKS | cut -d' ' -f1)"
+DISK2="$(echo $DISKS | cut -d' ' -f2)"
+
+log_must zpool create -f $TESTPOOL mirror $DISK1 $DISK2
+
+log_must zpool initialize $TESTPOOL $DISK1 $DISK2
+orig_prog1="$(initialize_progress $TESTPOOL $DISK1)"
+orig_prog2="$(initialize_progress $TESTPOOL $DISK2)"
+[[ -z "$orig_prog1" || -z "$orig_prog2" ]] && \
+    log_fail "Initializing did not start"
+
+log_must zpool split $TESTPOOL $TESTPOOL1 $DISK2
+
+# Ensure initializing continued as expected on the original pool.
+[[ "$(initialize_progress $TESTPOOL $DISK1)" -ge "$orig_prog1" ]] || \
+        log_fail "Initializing lost progress on original pool"
+log_mustnot eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended"
+
+# Only $DISK2 was split into $TESTPOOL1, so both checks below must name it.
+log_must zpool import $TESTPOOL1
+[[ "$(initialize_progress $TESTPOOL1 $DISK2)" -ge "$orig_prog2" ]] || \
+        log_fail "Initializing lost progress on split pool"
+log_mustnot eval "initialize_prog_line $TESTPOOL1 $DISK2 | grep suspended"
+
+log_pass "Initializing behaves as expected on zpool split"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh

new file mode 100755 (executable)

index 0000000..59b266d
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh
@@ -0,0 +1,60 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
+
+#
+# DESCRIPTION:
+# Cancelling and suspending initialize doesn't work if not all specified vdevs
+# are being initialized.
+#
+# STRATEGY:
+# 1. Create a three-disk pool.
+# 2. Start initializing and verify that initializing is active.
+# 3. Try to cancel and suspend initializing on the non-initializing disks.
+# 4. Try to re-initialize the currently initializing disk.
+#
+
+DISK1=${DISKS%% *}
+DISK2="$(echo $DISKS | cut -d' ' -f2)"
+DISK3="$(echo $DISKS | cut -d' ' -f3)"
+
+log_must zpool list -v
+log_must zpool create -f $TESTPOOL $DISK1 $DISK2 $DISK3
+log_must zpool initialize $TESTPOOL $DISK1
+
+[[ -n "$(initialize_progress $TESTPOOL $DISK1)" ]] || \
+    log_fail "Initialize did not start"
+
+# Cancel (-c) and suspend (-s) must be rejected for disks not initializing.
+for flag in -c -s; do
+        log_mustnot zpool initialize $flag $TESTPOOL $DISK2
+        log_mustnot zpool initialize $flag $TESTPOOL $DISK2 $DISK3
+done
+# Starting again on the disk that is already initializing must fail too.
+log_mustnot zpool initialize $TESTPOOL $DISK1
+
+log_pass "Nonsensical initialize operations fail"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_pos.ksh

new file mode 100755 (executable)

index 0000000..5003b5f
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_pos.ksh
@@ -0,0 +1,52 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
+
+#
+# DESCRIPTION:
+# Starting and stopping an initialize works.
+#
+# STRATEGY:
+# 1. Create a one-disk pool.
+# 2. Start initializing and verify that initializing is active.
+# 3. Cancel initializing and verify that initializing is not active.
+#
+
+DISK1=${DISKS%% *}
+
+log_must zpool create -f $TESTPOOL $DISK1
+log_must zpool initialize $TESTPOOL
+# A non-empty progress value is taken to mean initializing is under way.
+started="$(initialize_progress $TESTPOOL $DISK1)"
+[[ -n "$started" ]] || log_fail "Initialize did not start"
+
+log_must zpool initialize -c $TESTPOOL
+# After cancelling, no progress value should be reported any more.
+remaining="$(initialize_progress $TESTPOOL $DISK1)"
+[[ -z "$remaining" ]] || log_fail "Initialize did not stop"
+
+log_pass "Initialize start + cancel works"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_suspend_resume.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_suspend_resume.ksh

new file mode 100755 (executable)

index 0000000..bce3da5
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_suspend_resume.ksh
@@ -0,0 +1,63 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
+
+#
+# DESCRIPTION:
+# Suspending and resuming initializing works.
+#
+# STRATEGY:
+# 1. Create a one-disk pool.
+# 2. Start initializing and verify that initializing is active.
+# 3. Wait 5 seconds, then suspend initializing and verify that the progress
+#    reporting says so.
+# 4. Wait 3 seconds and ensure initializing progress doesn't advance.
+# 5. Restart initializing and verify that the progress doesn't regress.
+#
+
+DISK1=${DISKS%% *}
+
+log_must zpool create -f $TESTPOOL $DISK1
+log_must zpool initialize $TESTPOOL
+
+[[ -n "$(initialize_progress $TESTPOOL $DISK1)" ]] || \
+    log_fail "Initializing did not start"
+# Let initializing run for 5 seconds, suspend it, and note where it stopped.
+sleep 5
+log_must zpool initialize -s $TESTPOOL
+log_must eval "initialize_prog_line $TESTPOOL $DISK1 | grep suspended"
+paused_at="$(initialize_progress $TESTPOOL $DISK1)"
+# While suspended, the reported value must be unchanged 3 seconds later.
+sleep 3
+[[ "$(initialize_progress $TESTPOOL $DISK1)" -eq "$paused_at" ]] || \
+        log_fail "Initializing progress advanced while suspended"
+# Resuming must continue at or beyond the value recorded at suspension.
+log_must zpool initialize $TESTPOOL $DISK1
+[[ "$(initialize_progress $TESTPOOL $DISK1)" -ge "$paused_at" ]] || \
+        log_fail "Initializing progress regressed after resuming"
+
+log_pass "Suspend + resume initializing works as expected"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_unsupported_vdevs.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_unsupported_vdevs.ksh

new file mode 100755 (executable)

index 0000000..bd4ca06
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_unsupported_vdevs.ksh
@@ -0,0 +1,74 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
+
+#
+# DESCRIPTION:
+# Attempting to initialize unsupported vdevs should fail.
+#
+# STRATEGY:
+# 1. Create a pool with the following configuration:
+#    root
+#      mirror
+#        vdev0
+#        vdev1 (offline)
+#      spare
+#        vdev2
+#      cache
+#        first disk in $DISKS
+# 2. Try to initialize mirror-0, vdev1, the spare, and the cache; all must fail.
+#
+function cleanup # destroy $TESTPOOL and remove $TESTDIR, if they exist
+{
+        if datasetexists $TESTPOOL; then
+                log_must zpool destroy -f $TESTPOOL
+        fi
+        if [[ -d $TESTDIR ]]; then
+                log_must rm -rf $TESTDIR
+        fi
+}
+log_onexit cleanup
+
+log_must mkdir $TESTDIR
+set -A FDISKS
+for n in {0..2}; do
+        log_must mkfile $MINVDEVSIZE $TESTDIR/vdev$n
+        FDISKS+=("$TESTDIR/vdev$n")
+done
+FDISKS+=("${DISKS%% *}")
+# FDISKS[0..2] are the file vdevs; FDISKS[3] is the first entry of $DISKS.
+log_must zpool create $TESTPOOL mirror ${FDISKS[0]} ${FDISKS[1]} \
+        spare ${FDISKS[2]} cache ${FDISKS[3]}
+
+log_must zpool offline $TESTPOOL ${FDISKS[1]}
+# Neither the mirror, the offlined leaf, the spare, nor the cache may start.
+log_mustnot zpool initialize $TESTPOOL mirror-0
+for n in {1..3}; do
+        log_mustnot zpool initialize $TESTPOOL ${FDISKS[$n]}
+done
+
+log_pass "Attempting to initialize failed on unsupported devices"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_checksums.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_checksums.ksh

new file mode 100755 (executable)

index 0000000..9be752f
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_checksums.ksh
@@ -0,0 +1,59 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
+
+#
+# DESCRIPTION:
+# Initializing does not cause file corruption.
+#
+# STRATEGY:
+# 1. Create a one-disk pool.
+# 2. Write data to the pool.
+# 3. Start initializing and verify that initializing is active.
+# 4. Write more data to the pool.
+# 5. Run zdb to validate checksums.
+#
+
+DISK1=${DISKS%% *}
+
+log_must zpool create -f $TESTPOOL $DISK1
+log_must dd if=/dev/urandom of=/$TESTPOOL/file1 bs=1M count=30
+log_must sync
+# Start initializing, then run zdb over the data written beforehand.
+log_must zpool initialize $TESTPOOL
+
+log_must zdb -cc $TESTPOOL
+
+[[ -z "$(initialize_progress $TESTPOOL $DISK1)" ]] && \
+    log_fail "Initializing did not start"
+# Write a second 30 MiB file and run zdb over the pool again.
+log_must dd if=/dev/urandom of=/$TESTPOOL/file2 bs=1M count=30
+log_must sync
+
+log_must zdb -cc $TESTPOOL
+
+log_pass "Initializing does not corrupt existing or new data"
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh

new file mode 100755 (executable)

index 0000000..0fa6a0b
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh
@@ -0,0 +1,89 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2016 by Delphix. All rights reserved.
+#
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
+
+#
+# DESCRIPTION:
+# After initializing, the disk is actually initialized.
+#
+# STRATEGY:
+# 1. Create a one-disk pool.
+# 2. Initialize the disk to completion.
+# 3. Load all metaslabs that don't have a spacemap, and make sure the entire
+#    metaslab has been filled with the initializing pattern (deadbeef).
+#
+
+function cleanup
+{
+       set_tunable64 zfs_initialize_value $ORIG_PATTERN
+        zpool import -d $TESTDIR $TESTPOOL
+
+        if datasetexists $TESTPOOL ; then
+                zpool destroy -f $TESTPOOL
+        fi
+        if [[ -d "$TESTDIR" ]]; then
+                rm -rf "$TESTDIR"
+        fi
+}
+log_onexit cleanup
+
+PATTERN="deadbeefdeadbeef"
+SMALLFILE="$TESTDIR/smallfile"
+
+ORIG_PATTERN=$(get_tunable zfs_initialize_value)
+log_must set_tunable64 zfs_initialize_value $(printf %llu 0x$PATTERN)
+
+log_must mkdir "$TESTDIR"
+log_must mkfile $MINVDEVSIZE "$SMALLFILE"
+log_must zpool create $TESTPOOL "$SMALLFILE"
+log_must zpool initialize $TESTPOOL
+# Poll until the reported progress reaches 100.
+while [[ "$(initialize_progress $TESTPOOL $SMALLFILE)" -lt "100" ]]; do
+        sleep 0.5
+done
+
+log_must zpool export $TESTPOOL
+# An empty zdb listing still feeds one blank line through "<<<"; skip it.
+spacemaps=0
+bs=512
+while read -r sm; do
+        [[ -z "$sm" ]] && continue
+        typeset offset="$(echo $sm | cut -d ' ' -f1)"
+        typeset size="$(echo $sm | cut -d ' ' -f2)"
+       spacemaps=$((spacemaps + 1))
+        offset=$(((4 * 1024 * 1024) + 16#$offset))
+       out=$(dd if=$SMALLFILE skip=$(($offset / $bs)) \
+           count=$(($size / $bs)) bs=$bs 2>/dev/null | od -t x8 -Ad)
+       echo "$out" | log_must egrep "$PATTERN|\*|$size"
+done <<< "$(zdb -p $TESTDIR -Pme $TESTPOOL | egrep 'spacemap[ ]+0 ' | \
+    awk '{print $4, $8}')"
+
+if [[ $spacemaps -eq 0 ]];then
+       log_fail "Did not find any empty space maps to check"
+else
+       log_pass "Initializing wrote appropriate amount to disk"
+fi
author	George Wilson <george.wilson@delphix.com>
	Wed, 19 Dec 2018 14:54:59 +0000 (07:54 -0700)
committer	Brian Behlendorf <behlendorf1@llnl.gov>
	Mon, 7 Jan 2019 18:37:26 +0000 (10:37 -0800)
cmd/zpool/zpool_main.c		patch \| blob \| history
cmd/ztest/ztest.c		patch \| blob \| history
configure.ac		patch \| blob \| history
include/libzfs.h		patch \| blob \| history
include/libzfs_core.h		patch \| blob \| history
include/sys/Makefile.am		patch \| blob \| history
include/sys/fs/zfs.h		patch \| blob \| history
include/sys/metaslab_impl.h		patch \| blob \| history
include/sys/spa.h		patch \| blob \| history
include/sys/vdev_impl.h		patch \| blob \| history
include/sys/vdev_initialize.h	[new file with mode: 0644]	patch \| blob
include/sys/zio_priority.h		patch \| blob \| history
lib/libzfs/libzfs_pool.c		patch \| blob \| history
lib/libzfs/libzfs_util.c		patch \| blob \| history
lib/libzfs_core/libzfs_core.c		patch \| blob \| history
lib/libzpool/Makefile.am		patch \| blob \| history
man/man5/zfs-module-parameters.5		patch \| blob \| history
man/man8/zpool.8		patch \| blob \| history
module/zfs/Makefile.in		patch \| blob \| history
module/zfs/metaslab.c		patch \| blob \| history
module/zfs/spa.c		patch \| blob \| history
module/zfs/spa_misc.c		patch \| blob \| history
module/zfs/vdev.c		patch \| blob \| history
module/zfs/vdev_disk.c		patch \| blob \| history
module/zfs/vdev_file.c		patch \| blob \| history
module/zfs/vdev_indirect.c		patch \| blob \| history
module/zfs/vdev_initialize.c	[new file with mode: 0644]	patch \| blob
module/zfs/vdev_mirror.c		patch \| blob \| history
module/zfs/vdev_missing.c		patch \| blob \| history
module/zfs/vdev_queue.c		patch \| blob \| history
module/zfs/vdev_raidz.c		patch \| blob \| history
module/zfs/vdev_removal.c		patch \| blob \| history
module/zfs/vdev_root.c		patch \| blob \| history
module/zfs/zfs_ioctl.c		patch \| blob \| history
module/zfs/zfs_sysfs.c		patch \| blob \| history
tests/runfiles/linux.run		patch \| blob \| history
tests/zfs-tests/cmd/libzfs_input_check/libzfs_input_check.c		patch \| blob \| history
tests/zfs-tests/include/commands.cfg		patch \| blob \| history
tests/zfs-tests/tests/functional/cli_root/Makefile.am		patch \| blob \| history
tests/zfs-tests/tests/functional/cli_root/zpool_clear/zpool_clear_001_pos.ksh		patch \| blob \| history
tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am	[new file with mode: 0644]	patch \| blob
tests/zfs-tests/tests/functional/cli_root/zpool_initialize/cleanup.ksh	[new file with mode: 0755]	patch \| blob
tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib	[new file with mode: 0644]	patch \| blob
tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_attach_detach_add_remove.ksh	[new file with mode: 0755]	patch \| blob
tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_import_export.ksh	[new file with mode: 0755]	patch \| blob
tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_offline_export_import_online.ksh	[new file with mode: 0755]	patch \| blob
tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_online_offline.ksh	[new file with mode: 0755]	patch \| blob
tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_split.ksh	[new file with mode: 0755]	patch \| blob
tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_neg.ksh	[new file with mode: 0755]	patch \| blob
tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_start_and_cancel_pos.ksh	[new file with mode: 0755]	patch \| blob
tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_suspend_resume.ksh	[new file with mode: 0755]	patch \| blob
tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_unsupported_vdevs.ksh	[new file with mode: 0755]	patch \| blob
tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_checksums.ksh	[new file with mode: 0755]	patch \| blob
tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_verify_initialized.ksh	[new file with mode: 0755]	patch \| blob