Add -lhHpw options to "zpool iostat" for avg latency, histograms, & queues
author     Tony Hutter <hutter2@llnl.gov>
           Mon, 29 Feb 2016 18:05:23 +0000 (10:05 -0800)
committer  Brian Behlendorf <behlendorf1@llnl.gov>
           Thu, 12 May 2016 19:36:32 +0000 (12:36 -0700)
Update the zfs module to collect statistics on average latencies and queue
sizes, and to keep an internal histogram of all IO latencies.  Along with
this, add new options to "zpool iostat" to print out these stats:

-l: Include average IO latency stats (a sketch of how they are derived
    follows the example):

 total_wait     disk_wait    syncq_wait    asyncq_wait  scrub
 read  write   read  write   read  write   read  write   wait
-----  -----  -----  -----  -----  -----  -----  -----  -----
    -   41ms      -    2ms      -   46ms      -    4ms      -
    -    5ms      -    1ms      -    1us      -    4ms      -
    -    5ms      -    1ms      -    1us      -    4ms      -
    -      -      -      -      -      -      -      -      -
    -   49ms      -    2ms      -   47ms      -      -      -
    -      -      -      -      -      -      -      -      -
    -    2ms      -    1ms      -      -      -    1ms      -
-----  -----  -----  -----  -----  -----  -----  -----  -----
  1ms    1ms    1ms  413us   16us   25us      -    5ms      -
  1ms    1ms    1ms  413us   16us   25us      -    5ms      -
  2ms    1ms    2ms  412us   26us   25us      -    5ms      -
    -    1ms      -  413us      -   25us      -    5ms      -
    -    1ms      -  460us      -   29us      -    5ms      -
196us    1ms  196us  370us    7us   23us      -    5ms      -
-----  -----  -----  -----  -----  -----  -----  -----  -----
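
The averages printed by -l are derived from the same power-of-two latency
histograms that back the -w output below.  A minimal sketch of the
calculation (single_histo_average(), further down in this diff, does
essentially this midpoint-weighted mean):

    /*
     * Sketch only: bucket i covers latencies of roughly [2^i, 2^(i+1)) ns,
     * so use its midpoint (1.5 * 2^i ns) as the representative latency.
     */
    static uint64_t
    histo_average_ns(const uint64_t *histo, unsigned int buckets)
    {
            uint64_t count = 0, total = 0;
            unsigned int i;

            for (i = 0; i < buckets; i++) {
                    total += histo[i] * ((1ULL << i) + ((1ULL << i) / 2));
                    count += histo[i];
            }
            return (count == 0 ? 0 : total / count);
    }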

-w: Print out latency histograms (the bucketing is sketched after the
    example):

sdb           total           disk         sync_queue      async_queue
latency    read   write    read   write    read   write    read   write   scrub
-------  ------  ------  ------  ------  ------  ------  ------  ------  ------
1ns           0       0       0       0       0       0       0       0       0
...
33us          0       0       0       0       0       0       0       0       0
66us          0       0     107    2486       2     788      12      12       0
131us         2     797     359    4499      10     558     184     184       6
262us        22     801     264    1563      10     286     287     287      24
524us        87     575      71   52086      15    1063     136     136      92
1ms         152    1190       5   41292       4    1693     252     252     141
2ms         245    2018       0   50007       0    2322     371     371     220
4ms         189    7455      22  162957       0    3912    6726    6726     199
8ms         108    9461       0  102320       0    5775    2526    2526      86
17ms         23   11287       0   37142       0    8043    1813    1813      19
34ms          0   14725       0   24015       0   11732    3071    3071       0
67ms          0   23597       0    7914       0   18113    5025    5025       0
134ms         0   33798       0     254       0   25755    7326    7326       0
268ms         0   51780       0      12       0   41593   10002   10002       0
537ms         0   77808       0       0       0   64255   13120   13120       0
1s            0  105281       0       0       0   83805   20841   20841       0
2s            0   88248       0       0       0   73772   14006   14006       0
4s            0   47266       0       0       0   29783   17176   17176       0
9s            0   10460       0       0       0    4130    6295    6295       0
17s           0       0       0       0       0       0       0       0       0
34s           0       0       0       0       0       0       0       0       0
69s           0       0       0       0       0       0       0       0       0
137s          0       0       0       0       0       0       0       0       0
-------------------------------------------------------------------------------
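
Each histogram row is a power-of-two latency bucket, labeled with the
bucket's upper bound, which is why the labels roughly double from 1ns up to
137s (2^37 ns).  A hypothetical sketch of the bucketing implied by those row
labels (the module's actual index computation may differ in detail):

    /* Sketch: a latency of "ns" nanoseconds lands in bucket floor(log2(ns)). */
    static unsigned int
    latency_to_bucket(uint64_t ns)
    {
            unsigned int j = 0;

            while (ns >>= 1)
                    j++;
            return (j);
    }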

-h: Help

-H: Scripted mode. Do not display headers, and separate fields by a single
    tab instead of arbitrary space.

-q: Include the current number of entries in the sync & async read/write
    queues, and the scrub queue:

 syncq_read    syncq_write   asyncq_read  asyncq_write   scrubq_read
 pend  activ   pend  activ   pend  activ   pend  activ   pend  activ
-----  -----  -----  -----  -----  -----  -----  -----  -----  -----
    0      0      0      0     78     29      0      0      0      0
    0      0      0      0     78     29      0      0      0      0
    0      0      0      0      0      0      0      0      0      0
    -      -      -      -      -      -      -      -      -      -
    0      0      0      0      0      0      0      0      0      0
    -      -      -      -      -      -      -      -      -      -
    0      0      0      0      0      0      0      0      0      0
-----  -----  -----  -----  -----  -----  -----  -----  -----  -----
    0      0    227    394      0     19      0      0      0      0
    0      0    227    394      0     19      0      0      0      0
    0      0    108     98      0     19      0      0      0      0
    0      0     19     98      0      0      0      0      0      0
    0      0     78     98      0      0      0      0      0      0
    0      0     19     88      0      0      0      0      0      0
-----  -----  -----  -----  -----  -----  -----  -----  -----  -----
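
Note that, unlike the other columns, the queue depths are point-in-time
snapshots rather than deltas over the interval; print_iostat_queues() in the
diff below reads only the newest stats nvlist (it passes NULL for the old
one).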

-p: Display numbers as parsable (exact) values.

Also, update the iostat syntax so the user can select specific vdevs to show
statistics for.  The three forms for choosing pools/vdevs are shown below,
followed by a sketch of how they are distinguished:

Display a list of pools:
    zpool iostat ... [pool ...]

Display a list of vdevs from a specific pool:
    zpool iostat ... [pool vdev ...]

Display a list of vdevs from any pools:
    zpool iostat ... [vdev ...]
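
Internally, once the trailing interval/count values (if any) have been
stripped, the remaining names are classified roughly in this order by the
new code in zpool_do_iostat() (pool names win when a name could be read
either way):

    if (argc == 0) {
            /* No names given: show all pools. */
    } else if (are_all_pools(argc, argv)) {
            /* Every argument is a pool name. */
    } else if (are_vdevs_in_pool(argc, argv, NULL, &cb)) {
            /* Every argument is a vdev, searched across all pools. */
    } else if (are_all_pools(1, argv) &&
        are_vdevs_in_pool(argc - 1, argv + 1, argv[0], &cb)) {
            /* First argument is a pool, the rest are vdevs in that pool. */
    } else {
            /* Ambiguous or unknown names: report them and print usage. */
    }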

Lastly, allow the zpool command "interval" value to be floating point:
    zpool iostat -v 0.5

Signed-off-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #4433

29 files changed:
cmd/zpool/Makefile.am
cmd/zpool/zpool_iter.c
cmd/zpool/zpool_main.c
cmd/zpool/zpool_util.c
cmd/zpool/zpool_util.h
include/libzfs.h
include/sys/fs/zfs.h
include/sys/vdev.h
include/sys/vdev_impl.h
include/sys/zfs_context.h
include/sys/zio.h
include/sys/zio_priority.h
lib/libspl/include/sys/sysmacros.h
lib/libzfs/libzfs_pool.c
lib/libzfs/libzfs_util.c
lib/libzpool/kernel.c
lib/libzpool/util.c
man/man8/zpool.8
module/zfs/spa.c
module/zfs/vdev.c
module/zfs/vdev_disk.c
module/zfs/vdev_label.c
module/zfs/zio.c
tests/runfiles/linux.run
tests/zfs-tests/tests/functional/cli_user/zpool_iostat/Makefile.am
tests/zfs-tests/tests/functional/cli_user/zpool_iostat/setup.ksh
tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_002_pos.ksh
tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_003_neg.ksh
tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_004_pos.ksh [new file with mode: 0755]

cmd/zpool/Makefile.am
index c11951b2277947988714ca3acf316fda5aa73a67..b4ff106e1a399b8416b3c0f06822dacfbea9cbb8 100644 (file)
@@ -19,4 +19,4 @@ zpool_LDADD = \
        $(top_builddir)/lib/libzpool/libzpool.la \
        $(top_builddir)/lib/libzfs/libzfs.la \
        $(top_builddir)/lib/libzfs_core/libzfs_core.la \
-       $(LIBBLKID)
+       -lm $(LIBBLKID)
cmd/zpool/zpool_iter.c
index 952d19172c06590c2c92aa33941fb7fc6ff09055..a18ccf29df33ac8706337f91f3acd99cc306aa22 100644 (file)
@@ -250,3 +250,69 @@ for_each_pool(int argc, char **argv, boolean_t unavail,
 
        return (ret);
 }
+
+static int
+for_each_vdev_cb(zpool_handle_t *zhp, nvlist_t *nv, pool_vdev_iter_f func,
+    void *data)
+{
+       nvlist_t **child;
+       uint_t c, children;
+       int ret = 0;
+       int i;
+       char *type;
+
+       const char *list[] = {
+           ZPOOL_CONFIG_SPARES,
+           ZPOOL_CONFIG_L2CACHE,
+           ZPOOL_CONFIG_CHILDREN
+       };
+
+       for (i = 0; i < ARRAY_SIZE(list); i++) {
+               if (nvlist_lookup_nvlist_array(nv, list[i], &child,
+                   &children) == 0) {
+                       for (c = 0; c < children; c++) {
+                               uint64_t ishole = 0;
+
+                               (void) nvlist_lookup_uint64(child[c],
+                                   ZPOOL_CONFIG_IS_HOLE, &ishole);
+
+                               if (ishole)
+                                       continue;
+
+                               ret |= for_each_vdev_cb(zhp, child[c], func,
+                                   data);
+                       }
+               }
+       }
+
+       if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) != 0)
+               return (ret);
+
+       /* Don't run our function on root vdevs */
+       if (strcmp(type, VDEV_TYPE_ROOT) != 0) {
+               ret |= func(zhp, nv, data);
+       }
+
+       return (ret);
+}
+
+/*
+ * This is the equivalent of for_each_pool() for vdevs.  It iterates through
+ * all vdevs in the pool, ignoring root vdevs and holes, calling func() on
+ * each one.
+ *
+ * @zhp:       Zpool handle
+ * @func:      Function to call on each vdev
+ * @data:      Custom data to pass to the function
+ */
+int
+for_each_vdev(zpool_handle_t *zhp, pool_vdev_iter_f func, void *data)
+{
+       nvlist_t *config, *nvroot;
+
+       if ((config = zpool_get_config(zhp, NULL)) != NULL) {
+               verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+                   &nvroot) == 0);
+       }
+       return (for_each_vdev_cb(zhp, nvroot, func, data));
+}
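
(For illustration only: a hypothetical caller of the new iterator.  The
callback name below is made up; its signature matches the pool_vdev_iter_f
callbacks used elsewhere in this patch.)

    static int
    count_vdev_cb(zpool_handle_t *zhp, nvlist_t *nv, void *data)
    {
            /* Called once per non-root, non-hole vdev in the pool */
            (*(unsigned int *)data)++;
            return (0);
    }

    /* ... then, with an open zpool_handle_t *zhp: */
    unsigned int nvdevs = 0;
    (void) for_each_vdev(zhp, count_vdev_cb, &nvdevs);
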
cmd/zpool/zpool_main.c
index 9c7e2a0c4a3233c4765209dd17e56f401ad7023e..6412a8e935b9355d0e978d5c2a2ab45582493a9d 100644 (file)
@@ -51,6 +51,7 @@
 #include <sys/fm/util.h>
 #include <sys/fm/protocol.h>
 #include <sys/zfs_ioctl.h>
+#include <math.h>
 
 #include <libzfs.h>
 
@@ -144,6 +145,23 @@ typedef enum {
 } zpool_help_t;
 
 
+/*
+ * Flags for stats to display with "zpool iostat"
+ */
+enum iostat_type {
+       IOS_DEFAULT = 0,
+       IOS_LATENCY = 1,
+       IOS_QUEUES = 2,
+       IOS_L_HISTO = 3,
+       IOS_COUNT,      /* always last element */
+};
+
+/* iostat_type entries as bitmasks */
+#define        IOS_DEFAULT_M   (1ULL << IOS_DEFAULT)
+#define        IOS_LATENCY_M   (1ULL << IOS_LATENCY)
+#define        IOS_QUEUES_M    (1ULL << IOS_QUEUES)
+#define        IOS_L_HISTO_M   (1ULL << IOS_L_HISTO)
+
 typedef struct zpool_command {
        const char      *name;
        int             (*func)(int, char **);
@@ -196,7 +214,7 @@ static zpool_command_t command_table[] = {
        { "set",        zpool_do_set,           HELP_SET                },
 };
 
-#define        NCOMMAND        (sizeof (command_table) / sizeof (command_table[0]))
+#define        NCOMMAND        (ARRAY_SIZE(command_table))
 
 static zpool_command_t *current_command;
 static char history_str[HIS_MAX_RECORD_LEN];
@@ -237,7 +255,8 @@ get_usage(zpool_help_t idx) {
                    "[-R root] [-F [-n]]\n"
                    "\t    <pool | id> [newpool]\n"));
        case HELP_IOSTAT:
-               return (gettext("\tiostat [-gLPvy] [-T d|u] [pool] ... "
+               return (gettext("\tiostat [-T d | u] [-ghHLpPvy] [[-lq]|-w]\n"
+                   "\t    [[pool ...]|[pool vdev ...]|[vdev ...]] "
                    "[interval [count]]\n"));
        case HELP_LABELCLEAR:
                return (gettext("\tlabelclear [-f] <vdev>\n"));
@@ -2481,61 +2500,690 @@ error:
 }
 
 typedef struct iostat_cbdata {
-       boolean_t cb_verbose;
+       uint64_t cb_flags;
        int cb_name_flags;
        int cb_namewidth;
        int cb_iteration;
+       char **cb_vdev_names; /* Only show these vdevs */
+       unsigned int cb_vdev_names_count;
+       boolean_t cb_verbose;
+       boolean_t cb_literal;
+       boolean_t cb_scripted;
        zpool_list_t *cb_list;
 } iostat_cbdata_t;
 
+/*  iostat labels */
+typedef struct name_and_columns {
+       const char *name;       /* Column name */
+       unsigned int columns;   /* Center name to this number of columns */
+} name_and_columns_t;
+
+#define        IOSTAT_MAX_LABELS       11      /* Max number of labels on one line */
+
+static const name_and_columns_t iostat_top_labels[][IOSTAT_MAX_LABELS] =
+{
+       [IOS_DEFAULT] = {{"capacity", 2}, {"operations", 2}, {"bandwidth", 2},
+           {NULL}},
+       [IOS_LATENCY] = {{"total_wait", 2}, {"disk_wait", 2}, {"syncq_wait", 2},
+           {"asyncq_wait", 2}, {"scrub"}},
+       [IOS_QUEUES] = {{"syncq_read", 2}, {"syncq_write", 2},
+           {"asyncq_read", 2}, {"asyncq_write", 2}, {"scrubq_read", 2},
+           {NULL}},
+       [IOS_L_HISTO] = {{"total_wait", 2}, {"disk_wait", 2},
+           {"sync_queue", 2}, {"async_queue", 2}, {NULL}},
+};
+
+/* Shorthand - if "columns" field not set, default to 1 column */
+static const name_and_columns_t iostat_bottom_labels[][IOSTAT_MAX_LABELS] =
+{
+       [IOS_DEFAULT] = {{"alloc"}, {"free"}, {"read"}, {"write"}, {"read"},
+           {"write"}, {NULL}},
+       [IOS_LATENCY] = {{"read"}, {"write"}, {"read"}, {"write"}, {"read"},
+           {"write"}, {"read"}, {"write"}, {"wait"}, {NULL}},
+       [IOS_QUEUES] = {{"pend"}, {"activ"}, {"pend"}, {"activ"}, {"pend"},
+           {"activ"}, {"pend"}, {"activ"}, {"pend"}, {"activ"}, {NULL}},
+       [IOS_L_HISTO] = {{"read"}, {"write"}, {"read"}, {"write"}, {"read"},
+           {"write"}, {"read"}, {"write"}, {"scrub"}, {NULL}},
+};
+
+/*
+ * Return the number of labels in a null-terminated name_and_columns_t
+ * array.
+ *
+ */
+static unsigned int
+label_array_len(const name_and_columns_t *labels)
+{
+       int i = 0;
+
+       while (labels[i].name)
+               i++;
+
+       return (i);
+}
+
+/*
+ * Return a default column width for default/latency/queue columns. This does
+ * not include histograms, which have their columns autosized.
+ */
+static unsigned int
+default_column_width(iostat_cbdata_t *cb, enum iostat_type type)
+{
+       unsigned long column_width = 5; /* Normal niceprint */
+       static unsigned long widths[] = {
+               /*
+                * Choose some sane default column sizes for printing the
+                * raw numbers.
+                */
+               [IOS_DEFAULT] = 15, /* 1PB capacity */
+               [IOS_LATENCY] = 10, /* 1B ns = 10sec */
+               [IOS_QUEUES] = 6,   /* 1M queue entries */
+       };
+
+       if (cb->cb_literal)
+               column_width = widths[type];
+
+       return (column_width);
+}
+
+/*
+ * Print the column labels, i.e:
+ *
+ *   capacity     operations     bandwidth
+ * alloc   free   read  write   read  write  ...
+ *
+ * If force_column_width is set, use it for the column width.  If not set, use
+ * the default column width.
+ */
+void
+print_iostat_labels(iostat_cbdata_t *cb, unsigned int force_column_width,
+    const name_and_columns_t labels[][IOSTAT_MAX_LABELS])
+{
+       int i, idx, s;
+       unsigned int text_start, rw_column_width, spaces_to_end;
+       uint64_t flags = cb->cb_flags;
+       uint64_t f;
+       unsigned int column_width = force_column_width;
+
+       /* For each bit set in flags */
+       for (f = flags; f; f &= ~(1ULL << idx)) {
+               idx = lowbit64(f) - 1;
+               if (!force_column_width)
+                       column_width = default_column_width(cb, idx);
+               /* Print our top labels centered over "read  write" label. */
+               for (i = 0; i < label_array_len(labels[idx]); i++) {
+                       const char *name = labels[idx][i].name;
+                       /*
+                        * We treat labels[][].columns == 0 as shorthand
+                        * for one column.  It makes writing out the label
+                        * tables more concise.
+                        */
+                       unsigned int columns = MAX(1, labels[idx][i].columns);
+                       unsigned int slen = strlen(name);
+
+                       rw_column_width = (column_width * columns) +
+                           (2 * (columns - 1));
+
+                       text_start = (int) ((rw_column_width)/columns -
+                           slen/columns);
+
+                       printf("  ");   /* Two spaces between columns */
+
+                       /* Space from beginning of column to label */
+                       for (s = 0; s < text_start; s++)
+                               printf(" ");
+
+                       printf("%s", name);
+
+                       /* Print space after label to end of column */
+                       spaces_to_end = rw_column_width - text_start - slen;
+                       for (s = 0; s < spaces_to_end; s++)
+                               printf(" ");
+
+               }
+       }
+       printf("\n");
+}
+
+/*
+ * Utility function to print out a line of dashes like:
+ *
+ *     --------------------------------  -----  -----  -----  -----  -----
+ *
+ * ...or a dashed named-row line like:
+ *
+ *     logs                                  -      -      -      -      -
+ *
+ * @cb:                                iostat data
+ *
+ * @force_column_width         If non-zero, use the value as the column width.
+ *                             Otherwise use the default column widths.
+ *
+ * @name:                      Print a dashed named-row line starting
+ *                             with @name.  Otherwise, print a regular
+ *                             dashed line.
+ */
+static void
+print_iostat_dashes(iostat_cbdata_t *cb, unsigned int force_column_width,
+    const char *name)
+{
+       int i;
+       unsigned int namewidth;
+       uint64_t flags = cb->cb_flags;
+       uint64_t f;
+       int idx;
+       const name_and_columns_t *labels;
+
+       if (cb->cb_flags & IOS_L_HISTO_M)
+               namewidth = MAX(cb->cb_namewidth, strlen("latency"));
+       else
+               namewidth = cb->cb_namewidth;
+
+       if (name) {
+               namewidth = MAX(cb->cb_namewidth, strlen(name));
+               printf("%-*s", namewidth, name);
+       } else {
+               for (i = 0; i < namewidth; i++)
+                       (void) printf("-");
+       }
+
+       /* For each bit in flags */
+       for (f = flags; f; f &= ~(1ULL << idx)) {
+               unsigned int column_width;
+               idx = lowbit64(f) - 1;
+               if (force_column_width)
+                       column_width = force_column_width;
+               else
+                       column_width = default_column_width(cb, idx);
+
+               labels = iostat_bottom_labels[idx];
+               for (i = 0; i < label_array_len(labels); i++) {
+                       if (name)
+                               printf("  %*s-", column_width - 1, " ");
+                       else
+                               printf("  %.*s", column_width,
+                                   "--------------------");
+               }
+       }
+       printf("\n");
+}
+
+
+static void
+print_iostat_separator_impl(iostat_cbdata_t *cb,
+    unsigned int force_column_width)
+{
+       print_iostat_dashes(cb, force_column_width, NULL);
+}
+
 static void
 print_iostat_separator(iostat_cbdata_t *cb)
 {
-       int i = 0;
+       print_iostat_separator_impl(cb, 0);
+}
+
+static void
+print_iostat_header_impl(iostat_cbdata_t *cb, unsigned int force_column_width,
+    const char *histo_vdev_name)
+{
+       unsigned int namewidth;
+       uint64_t flags = cb->cb_flags;
+
+       if (flags & IOS_L_HISTO_M)
+               namewidth = MAX(cb->cb_namewidth, strlen("latency"));
+       else
+               namewidth = cb->cb_namewidth;
+
+       if (flags & IOS_L_HISTO_M)
+               printf("%-*s", namewidth, histo_vdev_name);
+       else
+               printf("%*s", namewidth, "");
 
-       for (i = 0; i < cb->cb_namewidth; i++)
-               (void) printf("-");
-       (void) printf("  -----  -----  -----  -----  -----  -----\n");
+       print_iostat_labels(cb, force_column_width, iostat_top_labels);
+
+       printf("%-*s", namewidth, flags & IOS_L_HISTO_M ? "latency" :
+           cb->cb_vdev_names_count ? "vdev" : "pool");
+
+       print_iostat_labels(cb, force_column_width, iostat_bottom_labels);
+
+       print_iostat_separator_impl(cb, force_column_width);
 }
 
 static void
 print_iostat_header(iostat_cbdata_t *cb)
 {
-       (void) printf("%*s     capacity     operations    bandwidth\n",
-           cb->cb_namewidth, "");
-       (void) printf("%-*s  alloc   free   read  write   read  write\n",
-           cb->cb_namewidth, "pool");
-       print_iostat_separator(cb);
+       print_iostat_header_impl(cb, 0, NULL);
 }
 
+
 /*
  * Display a single statistic.
  */
 static void
-print_one_stat(uint64_t value)
+print_one_stat(uint64_t value, enum zfs_nicenum_format format,
+    unsigned int column_size, boolean_t scripted)
 {
        char buf[64];
 
-       zfs_nicenum(value, buf, sizeof (buf));
-       (void) printf("  %5s", buf);
+       zfs_nicenum_format(value, buf, sizeof (buf), format);
+
+       if (scripted)
+               printf("\t%s", buf);
+       else
+               printf("  %*s", column_size, buf);
+}
+
+/*
+ * Calculate the default vdev stats
+ *
+ * Subtract oldvs from newvs, apply a scaling factor, and save the resulting
+ * stats into calcvs.
+ */
+static void
+calc_default_iostats(vdev_stat_t *oldvs, vdev_stat_t *newvs,
+    vdev_stat_t *calcvs)
+{
+       int i;
+
+       memcpy(calcvs, newvs, sizeof (*calcvs));
+       for (i = 0; i < ARRAY_SIZE(calcvs->vs_ops); i++)
+               calcvs->vs_ops[i] = (newvs->vs_ops[i] - oldvs->vs_ops[i]);
+
+       for (i = 0; i < ARRAY_SIZE(calcvs->vs_bytes); i++)
+               calcvs->vs_bytes[i] = (newvs->vs_bytes[i] - oldvs->vs_bytes[i]);
+}
+
+/*
+ * Internal representation of the extended iostats data.
+ *
+ * The extended iostat stats are exported in nvlists as either uint64_t arrays
+ * or single uint64_t's.  We make both look like arrays to make them easier
+ * to process.  In order to make single uint64_t's look like arrays, we set
+ * __data to the stat data, and then set *data = &__data with count = 1.  Then,
+ * we can just use *data and count.
+ */
+struct stat_array {
+       uint64_t *data;
+       uint_t count;   /* Number of entries in data[] */
+       uint64_t __data; /* Only used when data is a single uint64_t */
+};
+
+static uint64_t
+stat_histo_max(struct stat_array *nva, unsigned int len) {
+       uint64_t max = 0;
+       int i;
+       for (i = 0; i < len; i++)
+               max = MAX(max, array64_max(nva[i].data, nva[i].count));
+
+       return (max);
+}
+
+/*
+ * Helper function to lookup a uint64_t array or uint64_t value and store its
+ * data as a stat_array.  If the nvpair is a single uint64_t value, then we make
+ * it look like a one element array to make it easier to process.
+ */
+static int
+nvpair64_to_stat_array(nvlist_t *nvl, const char *name,
+    struct stat_array *nva) {
+       nvpair_t *tmp;
+       int ret;
+
+       verify(nvlist_lookup_nvpair(nvl, name, &tmp) == 0);
+       switch (nvpair_type(tmp)) {
+       case DATA_TYPE_UINT64_ARRAY:
+               ret = nvpair_value_uint64_array(tmp, &nva->data, &nva->count);
+               break;
+       case DATA_TYPE_UINT64:
+               ret = nvpair_value_uint64(tmp, &nva->__data);
+               nva->data = &nva->__data;
+               nva->count = 1;
+               break;
+       default:
+               /* Not a uint64_t */
+               ret = EINVAL;
+               break;
+       }
+
+       return (ret);
+}
+
+/*
+ * Given a list of nvlist names, look up the extended stats in newnv and oldnv,
+ * subtract them, and return the results in a newly allocated stat_array.
+ * You must free the returned array after you are done with it with
+ * free_calc_stats().
+ *
+ * Additionally, you can set "oldnv" to NULL if you simply want the newnv
+ * values.
+ */
+static struct stat_array *
+calc_and_alloc_stats_ex(const char **names, unsigned int len, nvlist_t *oldnv,
+    nvlist_t *newnv)
+{
+       nvlist_t *oldnvx = NULL, *newnvx;
+       struct stat_array *oldnva, *newnva, *calcnva;
+       int i, j;
+       unsigned int alloc_size = (sizeof (struct stat_array)) * len;
+
+       /* Extract our extended stats nvlist from the main list */
+       verify(nvlist_lookup_nvlist(newnv, ZPOOL_CONFIG_VDEV_STATS_EX,
+           &newnvx) == 0);
+       if (oldnv) {
+               verify(nvlist_lookup_nvlist(oldnv, ZPOOL_CONFIG_VDEV_STATS_EX,
+                   &oldnvx) == 0);
+       }
+
+       newnva = safe_malloc(alloc_size);
+       oldnva = safe_malloc(alloc_size);
+       calcnva = safe_malloc(alloc_size);
+
+       for (j = 0; j < len; j++) {
+               verify(nvpair64_to_stat_array(newnvx, names[j],
+                   &newnva[j]) == 0);
+               calcnva[j].count = newnva[j].count;
+               alloc_size = calcnva[j].count * sizeof (calcnva[j].data[0]);
+               calcnva[j].data = safe_malloc(alloc_size);
+               memcpy(calcnva[j].data, newnva[j].data, alloc_size);
+
+               if (oldnvx) {
+                       verify(nvpair64_to_stat_array(oldnvx, names[j],
+                           &oldnva[j]) == 0);
+                       for (i = 0; i < oldnva[j].count; i++)
+                               calcnva[j].data[i] -= oldnva[j].data[i];
+               }
+       }
+       free(newnva);
+       free(oldnva);
+       return (calcnva);
+}
+
+static void
+free_calc_stats(struct stat_array *nva, unsigned int len)
+{
+       int i;
+       for (i = 0; i < len; i++)
+               free(nva[i].data);
+
+       free(nva);
+}
+
+static void
+print_iostat_histo(struct stat_array *nva, unsigned int len,
+    iostat_cbdata_t *cb, unsigned int column_width, unsigned int namewidth,
+    double scale)
+{
+       int i, j;
+       char buf[6];
+       uint64_t val;
+       enum zfs_nicenum_format format;
+       unsigned int buckets;
+
+       if (cb->cb_literal)
+               format = ZFS_NICENUM_RAW;
+       else
+               format = ZFS_NICENUM_1024;
+
+       /* All these histos are the same size, so just use nva[0].count */
+       buckets = nva[0].count;
+
+       for (j = 0; j < buckets; j++) {
+               /* Ending range of this bucket */
+               val = (1UL << (j + 1)) - 1;
+
+               /* Print histogram bucket label */
+               zfs_nicetime(val, buf, sizeof (buf));
+               if (cb->cb_scripted)
+                       printf("%llu", (u_longlong_t) val);
+               else
+                       printf("%-*s", namewidth, buf);
+
+               /* Print the values on the line */
+               for (i = 0; i < len; i++) {
+                       print_one_stat(nva[i].data[j] * scale, format,
+                           column_width, cb->cb_scripted);
+               }
+               printf("\n");
+       }
+}
+
+static void
+print_solid_separator(unsigned int length)
+{
+       while (length--)
+               printf("-");
+       printf("\n");
+}
+
+static void
+print_iostat_histos(iostat_cbdata_t *cb, nvlist_t *oldnv,
+    nvlist_t *newnv, double scale, const char *name)
+{
+       unsigned int column_width;
+       unsigned int namewidth;
+       unsigned int entire_width;
+
+       const char *names[] = {
+               ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,
+       };
+       struct stat_array *nva;
+       nva = calc_and_alloc_stats_ex(names, ARRAY_SIZE(names), oldnv, newnv);
+
+       if (cb->cb_literal) {
+               column_width = MAX(5,
+                   (unsigned int) log10(stat_histo_max(nva,
+                   ARRAY_SIZE(names))) + 1);
+       } else {
+               column_width = 5;
+       }
+
+       namewidth = MAX(cb->cb_namewidth, strlen("latency"));
+
+       /*
+        * Calculate the entire line width of what we're printing.  The
+        * +2 is for the two spaces between columns:
+        */
+       /*       read  write                            */
+       /*      -----  -----                            */
+       /*      |___|  <---------- column_width         */
+       /*                                              */
+       /*      |__________|  <--- entire_width         */
+       /*                                              */
+       entire_width = namewidth + (column_width + 2) *
+           label_array_len(iostat_bottom_labels[IOS_L_HISTO]);
+
+       if (cb->cb_scripted)
+               printf("%s\n", name);
+       else
+               print_iostat_header_impl(cb, column_width, name);
+
+       print_iostat_histo(nva, ARRAY_SIZE(names), cb, column_width,
+           namewidth, scale);
+
+       free_calc_stats(nva, ARRAY_SIZE(names));
+       if (!cb->cb_scripted)
+               print_solid_separator(entire_width);
+}
+
+/*
+ * Calculate the average latency of a power-of-two latency histogram
+ */
+static uint64_t
+single_histo_average(uint64_t *histo, unsigned int buckets)
+{
+       int i;
+       uint64_t count = 0, total = 0;
+
+       for (i = 0; i < buckets; i++) {
+               /*
+                * Our buckets are power-of-two latency ranges.  Use the
+                * midpoint latency of each bucket to calculate the average.
+                * For example:
+                *
+                * Bucket          Midpoint
+                * 8ns-15ns:       12ns
+                * 16ns-31ns:      24ns
+                * ...
+                */
+               if (histo[i] != 0) {
+                       total += histo[i] * (((1UL << i) + ((1UL << i)/2)));
+                       count += histo[i];
+               }
+       }
+
+       /* Prevent divide by zero */
+       return (count == 0 ? 0 : total / count);
+}
+
+static void
+print_iostat_queues(iostat_cbdata_t *cb, nvlist_t *oldnv,
+    nvlist_t *newnv, double scale)
+{
+       int i;
+       uint64_t val;
+       const char *names[] = {
+               ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE,
+               ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE,
+               ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE,
+               ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE,
+               ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE,
+               ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE,
+               ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE,
+               ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE,
+               ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE,
+               ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE,
+       };
+
+       struct stat_array *nva;
+
+       unsigned int column_width = default_column_width(cb, IOS_QUEUES);
+       enum zfs_nicenum_format format;
+
+       nva = calc_and_alloc_stats_ex(names, ARRAY_SIZE(names), NULL, newnv);
+
+       if (cb->cb_literal)
+               format = ZFS_NICENUM_RAW;
+       else
+               format = ZFS_NICENUM_1024;
+
+       for (i = 0; i < ARRAY_SIZE(names); i++) {
+               val = nva[i].data[0] * scale;
+               print_one_stat(val, format, column_width, cb->cb_scripted);
+       }
+
+       free_calc_stats(nva, ARRAY_SIZE(names));
+}
+
+static void
+print_iostat_latency(iostat_cbdata_t *cb, nvlist_t *oldnv,
+    nvlist_t *newnv, double scale)
+{
+       int i;
+       uint64_t val;
+       const char *names[] = {
+               ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO,
+               ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,
+       };
+       struct stat_array *nva;
+
+       unsigned int column_width = default_column_width(cb, IOS_LATENCY);
+       enum zfs_nicenum_format format;
+
+       nva = calc_and_alloc_stats_ex(names, ARRAY_SIZE(names), oldnv, newnv);
+
+       if (cb->cb_literal)
+               format = ZFS_NICENUM_RAW;
+       else
+               format = ZFS_NICENUM_TIME;
+
+       /* Print our avg latencies on the line */
+       for (i = 0; i < ARRAY_SIZE(names); i++) {
+               /* Compute average latency for a latency histo */
+               val = single_histo_average(nva[i].data, nva[i].count) * scale;
+               print_one_stat(val, format, column_width, cb->cb_scripted);
+       }
+       free_calc_stats(nva, ARRAY_SIZE(names));
+}
+
+/*
+ * Print default statistics (capacity/operations/bandwidth)
+ */
+static void
+print_iostat_default(vdev_stat_t *vs, iostat_cbdata_t *cb, double scale)
+{
+       unsigned int column_width = default_column_width(cb, IOS_DEFAULT);
+       enum zfs_nicenum_format format;
+       char na;        /* char to print for "not applicable" values */
+
+       if (cb->cb_literal) {
+               format = ZFS_NICENUM_RAW;
+               na = '0';
+       } else {
+               format = ZFS_NICENUM_1024;
+               na = '-';
+       }
+
+       /* only toplevel vdevs have capacity stats */
+       if (vs->vs_space == 0) {
+               if (cb->cb_scripted)
+                       printf("\t%c\t%c", na, na);
+               else
+                       printf("  %*c  %*c", column_width, na, column_width,
+                           na);
+       } else {
+               print_one_stat(vs->vs_alloc, format, column_width,
+                   cb->cb_scripted);
+               print_one_stat(vs->vs_space - vs->vs_alloc, format,
+                   column_width, cb->cb_scripted);
+       }
+
+       print_one_stat((uint64_t)(vs->vs_ops[ZIO_TYPE_READ] * scale),
+           format, column_width, cb->cb_scripted);
+       print_one_stat((uint64_t)(vs->vs_ops[ZIO_TYPE_WRITE] * scale),
+           format, column_width, cb->cb_scripted);
+       print_one_stat((uint64_t)(vs->vs_bytes[ZIO_TYPE_READ] * scale),
+           format, column_width, cb->cb_scripted);
+       print_one_stat((uint64_t)(vs->vs_bytes[ZIO_TYPE_WRITE] * scale),
+           format, column_width, cb->cb_scripted);
 }
 
 /*
  * Print out all the statistics for the given vdev.  This can either be the
  * toplevel configuration, or called recursively.  If 'name' is NULL, then this
  * is a verbose output, and we don't want to display the toplevel pool stats.
+ *
+ * Returns the number of stat lines printed.
  */
-void
+unsigned int
 print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
     nvlist_t *newnv, iostat_cbdata_t *cb, int depth)
 {
        nvlist_t **oldchild, **newchild;
        uint_t c, children;
-       vdev_stat_t *oldvs, *newvs;
+       vdev_stat_t *oldvs, *newvs, *calcvs;
        vdev_stat_t zerovs = { 0 };
+       char *vname;
+       int i;
+       int ret = 0;
        uint64_t tdelta;
        double scale;
-       char *vname;
+
+       calcvs = safe_malloc(sizeof (*calcvs));
 
        if (oldnv != NULL) {
                verify(nvlist_lookup_uint64_array(oldnv,
@@ -2544,54 +3192,92 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
                oldvs = &zerovs;
        }
 
+       /* Do we only want to see a specific vdev? */
+       for (i = 0; i < cb->cb_vdev_names_count; i++) {
+               /* Yes we do.  Is this the vdev? */
+               if (strcmp(name, cb->cb_vdev_names[i]) == 0) {
+                       /*
+                        * This is our vdev.  Since it is the only vdev we
+                        * will be displaying, make depth = 0 so that it
+                        * doesn't get indented.
+                        */
+                       depth = 0;
+                       break;
+               }
+       }
+
+       if (cb->cb_vdev_names_count && (i == cb->cb_vdev_names_count)) {
+               /* Couldn't match the name */
+               goto children;
+       }
+
+
        verify(nvlist_lookup_uint64_array(newnv, ZPOOL_CONFIG_VDEV_STATS,
            (uint64_t **)&newvs, &c) == 0);
 
-       if (strlen(name) + depth > cb->cb_namewidth)
-               (void) printf("%*s%s", depth, "", name);
-       else
-               (void) printf("%*s%s%*s", depth, "", name,
-                   (int)(cb->cb_namewidth - strlen(name) - depth), "");
+       /*
+        * Print the vdev name unless it's a histogram.  Histograms
+        * display the vdev name in the header itself.
+        */
+       if (!(cb->cb_flags & IOS_L_HISTO_M)) {
+               if (cb->cb_scripted) {
+                       printf("%s", name);
+               } else {
+                       if (strlen(name) + depth > cb->cb_namewidth)
+                               (void) printf("%*s%s", depth, "", name);
+                       else
+                               (void) printf("%*s%s%*s", depth, "", name,
+                                   (int)(cb->cb_namewidth - strlen(name) -
+                                   depth), "");
+               }
+       }
 
+       /* Calculate our scaling factor */
        tdelta = newvs->vs_timestamp - oldvs->vs_timestamp;
-
-       if (tdelta == 0)
-               scale = 1.0;
-       else
-               scale = (double)NANOSEC / tdelta;
-
-       /* only toplevel vdevs have capacity stats */
-       if (newvs->vs_space == 0) {
-               (void) printf("      -      -");
+       if ((oldvs->vs_timestamp == 0) && (cb->cb_flags & IOS_L_HISTO_M)) {
+               /*
+                * If we specify printing histograms with no time interval, then
+                * print the histogram numbers over the entire lifetime of the
+                * vdev.
+                */
+               scale = 1;
        } else {
-               print_one_stat(newvs->vs_alloc);
-               print_one_stat(newvs->vs_space - newvs->vs_alloc);
+               if (tdelta == 0)
+                       scale = 1.0;
+               else
+                       scale = (double)NANOSEC / tdelta;
        }
 
-       print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_READ] -
-           oldvs->vs_ops[ZIO_TYPE_READ])));
-
-       print_one_stat((uint64_t)(scale * (newvs->vs_ops[ZIO_TYPE_WRITE] -
-           oldvs->vs_ops[ZIO_TYPE_WRITE])));
-
-       print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_READ] -
-           oldvs->vs_bytes[ZIO_TYPE_READ])));
+       if (cb->cb_flags & IOS_DEFAULT_M) {
+               calc_default_iostats(oldvs, newvs, calcvs);
+               print_iostat_default(calcvs, cb, scale);
+       }
+       if (cb->cb_flags & IOS_LATENCY_M)
+               print_iostat_latency(cb, oldnv, newnv, scale);
+       if (cb->cb_flags & IOS_QUEUES_M)
+               print_iostat_queues(cb, oldnv, newnv, scale);
+       if (cb->cb_flags & IOS_L_HISTO_M) {
+               printf("\n");
+               print_iostat_histos(cb, oldnv, newnv, scale, name);
+       }
 
-       print_one_stat((uint64_t)(scale * (newvs->vs_bytes[ZIO_TYPE_WRITE] -
-           oldvs->vs_bytes[ZIO_TYPE_WRITE])));
+       if (!(cb->cb_flags & IOS_L_HISTO_M))
+               printf("\n");
 
-       (void) printf("\n");
+       free(calcvs);
+       ret++;
 
+children:
        if (!cb->cb_verbose)
-               return;
+               return (ret);
 
        if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_CHILDREN,
            &newchild, &children) != 0)
-               return;
+               return (ret);
 
        if (oldnv && nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_CHILDREN,
            &oldchild, &c) != 0)
-               return;
+               return (ret);
 
        for (c = 0; c < children; c++) {
                uint64_t ishole = B_FALSE, islog = B_FALSE;
@@ -2607,7 +3293,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
 
                vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
                    cb->cb_name_flags);
-               print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
+               ret += print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
                    newchild[c], cb, depth + 2);
                free(vname);
        }
@@ -2617,8 +3303,10 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
         */
 
        if (num_logs(newnv) > 0) {
-               (void) printf("%-*s      -      -      -      -      -      "
-                   "-\n", cb->cb_namewidth, "logs");
+               if ((!(cb->cb_flags & IOS_L_HISTO_M)) && !cb->cb_scripted &&
+                   !cb->cb_vdev_names) {
+                       print_iostat_dashes(cb, 0, "logs");
+               }
 
                for (c = 0; c < children; c++) {
                        uint64_t islog = B_FALSE;
@@ -2628,7 +3316,7 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
                        if (islog) {
                                vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
                                    cb->cb_name_flags);
-                               print_vdev_stats(zhp, vname, oldnv ?
+                               ret += print_vdev_stats(zhp, vname, oldnv ?
                                    oldchild[c] : NULL, newchild[c],
                                    cb, depth + 2);
                                free(vname);
@@ -2642,23 +3330,28 @@ print_vdev_stats(zpool_handle_t *zhp, const char *name, nvlist_t *oldnv,
         */
        if (nvlist_lookup_nvlist_array(newnv, ZPOOL_CONFIG_L2CACHE,
            &newchild, &children) != 0)
-               return;
+               return (ret);
 
        if (oldnv && nvlist_lookup_nvlist_array(oldnv, ZPOOL_CONFIG_L2CACHE,
            &oldchild, &c) != 0)
-               return;
+               return (ret);
 
        if (children > 0) {
-               (void) printf("%-*s      -      -      -      -      -      "
-                   "-\n", cb->cb_namewidth, "cache");
+               if ((!(cb->cb_flags & IOS_L_HISTO_M)) && !cb->cb_scripted &&
+                   !cb->cb_vdev_names) {
+                       print_iostat_dashes(cb, 0, "cache");
+               }
+
                for (c = 0; c < children; c++) {
                        vname = zpool_vdev_name(g_zfs, zhp, newchild[c],
                            cb->cb_name_flags);
-                       print_vdev_stats(zhp, vname, oldnv ? oldchild[c] : NULL,
-                           newchild[c], cb, depth + 2);
+                       ret += print_vdev_stats(zhp, vname, oldnv ? oldchild[c]
+                           : NULL, newchild[c], cb, depth + 2);
                        free(vname);
                }
        }
+
+       return (ret);
 }
 
 static int
@@ -2688,6 +3381,7 @@ print_iostat(zpool_handle_t *zhp, void *data)
        iostat_cbdata_t *cb = data;
        nvlist_t *oldconfig, *newconfig;
        nvlist_t *oldnvroot, *newnvroot;
+       int ret;
 
        newconfig = zpool_get_config(zhp, &oldconfig);
 
@@ -2703,15 +3397,13 @@ print_iostat(zpool_handle_t *zhp, void *data)
                verify(nvlist_lookup_nvlist(oldconfig, ZPOOL_CONFIG_VDEV_TREE,
                    &oldnvroot) == 0);
 
-       /*
-        * Print out the statistics for the pool.
-        */
-       print_vdev_stats(zhp, zpool_get_name(zhp), oldnvroot, newnvroot, cb, 0);
+       ret = print_vdev_stats(zhp, zpool_get_name(zhp), oldnvroot, newnvroot,
+                                                                       cb, 0);
+       if ((ret != 0) && !(cb->cb_flags & IOS_L_HISTO_M) && !cb->cb_scripted &&
+           cb->cb_verbose && !cb->cb_vdev_names_count)
+                               print_iostat_separator(cb);
 
-       if (cb->cb_verbose)
-               print_iostat_separator(cb);
-
-       return (0);
+       return (ret);
 }
 
 static int
@@ -2742,13 +3434,14 @@ get_namewidth(zpool_handle_t *zhp, void *data)
        if ((config = zpool_get_config(zhp, NULL)) != NULL) {
                verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
                    &nvroot) == 0);
+               unsigned int poolname_len = strlen(zpool_get_name(zhp));
                if (!cb->cb_verbose)
-                       cb->cb_namewidth = strlen(zpool_get_name(zhp));
+                       cb->cb_namewidth = poolname_len;
                else
-                       cb->cb_namewidth = max_width(zhp, nvroot, 0,
-                           cb->cb_namewidth, cb->cb_name_flags);
+                       cb->cb_namewidth = MAX(poolname_len,
+                           max_width(zhp, nvroot, 0, cb->cb_namewidth,
+                           cb->cb_name_flags));
        }
-
        /*
         * The width must be at least 10, but may be as large as the
         * column width - 42 so that we can still fit in one line.
@@ -2767,20 +3460,21 @@ get_namewidth(zpool_handle_t *zhp, void *data)
  * Parse the input string, get the 'interval' and 'count' value if there is one.
  */
 static void
-get_interval_count(int *argcp, char **argv, unsigned long *iv,
+get_interval_count(int *argcp, char **argv, float *iv,
     unsigned long *cnt)
 {
-       unsigned long interval = 0, count = 0;
+       float interval = 0;
+       unsigned long count = 0;
        int argc = *argcp;
 
        /*
         * Determine if the last argument is an integer or a pool name
         */
-       if (argc > 0 && isdigit(argv[argc - 1][0])) {
+       if (argc > 0 && isnumber(argv[argc - 1])) {
                char *end;
 
                errno = 0;
-               interval = strtoul(argv[argc - 1], &end, 10);
+               interval = strtof(argv[argc - 1], &end);
 
                if (*end == '\0' && errno == 0) {
                        if (interval == 0) {
@@ -2806,12 +3500,12 @@ get_interval_count(int *argcp, char **argv, unsigned long *iv,
         * If the last argument is also an integer, then we have both a count
         * and an interval.
         */
-       if (argc > 0 && isdigit(argv[argc - 1][0])) {
+       if (argc > 0 && isnumber(argv[argc - 1])) {
                char *end;
 
                errno = 0;
                count = interval;
-               interval = strtoul(argv[argc - 1], &end, 10);
+               interval = strtof(argv[argc - 1], &end);
 
                if (*end == '\0' && errno == 0) {
                        if (interval == 0) {
@@ -2846,12 +3540,299 @@ get_timestamp_arg(char c)
 }
 
 /*
- * zpool iostat [-gLPv] [-T d|u] [pool] ... [interval [count]]
+ * Return stat flags that are supported by all pools by both the module and
+ * zpool iostat.  "*data" should be initialized to all 0xFFs before running.
+ * It will get ANDed down until only the flags that are supported on all pools
+ * remain.
+ */
+static int
+get_stat_flags_cb(zpool_handle_t *zhp, void *data)
+{
+       uint64_t *mask = data;
+       nvlist_t *config, *nvroot, *nvx;
+       uint64_t flags = 0;
+       int i, j;
+
+       /*
+        * Lookup table for extended iostat flags to nvlist names.
+        * Basically a list of all the nvpairs a flag requires.
+        */
+       static const char *vsx_type_to_nvlist[IOS_COUNT][10] = {
+               [IOS_L_HISTO] = {
+                   ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
+                   ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
+                   ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,
+                   ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,
+                   ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO,
+                   ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO,
+                   ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO,
+                   ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO,
+                   ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,
+                   NULL},
+               [IOS_LATENCY] = {
+                   ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
+                   ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
+                   ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,
+                   ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,
+                   NULL},
+               [IOS_QUEUES] = {
+                   ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE,
+                   ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE,
+                   ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE,
+                   ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE,
+                   ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE,
+                   NULL}
+       };
+
+       config = zpool_get_config(zhp, NULL);
+       verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
+           &nvroot) == 0);
+
+       /* Default stats are always supported, but for completeness.. */
+       if (nvlist_exists(nvroot, ZPOOL_CONFIG_VDEV_STATS))
+               flags |= IOS_DEFAULT_M;
+
+       /* Get our extended stats nvlist from the main list */
+       if (nvlist_lookup_nvlist(nvroot, ZPOOL_CONFIG_VDEV_STATS_EX,
+           &nvx) != 0) {
+               /*
+                * No extended stats; they're probably running an older
+                * module.  No big deal, we support that too.
+                */
+               goto end;
+       }
+
+       /* For each extended stat, make sure all its nvpairs are supported */
+       for (j = 0; j < ARRAY_SIZE(vsx_type_to_nvlist); j++) {
+               if (!vsx_type_to_nvlist[j][0])
+                       continue;
+
+               /* Start off by assuming the flag is supported, then check */
+               flags |= (1ULL << j);
+               for (i = 0; vsx_type_to_nvlist[j][i]; i++) {
+                       if (!nvlist_exists(nvx, vsx_type_to_nvlist[j][i])) {
+                               /* flag isn't supported */
+                               flags = flags & ~(1ULL  << j);
+                               break;
+                       }
+               }
+       }
+end:
+       *mask = *mask & flags;
+       return (0);
+}
+
+/*
+ * Return a bitmask of stats that are supported on all pools by both the module
+ * and zpool iostat.
+ */
+static uint64_t
+get_stat_flags(zpool_list_t *list)
+{
+       uint64_t mask = -1;
+
+       /*
+        * get_stat_flags_cb() will lop off bits from "mask" until only the
+        * flags that are supported on all pools remain.
+        */
+       pool_list_iter(list, B_FALSE, get_stat_flags_cb, &mask);
+       return (mask);
+}
+
+/*
+ * Return 1 if cb_data->cb_vdev_names[0] is this vdev's name, 0 otherwise.
+ */
+static int
+is_vdev_cb(zpool_handle_t *zhp, nvlist_t *nv, void *cb_data)
+{
+       iostat_cbdata_t *cb = cb_data;
+       char *name;
+
+       name = zpool_vdev_name(g_zfs, zhp, nv, cb->cb_name_flags);
+
+       if (strcmp(name, cb->cb_vdev_names[0]) == 0)
+               return (1); /* match */
+
+       return (0);
+}
+
+/*
+ * Returns 1 if cb_data->cb_vdev_names[0] is a vdev name, 0 otherwise.
+ */
+static int
+is_vdev(zpool_handle_t *zhp, void *cb_data)
+{
+       return (for_each_vdev(zhp, is_vdev_cb, cb_data));
+}
+
+/*
+ * Check if vdevs are in a pool
+ *
+ * Return 1 if all argv[] strings are vdev names in pool "pool_name". Otherwise
+ * return 0.  If pool_name is NULL, then search all pools.
+ */
+static int
+are_vdevs_in_pool(int argc, char **argv, char *pool_name,
+    iostat_cbdata_t *cb)
+{
+       char **tmp_name;
+       int ret = 0;
+       int i;
+       int pool_count = 0;
+
+       if ((argc == 0) || !*argv)
+               return (0);
+
+       if (pool_name)
+               pool_count = 1;
+
+       /* Temporarily hijack cb_vdev_names for a second... */
+       tmp_name = cb->cb_vdev_names;
+
+       /* Go though our list of prospective vdev names */
+       for (i = 0; i < argc; i++) {
+               cb->cb_vdev_names = argv + i;
+
+               /* Is this name a vdev in our pools? */
+               ret = for_each_pool(pool_count, &pool_name, B_TRUE, NULL,
+                   is_vdev, cb);
+               if (!ret) {
+                       /* No match */
+                       break;
+               }
+       }
+
+       cb->cb_vdev_names = tmp_name;
+
+       return (ret);
+}
+
+static int
+is_pool_cb(zpool_handle_t *zhp, void *data)
+{
+       char *name = data;
+       if (strcmp(name, zpool_get_name(zhp)) == 0)
+               return (1);
+
+       return (0);
+}
+
+/*
+ * Do we have a pool named *name?  If so, return 1, otherwise 0.
+ */
+static int
+is_pool(char *name)
+{
+       return (for_each_pool(0, NULL, B_TRUE, NULL,  is_pool_cb, name));
+}
+
+/* Are all our argv[] strings pool names?  If so return 1, 0 otherwise. */
+static int
+are_all_pools(int argc, char **argv) {
+       if ((argc == 0) || !*argv)
+               return (0);
+
+       while (--argc >= 0)
+               if (!is_pool(argv[argc]))
+                       return (0);
+
+       return (1);
+}
+
+/*
+ * Helper function to print out vdev/pool names we can't resolve.  Used for an
+ * error message.
+ */
+static void
+error_list_unresolved_vdevs(int argc, char **argv, char *pool_name,
+    iostat_cbdata_t *cb)
+{
+       int i;
+       char *name;
+       char *str;
+       for (i = 0; i < argc; i++) {
+               name = argv[i];
+
+               if (is_pool(name))
+                       str = gettext("pool");
+               else if (are_vdevs_in_pool(1, &name, pool_name, cb))
+                       str = gettext("vdev in this pool");
+               else if (are_vdevs_in_pool(1, &name, NULL, cb))
+                       str = gettext("vdev in another pool");
+               else
+                       str = gettext("unknown");
+
+               fprintf(stderr, "\t%s (%s)\n", name, str);
+       }
+}
+
+/*
+ * Same as get_interval_count(), but with additional checks to not misinterpret
+ * guids as interval/count values.  Assumes VDEV_NAME_GUID is set in
+ * cb.cb_name_flags.
+ */
+static void
+get_interval_count_filter_guids(int *argc, char **argv, float *interval,
+    unsigned long *count, iostat_cbdata_t *cb)
+{
+       char **tmpargv = argv;
+       int argc_for_interval = 0;
+
+       /* Is the last arg an interval value?  Or a guid? */
+       if (*argc >= 1 && !are_vdevs_in_pool(1, &argv[*argc - 1], NULL, cb)) {
+               /*
+                * The last arg is not a guid, so it's probably an
+                * interval value.
+                */
+               argc_for_interval++;
+
+               if (*argc >= 2 &&
+                   !are_vdevs_in_pool(1, &argv[*argc - 2], NULL, cb)) {
+                       /*
+                        * The 2nd to last arg is not a guid, so it's probably
+                        * an interval value.
+                        */
+                       argc_for_interval++;
+               }
+       }
+
+       /* Point to our list of possible intervals */
+       tmpargv = &argv[*argc - argc_for_interval];
+
+       *argc = *argc - argc_for_interval;
+       get_interval_count(&argc_for_interval, tmpargv,
+           interval, count);
+}
+
+/*
+ * Floating point sleep().  Allows you to pass in a floating point value for
+ * seconds.
+ */
+static void
+fsleep(float sec) {
+       struct timespec req;
+       req.tv_sec = floor(sec);
+       req.tv_nsec = (sec - (float)req.tv_sec) * NANOSEC;
+       nanosleep(&req, NULL);
+}
+
+
+/*
+ * zpool iostat [-ghHLpPvy] [[-lq]-w] [-n name] [-T d|u]
+ *             [[ pool ...]|[pool vdev ...]|[vdev ...]]
+ *             [interval [count]]
  *
  *     -g      Display guid for individual vdev name.
  *     -L      Follow links when resolving vdev path name.
  *     -P      Display full path for vdev name.
  *     -v      Display statistics for individual vdevs
+ *     -h      Display help
+ *     -p      Display values in parsable (exact) format.
+ *     -H      Scripted mode.  Don't display headers, and separate properties
+ *             by a single tab.
+ *     -l      Display average latency
+ *     -q      Display queue depths
+ *     -w      Display histograms
  *     -T      Display a timestamp in date(1) or Unix format
  *
  * This command can be tricky because we want to be able to deal with pool
@@ -2866,17 +3847,26 @@ zpool_do_iostat(int argc, char **argv)
        int c;
        int ret;
        int npools;
-       unsigned long interval = 0, count = 0;
+       float interval = 0;
+       unsigned long count = 0;
        zpool_list_t *list;
        boolean_t verbose = B_FALSE;
+       boolean_t latency = B_FALSE, histo = B_FALSE;
+       boolean_t queues = B_FALSE, parseable = B_FALSE, scripted = B_FALSE;
        boolean_t omit_since_boot = B_FALSE;
        boolean_t guid = B_FALSE;
        boolean_t follow_links = B_FALSE;
        boolean_t full_name = B_FALSE;
        iostat_cbdata_t cb = { 0 };
 
+       /* Used for printing error message */
+       const char flag_to_arg[] = {[IOS_LATENCY] = 'l', [IOS_QUEUES] = 'q',
+           [IOS_L_HISTO] = 'w'};
+
+       uint64_t unsupported_flags;
+
        /* check options */
-       while ((c = getopt(argc, argv, "gLPT:vy")) != -1) {
+       while ((c = getopt(argc, argv, "gLPT:vyhplqwH")) != -1) {
                switch (c) {
                case 'g':
                        guid = B_TRUE;
@@ -2893,9 +3883,27 @@ zpool_do_iostat(int argc, char **argv)
                case 'v':
                        verbose = B_TRUE;
                        break;
+               case 'p':
+                       parseable = B_TRUE;
+                       break;
+               case 'l':
+                       latency = B_TRUE;
+                       break;
+               case 'q':
+                       queues = B_TRUE;
+                       break;
+               case 'H':
+                       scripted = B_TRUE;
+                       break;
+               case 'w':
+                       histo = B_TRUE;
+                       break;
                case 'y':
                        omit_since_boot = B_TRUE;
                        break;
+               case 'h':
+                       usage(B_FALSE);
+                       break;
                case '?':
                        (void) fprintf(stderr, gettext("invalid option '%c'\n"),
                            optopt);
@@ -2906,7 +3914,70 @@ zpool_do_iostat(int argc, char **argv)
        argc -= optind;
        argv += optind;
 
-       get_interval_count(&argc, argv, &interval, &count);
+       cb.cb_literal = parseable;
+       cb.cb_scripted = scripted;
+
+       if (guid)
+               cb.cb_name_flags |= VDEV_NAME_GUID;
+       if (follow_links)
+               cb.cb_name_flags |= VDEV_NAME_FOLLOW_LINKS;
+       if (full_name)
+               cb.cb_name_flags |= VDEV_NAME_PATH;
+       cb.cb_iteration = 0;
+       cb.cb_namewidth = 0;
+       cb.cb_verbose = verbose;
+
+       /* Get our interval and count values (if any) */
+       if (guid) {
+               get_interval_count_filter_guids(&argc, argv, &interval,
+                   &count, &cb);
+       } else {
+               get_interval_count(&argc, argv, &interval, &count);
+       }
+
+       if (argc == 0) {
+               /* No args, so just print the defaults. */
+       } else if (are_all_pools(argc, argv)) {
+               /* All the args are pool names */
+       } else if (are_vdevs_in_pool(argc, argv, NULL, &cb)) {
+               /* All the args are vdevs */
+               cb.cb_vdev_names = argv;
+               cb.cb_vdev_names_count = argc;
+               argc = 0; /* No pools to process */
+       } else if (are_all_pools(1, argv)) {
+               /* The first arg is a pool name */
+               if (are_vdevs_in_pool(argc - 1, argv + 1, argv[0], &cb)) {
+                       /* ...and the rest are vdev names */
+                       cb.cb_vdev_names = argv + 1;
+                       cb.cb_vdev_names_count = argc - 1;
+                       argc = 1; /* One pool to process */
+               } else {
+                       fprintf(stderr, gettext("Expected either a list of "));
+                       fprintf(stderr, gettext("pools, or list of vdevs in"));
+                       fprintf(stderr, " \"%s\", ", argv[0]);
+                       fprintf(stderr, gettext("but got:\n"));
+                       error_list_unresolved_vdevs(argc - 1, argv + 1,
+                           argv[0], &cb);
+                       fprintf(stderr, "\n");
+                       usage(B_FALSE);
+                       return (1);
+               }
+       } else {
+               /*
+                * The args don't make sense. The first arg isn't a pool name,
+                * nor are all the args vdevs.
+                */
+               fprintf(stderr, gettext("Unable to parse pools/vdevs list.\n"));
+               fprintf(stderr, "\n");
+               return (1);
+       }
+
+       if (cb.cb_vdev_names_count != 0) {
+               /*
+                * If user specified vdevs, it implies verbose.
+                */
+               cb.cb_verbose = B_TRUE;
+       }
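
To make the accepted argument forms concrete (pool and device names below are
hypothetical), the parsing above allows invocations such as:

    zpool iostat                   # all pools
    zpool iostat tank dozer        # a list of pools
    zpool iostat tank sdb sdc      # one pool followed by vdevs in that pool
    zpool iostat sdb sdc           # vdevs only, possibly from different pools

Anything else, e.g. a vdev name followed by a pool name, falls into the error
paths above.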
 
        /*
         * Construct the list of all interesting pools.
@@ -2926,19 +3997,56 @@ zpool_do_iostat(int argc, char **argv)
                return (1);
        }
 
+       if (histo && (queues || latency)) {
+               pool_list_free(list);
+               (void) fprintf(stderr,
+                   gettext("-w isn't allowed with [-q|-l]\n"));
+               usage(B_FALSE);
+               return (1);
+       }
+
        /*
         * Enter the main iostat loop.
         */
        cb.cb_list = list;
-       cb.cb_verbose = verbose;
-       if (guid)
-               cb.cb_name_flags |= VDEV_NAME_GUID;
-       if (follow_links)
-               cb.cb_name_flags |= VDEV_NAME_FOLLOW_LINKS;
-       if (full_name)
-               cb.cb_name_flags |= VDEV_NAME_PATH;
-       cb.cb_iteration = 0;
-       cb.cb_namewidth = 0;
+
+       if (histo) {
+               /*
+                * Histogram tables look out of place when you try to display
+                * them with the other stats, so make a rule that you can only
+                * print histograms by themselves.
+                */
+               cb.cb_flags = IOS_L_HISTO_M;
+       } else {
+               cb.cb_flags = IOS_DEFAULT_M;
+               if (latency)
+                       cb.cb_flags |= IOS_LATENCY_M;
+               if (queues)
+                       cb.cb_flags |= IOS_QUEUES_M;
+       }
+
+       /*
+        * See if the module supports all the stats we want to display.
+        */
+       unsupported_flags = cb.cb_flags & ~get_stat_flags(list);
+       if (unsupported_flags) {
+               uint64_t f;
+               int idx;
+               fprintf(stderr,
+                   gettext("The loaded zfs module doesn't support:"));
+
+               /* for each bit set in unsupported_flags */
+               for (f = unsupported_flags; f; f &= ~(1ULL << idx)) {
+                       idx = lowbit64(f) - 1;
+                       fprintf(stderr, " -%c", flag_to_arg[idx]);
+               }
+
+               fprintf(stderr, ".  Try running a newer module.\n");
+               pool_list_free(list);
+
+               return (1);
+       }
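
As an illustration, running "zpool iostat -l -q" against an older module that
lacks the extended stats would print something like:

    The loaded zfs module doesn't support: -l -q.  Try running a newer module.

and exit with an error.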
+
 
        for (;;) {
                if ((npools = pool_list_count(list)) == 0)
@@ -2949,7 +4057,7 @@ zpool_do_iostat(int argc, char **argv)
                         * we skip any printing.
                         */
                        boolean_t skip = (omit_since_boot &&
-                               cb.cb_iteration == 0);
+                           cb.cb_iteration == 0);
 
                        /*
                         * Refresh all statistics.  This is done as an
@@ -2958,7 +4066,7 @@ zpool_do_iostat(int argc, char **argv)
                         * properly accounted for.
                         */
                        (void) pool_list_iter(list, B_FALSE, refresh_iostat,
-                               &cb);
+                           &cb);
 
                        /*
                         * Iterate over all pools to determine the maximum width
@@ -2966,7 +4074,7 @@ zpool_do_iostat(int argc, char **argv)
                         */
                        cb.cb_namewidth = 0;
                        (void) pool_list_iter(list, B_FALSE, get_namewidth,
-                               &cb);
+                           &cb);
 
                        if (timestamp_fmt != NODATE)
                                print_timestamp(timestamp_fmt);
@@ -2974,28 +4082,38 @@ zpool_do_iostat(int argc, char **argv)
                        /*
                         * If it's the first time and we're not skipping it,
                         * or either skip or verbose mode, print the header.
+                        *
+                        * The histogram code explicitly prints its header on
+                        * every vdev, so skip this for histograms.
                         */
-                       if ((++cb.cb_iteration == 1 && !skip) ||
-                               (skip != verbose))
+                       if (((++cb.cb_iteration == 1 && !skip) ||
+                           (skip != verbose)) &&
+                           (!(cb.cb_flags & IOS_L_HISTO_M)) &&
+                           !cb.cb_scripted)
                                print_iostat_header(&cb);
 
                        if (skip) {
-                               (void) sleep(interval);
+                               (void) fsleep(interval);
                                continue;
                        }
 
-                       (void) pool_list_iter(list, B_FALSE, print_iostat, &cb);
+                       pool_list_iter(list, B_FALSE, print_iostat, &cb);
 
                        /*
                         * If there's more than one pool, and we're not in
                         * verbose mode (which prints a separator for us),
                         * then print a separator.
+                        *
+                        * In addition, if we're printing specific vdevs then
+                        * we also want an ending separator.
                         */
-                       if (npools > 1 && !verbose)
+                       if (((npools > 1 && !verbose &&
+                           !(cb.cb_flags & IOS_L_HISTO_M)) ||
+                           (!(cb.cb_flags & IOS_L_HISTO_M) &&
+                           cb.cb_vdev_names_count)) &&
+                           !cb.cb_scripted) {
                                print_iostat_separator(&cb);
-
-                       if (verbose)
-                               (void) printf("\n");
+                       }
                }
 
                /*
@@ -3010,7 +4128,7 @@ zpool_do_iostat(int argc, char **argv)
                if (count != 0 && --count == 0)
                        break;
 
-               (void) sleep(interval);
+               (void) fsleep(interval);
        }
 
        pool_list_free(list);
@@ -3352,7 +4470,8 @@ zpool_do_list(int argc, char **argv)
            "name,size,allocated,free,expandsize,fragmentation,capacity,"
            "dedupratio,health,altroot";
        char *props = default_props;
-       unsigned long interval = 0, count = 0;
+       float interval = 0;
+       unsigned long count = 0;
        zpool_list_t *list;
        boolean_t first = B_TRUE;
 
@@ -3427,7 +4546,7 @@ zpool_do_list(int argc, char **argv)
                        break;
 
                pool_list_free(list);
-               (void) sleep(interval);
+               (void) fsleep(interval);
        }
 
        if (argc == 0 && !cb.cb_scripted && pool_list_count(list) == 0) {
@@ -4776,7 +5895,8 @@ zpool_do_status(int argc, char **argv)
 {
        int c;
        int ret;
-       unsigned long interval = 0, count = 0;
+       float interval = 0;
+       unsigned long count = 0;
        status_cbdata_t cb = { 0 };
 
        /* check options */
@@ -4841,7 +5961,7 @@ zpool_do_status(int argc, char **argv)
                if (count != 0 && --count == 0)
                        break;
 
-               (void) sleep(interval);
+               (void) fsleep(interval);
        }
 
        return (0);
index c7a002efb17cff9fe4039a6df68cf2fa01d58297..df3f9bf834f4c037a36bb72540d274bb878784fd 100644 (file)
@@ -29,6 +29,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <strings.h>
+#include <ctype.h>
 
 #include "zpool_util.h"
 
@@ -84,3 +85,27 @@ num_logs(nvlist_t *nv)
        }
        return (nlogs);
 }
+
+/* Find the max element in an array of uint64_t values */
+uint64_t
+array64_max(uint64_t array[], unsigned int len) {
+       uint64_t max = 0;
+       int i;
+       for (i = 0; i < len; i++)
+               max = MAX(max, array[i]);
+
+       return (max);
+}
+
+/*
+ * Return 1 if "str" is a number string, 0 otherwise.  Works for integer and
+ * floating point numbers.
+ */
+int
+isnumber(char *str) {
+       for (; *str; str++)
+               if (!(isdigit(*str) || (*str == '.')))
+                       return (0);
+
+       return (1);
+}
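
For example, isnumber("300") and isnumber("1.5") return 1, while isnumber("5s")
and isnumber("-1") return 0 (the floating point form matters for the new
fractional intervals).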
index 1b4ce518f83b3de3b842130c22d2191d33de281f..f279fd5dd63dc24c8fc07b2f2748870b6704aa29 100644 (file)
@@ -38,6 +38,8 @@ extern "C" {
 void *safe_malloc(size_t);
 void zpool_no_memory(void);
 uint_t num_logs(nvlist_t *nv);
+uint64_t array64_max(uint64_t array[], unsigned int len);
+int isnumber(char *str);
 
 /*
  * Virtual device functions
@@ -55,6 +57,10 @@ nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname,
 int for_each_pool(int, char **, boolean_t unavail, zprop_list_t **,
     zpool_iter_f, void *);
 
+/* Vdev list functions */
+typedef int (*pool_vdev_iter_f)(zpool_handle_t *, nvlist_t *, void *);
+int for_each_vdev(zpool_handle_t *zhp, pool_vdev_iter_f func, void *data);
+
 typedef struct zpool_list zpool_list_t;
 
 zpool_list_t *pool_list_get(int, char **, zprop_list_t **, int *);
index 3faee0adddd00611d41526af7a34de15bff6dc55..654b932843182eef5837359fac3d1dc7cc49eb28 100644 (file)
@@ -747,10 +747,21 @@ extern int zfs_unshareall(zfs_handle_t *);
 extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *,
     void *, void *, int, zfs_share_op_t);
 
+enum zfs_nicenum_format {
+       ZFS_NICENUM_1024 = 0,
+       ZFS_NICENUM_TIME = 1,
+       ZFS_NICENUM_RAW = 2
+};
+
 /*
  * Utility function to convert a number to a human-readable form.
  */
 extern void zfs_nicenum(uint64_t, char *, size_t);
+extern void zfs_nicenum_format(uint64_t num, char *buf, size_t buflen,
+    enum zfs_nicenum_format type);
+
+
+extern void zfs_nicetime(uint64_t, char *, size_t);
 extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *);
 
 /*
index e2974ad7ac83fd9cf6bffe7505fdd05d95b0ab76..65dba125c94129fa746f95569223aadcf0e0b6cf 100644 (file)
@@ -32,6 +32,7 @@
 #define        _SYS_FS_ZFS_H
 
 #include <sys/time.h>
+#include <sys/zio_priority.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -528,6 +529,37 @@ typedef struct zpool_rewind_policy {
 #define        ZPOOL_CONFIG_DTL                "DTL"
 #define        ZPOOL_CONFIG_SCAN_STATS         "scan_stats"    /* not stored on disk */
 #define        ZPOOL_CONFIG_VDEV_STATS         "vdev_stats"    /* not stored on disk */
+
+/* container nvlist of extended stats */
+#define        ZPOOL_CONFIG_VDEV_STATS_EX      "vdev_stats_ex"
+
+/* Active queue read/write stats */
+#define        ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE   "vdev_sync_r_active_queue"
+#define        ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE   "vdev_sync_w_active_queue"
+#define        ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE  "vdev_async_r_active_queue"
+#define        ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE  "vdev_async_w_active_queue"
+#define        ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE    "vdev_async_scrub_active_queue"
+
+/* Queue sizes */
+#define        ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE     "vdev_sync_r_pend_queue"
+#define        ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE     "vdev_sync_w_pend_queue"
+#define        ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE    "vdev_async_r_pend_queue"
+#define        ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE    "vdev_async_w_pend_queue"
+#define        ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE      "vdev_async_scrub_pend_queue"
+
+/* Latency read/write histogram stats */
+#define        ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO       "vdev_tot_r_lat_histo"
+#define        ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO       "vdev_tot_w_lat_histo"
+#define        ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO      "vdev_disk_r_lat_histo"
+#define        ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO      "vdev_disk_w_lat_histo"
+#define        ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO      "vdev_sync_r_lat_histo"
+#define        ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO      "vdev_sync_w_lat_histo"
+#define        ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO     "vdev_async_r_lat_histo"
+#define        ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO     "vdev_async_w_lat_histo"
+#define        ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO       "vdev_scrub_histo"
+
+
+
 #define        ZPOOL_CONFIG_WHOLE_DISK         "whole_disk"
 #define        ZPOOL_CONFIG_ERRCOUNT           "error_count"
 #define        ZPOOL_CONFIG_NOT_PRESENT        "not_present"
@@ -766,8 +798,49 @@ typedef struct vdev_stat {
        uint64_t        vs_scan_removing;       /* removing?    */
        uint64_t        vs_scan_processed;      /* scan processed bytes */
        uint64_t        vs_fragmentation;       /* device fragmentation */
+
 } vdev_stat_t;
 
+/*
+ * Extended stats
+ *
+ * These are stats which aren't included in the original iostat output.  For
+ * convenience, they are grouped together in vdev_stat_ex, although each stat
+ * is individually exported as an nvlist entry.
+ */
+typedef struct vdev_stat_ex {
+       /* Number of ZIOs issued to disk and waiting to finish */
+       uint64_t vsx_active_queue[ZIO_PRIORITY_NUM_QUEUEABLE];
+
+       /* Number of ZIOs pending to be issued to disk */
+       uint64_t vsx_pend_queue[ZIO_PRIORITY_NUM_QUEUEABLE];
+
+       /*
+        * Below are the histograms for various latencies. Buckets are in
+        * units of nanoseconds.
+        */
+
+       /*
+        * 2^37 nanoseconds = ~137s. Timeouts will probably start kicking in
+        * before this.
+        */
+#define        VDEV_HISTO_BUCKETS 37
+
+       /* Amount of time in ZIO queue (ns) */
+       uint64_t vsx_queue_histo[ZIO_PRIORITY_NUM_QUEUEABLE]
+           [VDEV_HISTO_BUCKETS];
+
+       /* Total ZIO latency (ns).  Includes queuing and disk access time */
+       uint64_t vsx_total_histo[ZIO_TYPES][VDEV_HISTO_BUCKETS];
+
+       /* Amount of time to read/write the disk (ns) */
+       uint64_t vsx_disk_histo[ZIO_TYPES][VDEV_HISTO_BUCKETS];
+
+       /* "lookup the bucket for a value" macro */
+#define        HISTO(a) (a != 0 ? MIN(highbit64(a) - 1, VDEV_HISTO_BUCKETS - 1) : 0)
+
+} vdev_stat_ex_t;
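
Worked example (not part of the patch): an IO with a total latency of
300us = 300,000ns has highbit64(300000) = 19, so HISTO(300000) = MIN(19 - 1, 36)
= 18.  Bucket 18 holds latencies from 2^18 ns up to 2^19 - 1 ns (roughly
262us-524us), i.e. the row labeled 524us in the "zpool iostat -w" histograms.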
+
 /*
  * DDT statistics.  Note: all fields should be 64-bit because this
  * is passed between kernel and userland as an nvlist uint64 array.
index 5abd8c0194d6111555322f8f14ba31fea2d5c8ee..4f54b1707c54383f6e6f34e16aa527a54861afa2 100644 (file)
@@ -85,7 +85,7 @@ extern void vdev_expand(vdev_t *vd, uint64_t txg);
 extern void vdev_split(vdev_t *vd);
 extern void vdev_deadman(vdev_t *vd);
 
-
+extern void vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx);
 extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
 extern void vdev_clear_stats(vdev_t *vd);
 extern void vdev_stat_update(zio_t *zio, uint64_t psize);
@@ -153,6 +153,7 @@ extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
 extern int vdev_label_number(uint64_t psise, uint64_t offset);
 extern nvlist_t *vdev_label_read_config(vdev_t *vd, uint64_t txg);
 extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **);
+extern void vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv);
 
 typedef enum {
        VDEV_LABEL_CREATE,      /* create/add a new device */
index 4958cad9c4c96008b2abaf0149ad642e7f34b25b..0d09c81c7f83030451b09a8b498d00c7a19b640c 100644 (file)
@@ -150,6 +150,7 @@ struct vdev {
        vdev_t          **vdev_child;   /* array of children            */
        uint64_t        vdev_children;  /* number of children           */
        vdev_stat_t     vdev_stat;      /* virtual device statistics    */
+       vdev_stat_ex_t  vdev_stat_ex;   /* extended statistics          */
        boolean_t       vdev_expanding; /* expand the vdev?             */
        boolean_t       vdev_reopening; /* reopen in progress?          */
        boolean_t       vdev_nonrot;    /* true if solid state          */
index e68223eb30e6cfec79411a9a6fb94f2d9557ec89..693035ee290a2ec8924a18c11980044946ba4ed8 100644 (file)
@@ -647,6 +647,7 @@ extern void delay(clock_t ticks);
 extern uint64_t physmem;
 
 extern int highbit64(uint64_t i);
+extern int lowbit64(uint64_t i);
 extern int random_get_bytes(uint8_t *ptr, size_t len);
 extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);
 
index ced7fe87bc837ad36354bf81beb989aeb656545f..9790b4a9008c6df1f2cd67ea8b5a8c2fd0123031 100644 (file)
@@ -421,7 +421,8 @@ struct zio {
        uint64_t        io_offset;
        hrtime_t        io_timestamp;   /* submitted at */
        hrtime_t        io_delta;       /* vdev queue service delta */
-       uint64_t        io_delay;       /* vdev disk service delta (ticks) */
+       hrtime_t        io_delay;       /* Device access time (disk or */
+                                       /* file). */
        avl_node_t      io_queue_node;
        avl_node_t      io_offset_node;
 
index e33b9585b1c0c027a368c93771a87a1543d520c9..3fc3589be0c12703da90d4b66d2afd565352f685 100644 (file)
@@ -29,8 +29,7 @@ typedef enum zio_priority {
        ZIO_PRIORITY_ASYNC_WRITE,       /* spa_sync() */
        ZIO_PRIORITY_SCRUB,             /* asynchronous scrub/resilver reads */
        ZIO_PRIORITY_NUM_QUEUEABLE,
-
-       ZIO_PRIORITY_NOW                /* non-queued i/os (e.g. free) */
+       ZIO_PRIORITY_NOW,               /* non-queued i/os (e.g. free) */
 } zio_priority_t;
 
 #ifdef __cplusplus
index 5d10657be582867113809b8ecf7b69e1cce726d2..c2525dd2a424e7deb787b35faff8f4d0353c1e8b 100644 (file)
@@ -39,6 +39,9 @@
 #ifndef ABS
 #define        ABS(a)          ((a) < 0 ? -(a) : (a))
 #endif
+#ifndef ARRAY_SIZE
+#define        ARRAY_SIZE(a) (sizeof (a) / sizeof (a[0]))
+#endif
 
 #define        makedevice(maj, min)    makedev(maj, min)
 #define        _sysconf(a)             sysconf(a)
index 8cacc01dd1e4749ce7eade30c977cce6f8d1d1be..789df407c47b57c1eea96c8b3d55ae625e358eb1 100644 (file)
@@ -3538,7 +3538,6 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
                 * If it's a raidz device, we need to stick in the parity level.
                 */
                if (strcmp(path, VDEV_TYPE_RAIDZ) == 0) {
-
                        verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NPARITY,
                            &value) == 0);
                        (void) snprintf(buf, sizeof (buf), "%s%llu", path,
@@ -3552,7 +3551,6 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *zhp, nvlist_t *nv,
                 */
                if (name_flags & VDEV_NAME_TYPE_ID) {
                        uint64_t id;
-
                        verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_ID,
                            &id) == 0);
                        (void) snprintf(tmpbuf, sizeof (tmpbuf), "%s-%llu",
index 57c2ac85386a13bcc53e1c20fc8285c949d70e28..926ed4ed8aa9647980a998e93878585da56450fb 100644 (file)
@@ -596,27 +596,49 @@ zfs_strdup(libzfs_handle_t *hdl, const char *str)
  * Convert a number to an appropriately human-readable output.
  */
 void
-zfs_nicenum(uint64_t num, char *buf, size_t buflen)
+zfs_nicenum_format(uint64_t num, char *buf, size_t buflen,
+    enum zfs_nicenum_format format)
 {
        uint64_t n = num;
        int index = 0;
-       char u;
+       const char *u;
+       const char *units[3][7] = {
+           [ZFS_NICENUM_1024] = {"", "K", "M", "G", "T", "P", "E"},
+           [ZFS_NICENUM_TIME] = {"ns", "us", "ms", "s", "?", "?", "?"}
+       };
+
+       const int units_len[] = {[ZFS_NICENUM_1024] = 6,
+           [ZFS_NICENUM_TIME] = 4};
+
+       const int k_unit[] = {  [ZFS_NICENUM_1024] = 1024,
+           [ZFS_NICENUM_TIME] = 1000};
 
-       while (n >= 1024 && index < 6) {
-               n /= 1024;
+       double val;
+
+       if (format == ZFS_NICENUM_RAW) {
+               snprintf(buf, buflen, "%llu", (u_longlong_t) num);
+               return;
+       }
+
+
+       while (n >= k_unit[format] && index < units_len[format]) {
+               n /= k_unit[format];
                index++;
        }
 
-       u = " KMGTPE"[index];
+       u = units[format][index];
 
-       if (index == 0) {
-               (void) snprintf(buf, buflen, "%llu", (u_longlong_t) n);
-       } else if ((num & ((1ULL << 10 * index) - 1)) == 0) {
+       /* Don't print 0ns times */
+       if ((format == ZFS_NICENUM_TIME) && (num == 0)) {
+               (void) snprintf(buf, buflen, "-");
+       } else if ((index == 0) || ((num %
+           (uint64_t) powl(k_unit[format], index)) == 0)) {
                /*
                 * If this is an even multiple of the base, always display
                 * without any decimal precision.
                 */
-               (void) snprintf(buf, buflen, "%llu%c", (u_longlong_t) n, u);
+               (void) snprintf(buf, buflen, "%llu%s", (u_longlong_t) n, u);
+
        } else {
                /*
                 * We want to choose a precision that reflects the best choice
@@ -629,13 +651,61 @@ zfs_nicenum(uint64_t num, char *buf, size_t buflen)
                 */
                int i;
                for (i = 2; i >= 0; i--) {
-                       if (snprintf(buf, buflen, "%.*f%c", i,
-                           (double)num / (1ULL << 10 * index), u) <= 5)
-                               break;
+                       val = (double) num /
+                           (uint64_t) powl(k_unit[format], index);
+
+                       /*
+                        * Don't print floating point values for time.  Note,
+                        * we use floor() instead of round() here, since
+                        * round can result in undesirable results.  For
+                        * example, if "num" is in the range of
+                        * 999500-999999, it will print out "1000us".  This
+                        * doesn't happen if we use floor().
+                        */
+                       if (format == ZFS_NICENUM_TIME) {
+                               if (snprintf(buf, buflen, "%d%s",
+                                   (unsigned int) floor(val), u) <= 5)
+                                       break;
+
+                       } else {
+                               if (snprintf(buf, buflen, "%.*f%s", i,
+                                   val, u) <= 5)
+                                       break;
+                       }
                }
        }
 }
 
+/*
+ * Convert a number to an appropriately human-readable output.
+ */
+void
+zfs_nicenum(uint64_t num, char *buf, size_t buflen)
+{
+       zfs_nicenum_format(num, buf, buflen, ZFS_NICENUM_1024);
+}
+
+/*
+ * Convert a time to an appropriately human-readable output.
+ * @num:       Time in nanoseconds
+ */
+void
+zfs_nicetime(uint64_t num, char *buf, size_t buflen)
+{
+       zfs_nicenum_format(num, buf, buflen, ZFS_NICENUM_TIME);
+}
+
+/*
+ * Print out a raw number with correct column spacing
+ */
+void
+zfs_niceraw(uint64_t num, char *buf, size_t buflen)
+{
+       zfs_nicenum_format(num, buf, buflen, ZFS_NICENUM_RAW);
+}
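
A minimal usage sketch of the formatters above (illustrative only, not part of
the patch; it assumes the program is linked against this libzfs):

#include <stdio.h>
#include <libzfs.h>

int
main(void)
{
	char buf[32];

	/* base-1024 units; 1536 is not an even multiple, so decimals are kept */
	zfs_nicenum_format(1536, buf, sizeof (buf), ZFS_NICENUM_1024);
	(void) printf("%s\n", buf);	/* expected: 1.50K */

	/* time units; floor() keeps 999750ns at "999us", not "1000us" */
	zfs_nicetime(999750, buf, sizeof (buf));
	(void) printf("%s\n", buf);	/* expected: 999us */

	/* raw (exact) values, as used by "zpool iostat -p" */
	zfs_nicenum_format(999750, buf, sizeof (buf), ZFS_NICENUM_RAW);
	(void) printf("%s\n", buf);	/* expected: 999750 */

	return (0);
}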
+
+
+
 void
 libzfs_print_on_error(libzfs_handle_t *hdl, boolean_t printerr)
 {
index 49d17ece3273299a98c14907d2a8ec7d1517d9e4..3d85093e2da33bd50f6e43fca2893f446726d860 100644 (file)
@@ -1071,6 +1071,50 @@ highbit64(uint64_t i)
        return (h);
 }
 
+/*
+ * Find lowest one bit set.
+ * Returns bit number + 1 of lowest bit that is set, otherwise returns 0.
+ * This is basically a reimplementation of ffsll(), which is GNU specific.
+ */
+int
+lowbit64(uint64_t i)
+{
+       register int h = 64;
+       if (i == 0)
+               return (0);
+
+       if (i & 0x00000000ffffffffULL)
+               h -= 32;
+       else
+               i >>= 32;
+
+       if (i & 0x0000ffff)
+               h -= 16;
+       else
+               i >>= 16;
+
+       if (i & 0x00ff)
+               h -= 8;
+       else
+               i >>= 8;
+
+       if (i & 0x0f)
+               h -= 4;
+       else
+               i >>= 4;
+
+       if (i & 0x3)
+               h -= 2;
+       else
+               i >>= 2;
+
+       if (i & 0x1)
+               h -= 1;
+
+       return (h);
+}
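
A minimal sketch (not part of the patch) of how the zpool iostat error path walks
a mask of flag bits with lowbit64(); the mask value here is made up:

#include <stdio.h>
#include <stdint.h>

extern int lowbit64(uint64_t i);	/* as implemented above */

int
main(void)
{
	uint64_t f = (1ULL << 0) | (1ULL << 2);	/* pretend bits 0 and 2 are set */
	int idx;

	while (f != 0) {
		idx = lowbit64(f) - 1;		/* 0 on the first pass, then 2 */
		(void) printf("bit %d is set\n", idx);
		f &= ~(1ULL << idx);		/* clear it and continue */
	}
	return (0);
}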
+
+
 static int random_fd = -1, urandom_fd = -1;
 
 static int
index 231043d75bed52340313aa467078e72a6313961c..7a0748c0322685835dd7a18770f1aac0cd9d58cc 100644 (file)
@@ -67,7 +67,7 @@ static void
 show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent)
 {
        vdev_stat_t *vs;
-       vdev_stat_t v0 = { 0 };
+       vdev_stat_t *v0 = { 0 };
        uint64_t sec;
        uint64_t is_log = 0;
        nvlist_t **child;
@@ -76,6 +76,8 @@ show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent)
        char rops[6], wops[6], rbytes[6], wbytes[6], rerr[6], werr[6], cerr[6];
        char *prefix = "";
 
+       v0 = umem_zalloc(sizeof (*v0), UMEM_NOFAIL);
+
        if (indent == 0 && desc != NULL) {
                (void) printf("                           "
                    " capacity   operations   bandwidth  ---- errors ----\n");
@@ -91,7 +93,7 @@ show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent)
 
                if (nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
                    (uint64_t **)&vs, &c) != 0)
-                       vs = &v0;
+                       vs = v0;
 
                sec = MAX(1, vs->vs_timestamp / NANOSEC);
 
@@ -114,6 +116,7 @@ show_vdev_stats(const char *desc, const char *ctype, nvlist_t *nv, int indent)
                    vs->vs_space ? 6 : 0, vs->vs_space ? avail : "",
                    rops, wops, rbytes, wbytes, rerr, werr, cerr);
        }
+       free(v0);
 
        if (nvlist_lookup_nvlist_array(nv, ctype, &child, &children) != 0)
                return;
index bcbcaa249abfff510103fee46e3498f00c491944..1f14eee98734d7a19fcac718718ead598524a707 100644 (file)
@@ -95,7 +95,9 @@ zpool \- configures ZFS storage pools
 
 .LP
 .nf
-\fBzpool iostat\fR [\fB-T\fR d | u ] [\fB-gLPvy\fR] [\fIpool\fR] ... [\fIinterval\fR[\fIcount\fR]]
+\fB\fBzpool iostat\fR [\fB-T\fR \fBd\fR | \fBu\fR] [\fB-ghHLpPvy\fR] [\fB-w\fR|[\fB-lq\fR]]
+     [[\fIpool\fR ...]|[\fIpool vdev\fR ...]|[\fIvdev\fR ...]] [\fIinterval\fR[\fIcount\fR]]\fR
+
 .fi
 
 .LP
@@ -1677,11 +1679,22 @@ Scan using the default search path, the libblkid cache will not be consulted.  A
 .ne 2
 .mk
 .na
-\fB\fBzpool iostat\fR [\fB-T\fR \fBd\fR | \fBu\fR] [\fB-gLPvy\fR] [\fIpool\fR] ... [\fIinterval\fR[\fIcount\fR]]\fR
+\fB\fBzpool iostat\fR [\fB-T\fR \fBd\fR | \fBu\fR] [\fB-ghHLpPvy\fR] [\fB-w\fR|[\fB-lq\fR]] [[\fIpool\fR ...]|[\fIpool vdev\fR ...]|[\fIvdev\fR ...]] [\fIinterval\fR[\fIcount\fR]]\fR
+
 .ad
 .sp .6
 .RS 4n
-Displays \fBI/O\fR statistics for the given pools. When given an interval, the statistics are printed every \fIinterval\fR seconds until \fBCtrl-C\fR is pressed. If no \fIpools\fR are specified, statistics for every pool in the system is shown. If \fIcount\fR is specified, the command exits after \fIcount\fR reports are printed.
+Displays \fBI/O\fR statistics for the given \fIpool\fRs/\fIvdev\fRs. You can
+pass in a list of \fIpool\fRs, a \fIpool\fR and list of \fIvdev\fRs in that
+\fIpool\fR, or a list of any \fIvdev\fRs from any \fIpool\fR. If no items are
+specified, statistics for every pool in the system are shown.  When given an
+interval, the statistics are printed every \fIinterval\fR seconds until
+\fBCtrl-C\fR is pressed. If \fIcount\fR is specified, the command exits after
+\fIcount\fR reports are printed.  The first report printed is always the
+statistics since boot regardless of whether \fIinterval\fR and \fIcount\fR
+are passed.  However, this behavior can be suppressed with the -y flag.  Also
+note that the units of 'K', 'M', 'G'...  that are printed in the report are in
+base 1024.  To get the raw values, use the \fB-p\fR flag.
 .sp
 .ne 2
 .mk
@@ -1706,6 +1719,17 @@ Specify \fBu\fR for a printed representation of the internal representation of t
 Display vdev GUIDs instead of the normal device names. These GUIDs can be used in place of device names for the zpool detach/offline/remove/replace commands.
 .RE
 
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-H\fR\fR
+.ad
+.RS 12n
+.rt
+Scripted mode. Do not display headers, and separate fields by a single tab instead of arbitrary space.
+.RE
+
 .sp
 .ne 2
 .mk
@@ -1717,6 +1741,17 @@ Display vdev GUIDs instead of the normal device names. These GUIDs can be used i
 Display real paths for vdevs resolving all symbolic links. This can be used to look up the current block device name regardless of the /dev/disk/ path used to open it.
 .RE
 
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-p\fR\fR
+.ad
+.RS 12n
+.rt
+Display numbers in parseable (exact) values.  Time values are in nanoseconds.
+.RE
+
 .sp
 .ne 2
 .mk
@@ -1749,9 +1784,177 @@ Verbose statistics. Reports usage statistics for individual \fIvdevs\fR within t
 .rt
 Omit statistics since boot.  Normally the first line of output reports the statistics since boot.  This option suppresses that first line of output.
 .RE
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-w\fR\fR
+.ad
+.RS 12n
+.rt
+Display latency histograms:
+
+.sp
+.ne 2
+.mk
+.na
+total_wait:
+.ad
+.RS 20n
+.rt
+Total IO time (queuing + disk IO time).
+.RE
+.ne 2
+.mk
+.na
+disk_wait:
+.ad
+.RS 20n
+.rt
+Disk IO time (time reading/writing the disk).
+.RE
+.ne 2
+.mk
+.na
+syncq_wait:
+.ad
+.RS 20n
+.rt
+Amount of time IO spent in synchronous priority queues.  Does not include
+disk time.
+.RE
+.ne 2
+.mk
+.na
+asyncq_wait:
+.ad
+.RS 20n
+.rt
+Amount of time IO spent in asynchronous priority queues.  Does not include
+disk time.
+.RE
+.ne 2
+.mk
+.na
+scrub:
+.ad
+.RS 20n
+.rt
+Amount of time IO spent in scrub queue. Does not include disk time.
+
+
+.RE
+
+All histogram buckets are power-of-two sized.  The time labels are the end
+ranges of the buckets, so for example, a 15ns bucket stores latencies from
+8-15ns.  The last bucket is also a catch-all for latencies higher than the
+maximum.
+.RE
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-l\fR\fR
+.ad
+.RS 12n
+.rt
+Include average latency statistics:
+
+.sp
+.ne 2
+.mk
+.na
+total_wait:
+.ad
+.RS 20n
+.rt
+Average total IO time (queuing + disk IO time).
+.RE
+.ne 2
+.mk
+.na
+disk_wait:
+.ad
+.RS 20n
+.rt
+Average disk IO time (time reading/writing the disk).
+.RE
+.ne 2
+.mk
+.na
+syncq_wait:
+.ad
+.RS 20n
+.rt
+Average amount of time IO spent in synchronous priority queues.  Does not
+include disk time.
+.RE
+.ne 2
+.mk
+.na
+asyncq_wait:
+.ad
+.RS 20n
+.rt
+Average amount of time IO spent in asynchronous priority queues.  Does not
+include disk time.
+.RE
+.ne 2
+.mk
+.na
+scrub:
+.ad
+.RS 20n
+.rt
+Average queuing time in scrub queue.  Does not include disk time.
+.RE
 
+.RE
+.sp
+.ne 2
+.mk
+.na
+\fB\fB-q\fR\fR
+.ad
+.RS 12n
+.rt
+Include active queue statistics.  Each priority queue has both pending ("pend")
+and active ("activ") IOs.  Pending IOs are waiting to be issued to the disk, and
+active IOs have been issued to disk and are waiting for completion.  These stats
+are broken out by priority queue:
+.sp
+.ne 2
+.mk
+.na
+syncq_read/write:
+.ad
+.RS 20n
+.rt
+Current number of entries in synchronous priority queues.
+.RE
+.ne 2
+.mk
+.na
+asyncq_read/write:
+.ad
+.RS 20n
+.rt
+Current number of entries in asynchronous priority queues.
+.RE
+.ne 2
+.mk
+.na
+scrubq_read:
+.ad
+.RS 20n
+.rt
+Current number of entries in scrub queue.
 .RE
 
+All queue statistics are instantaneous measurements of the number of entries
+in the queues.  If you specify an interval, the measurements will be sampled
+from the end of the interval.
+.RE
 .sp
 .ne 2
 .mk
index 9246495ee178c8498577c9c25e9eeec5e363e469..c23fd7a3aad3bfffb735e8e6c12b5dc7b254bae2 100644 (file)
@@ -3350,6 +3350,8 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config)
                            ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
                            == 0);
                        vdev_get_stats(vd, vs);
+                       vdev_config_generate_stats(vd, l2cache[i]);
+
                }
        }
 }
index 7114c2efcafcda71f51f37e849c9ca7e19df4c30..13739017382ab0d02257b35d461323209063e792 100644 (file)
@@ -2764,49 +2764,123 @@ vdev_accessible(vdev_t *vd, zio_t *zio)
        return (B_TRUE);
 }
 
-/*
- * Get statistics for the given vdev.
- */
-void
-vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
+static void
+vdev_get_child_stat(vdev_t *cvd, vdev_stat_t *vs, vdev_stat_t *cvs)
 {
-       spa_t *spa = vd->vdev_spa;
-       vdev_t *rvd = spa->spa_root_vdev;
-       int c, t;
+       int t;
+       for (t = 0; t < ZIO_TYPES; t++) {
+               vs->vs_ops[t] += cvs->vs_ops[t];
+               vs->vs_bytes[t] += cvs->vs_bytes[t];
+       }
 
-       ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
+       cvs->vs_scan_removing = cvd->vdev_removing;
+}
 
-       mutex_enter(&vd->vdev_stat_lock);
-       bcopy(&vd->vdev_stat, vs, sizeof (*vs));
-       vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
-       vs->vs_state = vd->vdev_state;
-       vs->vs_rsize = vdev_get_min_asize(vd);
-       if (vd->vdev_ops->vdev_op_leaf)
-               vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
-       vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
-       if (vd->vdev_aux == NULL && vd == vd->vdev_top && !vd->vdev_ishole) {
-               vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation;
+/*
+ * Get extended stats
+ */
+static void
+vdev_get_child_stat_ex(vdev_t *cvd, vdev_stat_ex_t *vsx, vdev_stat_ex_t *cvsx)
+{
+       int t, b;
+       for (t = 0; t < ZIO_TYPES; t++) {
+               for (b = 0; b < VDEV_HISTO_BUCKETS; b++) {
+                       vsx->vsx_disk_histo[t][b] += cvsx->vsx_disk_histo[t][b];
+                       vsx->vsx_total_histo[t][b] +=
+                           cvsx->vsx_total_histo[t][b];
+               }
+       }
+
+       for (t = 0; t < ZIO_PRIORITY_NUM_QUEUEABLE; t++) {
+               for (b = 0; b < VDEV_HISTO_BUCKETS; b++) {
+                       vsx->vsx_queue_histo[t][b] +=
+                           cvsx->vsx_queue_histo[t][b];
+               }
+               vsx->vsx_active_queue[t] += cvsx->vsx_active_queue[t];
+               vsx->vsx_pend_queue[t] += cvsx->vsx_pend_queue[t];
        }
+}
 
+/*
+ * Get statistics for the given vdev.
+ */
+static void
+vdev_get_stats_ex_impl(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
+{
+       int c, t;
        /*
         * If we're getting stats on the root vdev, aggregate the I/O counts
         * over all top-level vdevs (i.e. the direct children of the root).
         */
-       if (vd == rvd) {
-               for (c = 0; c < rvd->vdev_children; c++) {
-                       vdev_t *cvd = rvd->vdev_child[c];
+       if (!vd->vdev_ops->vdev_op_leaf) {
+               if (vs) {
+                       memset(vs->vs_ops, 0, sizeof (vs->vs_ops));
+                       memset(vs->vs_bytes, 0, sizeof (vs->vs_bytes));
+               }
+               if (vsx)
+                       memset(vsx, 0, sizeof (*vsx));
+
+               for (c = 0; c < vd->vdev_children; c++) {
+                       vdev_t *cvd = vd->vdev_child[c];
                        vdev_stat_t *cvs = &cvd->vdev_stat;
+                       vdev_stat_ex_t *cvsx = &cvd->vdev_stat_ex;
 
-                       for (t = 0; t < ZIO_TYPES; t++) {
-                               vs->vs_ops[t] += cvs->vs_ops[t];
-                               vs->vs_bytes[t] += cvs->vs_bytes[t];
-                       }
-                       cvs->vs_scan_removing = cvd->vdev_removing;
+                       vdev_get_stats_ex_impl(cvd, cvs, cvsx);
+                       if (vs)
+                               vdev_get_child_stat(cvd, vs, cvs);
+                       if (vsx)
+                               vdev_get_child_stat_ex(cvd, vsx, cvsx);
+
+               }
+       } else {
+               /*
+                * We're a leaf.  Just copy our ZIO active queue stats in.  The
+                * other leaf stats are updated in vdev_stat_update().
+                */
+               if (!vsx)
+                       return;
+
+               memcpy(vsx, &vd->vdev_stat_ex, sizeof (vd->vdev_stat_ex));
+
+               for (t = 0; t < ARRAY_SIZE(vd->vdev_queue.vq_class); t++) {
+                       vsx->vsx_active_queue[t] =
+                           vd->vdev_queue.vq_class[t].vqc_active;
+                       vsx->vsx_pend_queue[t] = avl_numnodes(
+                           &vd->vdev_queue.vq_class[t].vqc_queued_tree);
+               }
+       }
+}
+
+void
+vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
+{
+       mutex_enter(&vd->vdev_stat_lock);
+       if (vs) {
+               bcopy(&vd->vdev_stat, vs, sizeof (*vs));
+               vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
+               vs->vs_state = vd->vdev_state;
+               vs->vs_rsize = vdev_get_min_asize(vd);
+               if (vd->vdev_ops->vdev_op_leaf)
+                       vs->vs_rsize += VDEV_LABEL_START_SIZE +
+                           VDEV_LABEL_END_SIZE;
+               vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
+               if (vd->vdev_aux == NULL && vd == vd->vdev_top &&
+                   !vd->vdev_ishole) {
+                       vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation;
                }
        }
+
+       ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_READER) != 0);
+       vdev_get_stats_ex_impl(vd, vs, vsx);
        mutex_exit(&vd->vdev_stat_lock);
 }
 
+void
+vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
+{
+       return (vdev_get_stats_ex(vd, vs, NULL));
+}
+
 void
 vdev_clear_stats(vdev_t *vd)
 {
@@ -2840,6 +2914,7 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
        vdev_t *pvd;
        uint64_t txg = zio->io_txg;
        vdev_stat_t *vs = &vd->vdev_stat;
+       vdev_stat_ex_t *vsx = &vd->vdev_stat_ex;
        zio_type_t type = zio->io_type;
        int flags = zio->io_flags;
 
@@ -2890,8 +2965,24 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
                                vs->vs_self_healed += psize;
                }
 
-               vs->vs_ops[type]++;
-               vs->vs_bytes[type] += psize;
+               /*
+                * The bytes/ops/histograms are recorded at the leaf level and
+                * aggregated into the higher level vdevs in vdev_get_stats().
+                */
+               if (vd->vdev_ops->vdev_op_leaf) {
+
+                       vs->vs_ops[type]++;
+                       vs->vs_bytes[type] += psize;
+
+                       if (zio->io_delta && zio->io_delay) {
+                               vsx->vsx_queue_histo[zio->io_priority]
+                                   [HISTO(zio->io_delta - zio->io_delay)]++;
+                               vsx->vsx_disk_histo[type]
+                                   [HISTO(zio->io_delay)]++;
+                               vsx->vsx_total_histo[type]
+                                   [HISTO(zio->io_delta)]++;
+                       }
+               }
 
                mutex_exit(&vd->vdev_stat_lock);
                return;
index 9b51ecc1d9687cd8bd805cfa2fb3de27c02adbfa..4e362226a88067d870f73fcdf4590363eeca699f 100644 (file)
@@ -100,9 +100,9 @@ vdev_disk_error(zio_t *zio)
 {
 #ifdef ZFS_DEBUG
        printk("ZFS: zio error=%d type=%d offset=%llu size=%llu "
-           "flags=%x delay=%llu\n", zio->io_error, zio->io_type,
+           "flags=%x\n", zio->io_error, zio->io_type,
            (u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size,
-           zio->io_flags, (u_longlong_t)zio->io_delay);
+           zio->io_flags);
 #endif
 }
 
@@ -410,7 +410,6 @@ vdev_disk_dio_put(dio_request_t *dr)
                vdev_disk_dio_free(dr);
 
                if (zio) {
-                       zio->io_delay = jiffies_64 - zio->io_delay;
                        zio->io_error = error;
                        ASSERT3S(zio->io_error, >=, 0);
                        if (zio->io_error)
@@ -588,8 +587,6 @@ retry:
 
        /* Extra reference to protect dio_request during vdev_submit_bio */
        vdev_disk_dio_get(dr);
-       if (zio)
-               zio->io_delay = jiffies_64;
 
        /* Submit all bio's associated with this dio */
        for (i = 0; i < dr->dr_bio_count; i++)
@@ -630,7 +627,6 @@ BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, rc)
        int rc = bio->bi_error;
 #endif
 
-       zio->io_delay = jiffies_64 - zio->io_delay;
        zio->io_error = -rc;
        if (rc && (rc == -EOPNOTSUPP))
                zio->io_vd->vdev_nowritecache = B_TRUE;
@@ -660,7 +656,6 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
        bio->bi_end_io = vdev_disk_io_flush_completion;
        bio->bi_private = zio;
        bio->bi_bdev = bdev;
-       zio->io_delay = jiffies_64;
        vdev_submit_bio(VDEV_WRITE_FLUSH_FUA, bio);
        invalidate_bdev(bdev);
 
index 3dc3d0d9d32ebf01732482d6138104ec47c3a6d9..1400aee7b7579f9a1f3a02a968e049d87995ac6a 100644 (file)
@@ -207,6 +207,107 @@ vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
            ZIO_PRIORITY_SYNC_WRITE, flags, B_TRUE));
 }
 
+/*
+ * Generate the nvlist representing this vdev's stats
+ */
+void
+vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv)
+{
+       nvlist_t *nvx;
+       vdev_stat_t *vs;
+       vdev_stat_ex_t *vsx;
+
+       vs = kmem_alloc(sizeof (*vs), KM_SLEEP);
+       vsx = kmem_alloc(sizeof (*vsx), KM_SLEEP);
+
+       vdev_get_stats_ex(vd, vs, vsx);
+       fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
+           (uint64_t *)vs, sizeof (*vs) / sizeof (uint64_t));
+
+       kmem_free(vs, sizeof (*vs));
+
+       /*
+        * Add extended stats into a special extended stats nvlist.  This keeps
+        * all the extended stats nicely grouped together.  The extended stats
+        * nvlist is then added to the main nvlist.
+        */
+       nvx = fnvlist_alloc();
+
+       /* ZIOs in flight to disk */
+       fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE,
+           vsx->vsx_active_queue[ZIO_PRIORITY_SYNC_READ]);
+
+       fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE,
+           vsx->vsx_active_queue[ZIO_PRIORITY_SYNC_WRITE]);
+
+       fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE,
+           vsx->vsx_active_queue[ZIO_PRIORITY_ASYNC_READ]);
+
+       fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE,
+           vsx->vsx_active_queue[ZIO_PRIORITY_ASYNC_WRITE]);
+
+       fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE,
+           vsx->vsx_active_queue[ZIO_PRIORITY_SCRUB]);
+
+       /* ZIOs pending */
+       fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE,
+           vsx->vsx_pend_queue[ZIO_PRIORITY_SYNC_READ]);
+
+       fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE,
+           vsx->vsx_pend_queue[ZIO_PRIORITY_SYNC_WRITE]);
+
+       fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE,
+           vsx->vsx_pend_queue[ZIO_PRIORITY_ASYNC_READ]);
+
+       fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE,
+           vsx->vsx_pend_queue[ZIO_PRIORITY_ASYNC_WRITE]);
+
+       fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE,
+           vsx->vsx_pend_queue[ZIO_PRIORITY_SCRUB]);
+
+       /* Histograms */
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
+           vsx->vsx_total_histo[ZIO_TYPE_READ],
+           ARRAY_SIZE(vsx->vsx_total_histo[ZIO_TYPE_READ]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
+           vsx->vsx_total_histo[ZIO_TYPE_WRITE],
+           ARRAY_SIZE(vsx->vsx_total_histo[ZIO_TYPE_WRITE]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,
+           vsx->vsx_disk_histo[ZIO_TYPE_READ],
+           ARRAY_SIZE(vsx->vsx_disk_histo[ZIO_TYPE_READ]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,
+           vsx->vsx_disk_histo[ZIO_TYPE_WRITE],
+           ARRAY_SIZE(vsx->vsx_disk_histo[ZIO_TYPE_WRITE]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO,
+           vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_READ],
+           ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_READ]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO,
+           vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_WRITE],
+           ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_WRITE]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO,
+           vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_READ],
+           ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_READ]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO,
+           vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_WRITE],
+           ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_WRITE]));
+
+       fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,
+           vsx->vsx_queue_histo[ZIO_PRIORITY_SCRUB],
+           ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_SCRUB]));
+
+       /* Add extended stats nvlist to main nvlist */
+       fnvlist_add_nvlist(nv, ZPOOL_CONFIG_VDEV_STATS_EX, nvx);
+
+       kmem_free(vsx, sizeof (*vsx));
+}
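
For context, a consumer-side sketch (illustrative only; the function name is made
up, and it assumes the libnvpair and sys/fs/zfs.h declarations) of how user space
can pull one of these histograms back out of the config nvlist generated above:

static void
dump_sync_read_queue_histo(nvlist_t *nv)
{
	nvlist_t *nvx;
	uint64_t *histo;
	uint_t buckets, b;

	/* The extended stats live in their own nested nvlist */
	if (nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_VDEV_STATS_EX, &nvx) != 0)
		return;		/* older module: no extended stats */

	if (nvlist_lookup_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO,
	    &histo, &buckets) == 0) {
		for (b = 0; b < buckets; b++)
			(void) printf("bucket %u: %llu\n", b,
			    (u_longlong_t)histo[b]);
	}
}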
+
 /*
  * Generate the nvlist representing this vdev's config.
  */
@@ -215,7 +316,6 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
     vdev_config_flag_t flags)
 {
        nvlist_t *nv = NULL;
-
        nv = fnvlist_alloc();
 
        fnvlist_add_string(nv, ZPOOL_CONFIG_TYPE, vd->vdev_ops->vdev_op_type);
@@ -306,12 +406,9 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
        }
 
        if (getstats) {
-               vdev_stat_t vs;
                pool_scan_stat_t ps;
 
-               vdev_get_stats(vd, &vs);
-               fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
-                   (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t));
+               vdev_config_generate_stats(vd, nv);
 
                /* provide either current or previous scan information */
                if (spa_scan_get_stats(spa, &ps) == 0) {
index 2d16e632de06108607b27888219a28973fd43163..523a924d67b0973a94549f69599af32ce7ff0087 100644 (file)
@@ -39,6 +39,7 @@
 #include <sys/ddt.h>
 #include <sys/blkptr.h>
 #include <sys/zfeature.h>
+#include <sys/time.h>
 
 /*
  * ==========================================================================
@@ -2694,6 +2695,8 @@ zio_vdev_io_start(zio_t *zio)
        uint64_t align;
        spa_t *spa = zio->io_spa;
 
+       zio->io_delay = 0;
+
        ASSERT(zio->io_error == 0);
        ASSERT(zio->io_child_error[ZIO_CHILD_VDEV] == 0);
 
@@ -2799,6 +2802,7 @@ zio_vdev_io_start(zio_t *zio)
                }
        }
 
+       zio->io_delay = gethrtime();
        vd->vdev_ops->vdev_op_io_start(zio);
        return (ZIO_PIPELINE_STOP);
 }
@@ -2815,6 +2819,9 @@ zio_vdev_io_done(zio_t *zio)
 
        ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
 
+       if (zio->io_delay)
+               zio->io_delay = gethrtime() - zio->io_delay;
+
        if (vd != NULL && vd->vdev_ops->vdev_op_leaf) {
 
                vdev_queue_io_done(zio);
@@ -3217,7 +3224,7 @@ zio_done(zio_t *zio)
         * 30 seconds to complete, post an error described the I/O delay.
         * We ignore these errors if the device is currently unavailable.
         */
-       if (zio->io_delay >= MSEC_TO_TICK(zio_delay_max)) {
+       if (zio->io_delay >= MSEC2NSEC(zio_delay_max)) {
                if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd))
                        zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
                            zio->io_vd, zio, 0, 0);
index 179f82e43dcb35a0a69398190a1736bd8c94e77f..c9b882987bad346a8fd9d2221509336276874e77 100644 (file)
@@ -370,7 +370,7 @@ tests = ['zfs_list_001_pos', 'zfs_list_002_pos', 'zfs_list_003_pos',
 
 [tests/functional/cli_user/zpool_iostat]
 tests = ['zpool_iostat_001_neg', 'zpool_iostat_002_pos',
-    'zpool_iostat_003_neg']
+    'zpool_iostat_003_neg', 'zpool_iostat_004_pos']
 
 [tests/functional/cli_user/zpool_list]
 tests = ['zpool_list_001_pos', 'zpool_list_002_neg']
index 2c292b9999384d4a1052fdb7d663617023787249..621dff91f2a7ce4fcbacb2b4211cef22525ef49a 100644 (file)
@@ -4,4 +4,5 @@ dist_pkgdata_SCRIPTS = \
        cleanup.ksh \
        zpool_iostat_001_neg.ksh \
        zpool_iostat_002_pos.ksh \
-       zpool_iostat_003_neg.ksh
+       zpool_iostat_003_neg.ksh \
+       zpool_iostat_004_pos.ksh
index d275e063b13a7345760d26954e814e3404db51c8..77eb6bd34f40eb39697544b40c7fed95f4afa218 100755 (executable)
@@ -33,4 +33,4 @@
 
 DISK=${DISKS%% *}
 
-default_setup $DISK
+default_raidz_setup $DISKS
index 37062ca536692747d19adf70b4230a01cd44a94a..ec5599acef0c4004cd694861d7567439d47e9029 100755 (executable)
 
 #
 # DESCRIPTION:
-# Verify that 'zpool iostat [interval [count]' can be executed as non-root.
+# Verify that 'zpool iostat [interval [count]]' can be executed as non-root.
 #
 # STRATEGY:
 # 1. Set the interval to 1 and count to 4.
 # 2. Sleep for 4 seconds.
 # 3. Verify that the output has 4 records.
-#
+# 4. Set interval to 0.5 and count to 1 to test floating point intervals.
 
 verify_runnable "both"
 
@@ -68,4 +68,7 @@ if [[ $stat_count -ne 4 ]]; then
        log_fail "zpool iostat [pool_name] [interval] [count] failed"
 fi
 
+# Test a floating point interval value
+log_must $ZPOOL iostat -v 0.5 1
+
 log_pass "zpool iostat [pool_name ...] [interval] [count] passed"
index d73f5d5c8e6586a47059c6584039fad93cd0ad97..ae1e5a1523c2db7d08d7b73bef1ddb9be2a10401 100755 (executable)
@@ -51,13 +51,14 @@ else
 fi
 
 set -A args "" "-?" "-f" "nonexistpool" "$TESTPOOL/$TESTFS" \
-       "$testpool 1.23" "$testpool 0" "$testpool -1" "$testpool 1 0" \
-       "$testpool 0 0"
+       "$testpool 0" "$testpool -1" "$testpool 1 0" \
+       "$testpool 0 0" "$testpool -wl" "$testpool -wq"
 
 log_assert "Executing 'zpool iostat' with bad options fails"
 
 typeset -i i=1
 while [[ $i -lt ${#args[*]} ]]; do
+       log_note "doing $ZPOOL iostat ${args[i]}"
        log_mustnot $ZPOOL iostat ${args[i]}
        ((i = i + 1))
 done
diff --git a/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_004_pos.ksh b/tests/zfs-tests/tests/functional/cli_user/zpool_iostat/zpool_iostat_004_pos.ksh
new file mode 100755 (executable)
index 0000000..70318db
--- /dev/null
@@ -0,0 +1,74 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Copyright (c) 2013 by Delphix. All rights reserved.
+#
+
+# Copyright (C) 2016 Lawrence Livermore National Security, LLC.
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Executing 'zpool iostat' command with various combinations of extended
+# stats (-l, -q, -w), verbose (-v), parseable/script options (-p, -H), and
+# misc lists of pools and vdevs.
+#
+# STRATEGY:
+# 1. Create an array of mixed 'zpool iostat' options.
+# 2. Execute each element of the array.
+# 3. Verify each command succeeds.
+#
+
+verify_runnable "both"
+
+typeset testpool
+if is_global_zone ; then
+        testpool=$TESTPOOL
+else
+        testpool=${TESTPOOL%%/*}
+fi
+
+set -A args "" "-v" "-q" "-l" "-lq $TESTPOOL" "-ql ${DISKS[0]} ${DISKS[1]}" \
+       "-w $TESTPOOL ${DISKS[0]} ${DISKS[1]}" \
+       "-wp $TESTPOOL" \
+       "-qlH $TESTPOOL ${DISKS[0]}" \
+       "-vpH ${DISKS[0]}" \
+       "-wpH ${DISKS[0]}"
+
+log_assert "Executing 'zpool iostat' with extended stat options succeeds"
+log_note "testpool: $TESTPOOL, disks $DISKS"
+
+typeset -i i=1
+while [[ $i -lt ${#args[*]} ]]; do
+       log_note "doing $ZPOOL iostat ${args[i]}"
+       log_must $ZPOOL iostat ${args[i]}
+       ((i = i + 1))
+done
+
+log_pass "Executing 'zpool iostat' with extended stat options succeeds"