]> granicus.if.org Git - zfs/blob - module/zfs/spa_stats.c
Report duration and error in mmp_history entries
[zfs] / module / zfs / spa_stats.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 #include <sys/zfs_context.h>
23 #include <sys/spa_impl.h>
24 #include <sys/vdev_impl.h>
25
/*
 * Keeps stats on last N reads per spa_t, disabled by default.
 * spa_read_history_add() trims the per-spa list down to this many entries.
 */
int zfs_read_history = 0;

/*
 * Include cache hits in history, disabled by default.
 * While zero, reads whose flags include ARC_FLAG_CACHED are not recorded.
 */
int zfs_read_history_hits = 0;

/*
 * Keeps stats on the last N txgs, disabled by default.
 * spa_txg_history_add() trims the per-spa list down to this many entries.
 */
int zfs_txg_history = 0;

/*
 * Keeps stats on the last N MMP updates, disabled by default.
 * spa_mmp_history_add() trims the per-spa list down to this many entries.
 */
int zfs_multihost_history = 0;
45
46 /*
47  * ==========================================================================
48  * SPA Read History Routines
49  * ==========================================================================
50  */
51
/*
 * Read statistics - Information exported regarding each arc_read call.
 *
 * One entry is allocated by spa_read_history_add() for every recorded read
 * and linked into the per-spa read_history list through srh_link.
 */
typedef struct spa_read_history {
        uint64_t        uid;            /* unique identifier */
        hrtime_t        start;          /* time read completed */
        uint64_t        objset;         /* read from this objset */
        uint64_t        object;         /* read of this object number */
        uint64_t        level;          /* block's indirection level */
        uint64_t        blkid;          /* read of this block id */
        char            origin[24];     /* read originated from here */
        uint32_t        aflags;         /* ARC flags (cached, prefetch, etc.) */
        pid_t           pid;            /* PID of task doing read */
        char            comm[16];       /* process name of task doing read */
        list_node_t     srh_link;       /* linkage on the history list */
} spa_read_history_t;
68
/*
 * Write the column header line of the "reads" kstat into buf, which holds
 * at most size bytes.  The column widths match the rows produced by
 * spa_read_history_data().  Always returns 0.
 */
static int
spa_read_history_headers(char *buf, size_t size)
{
        static const char hdr_fmt[] =
            "%-8s %-16s %-8s %-8s %-8s %-8s %-8s %-24s %-8s %-16s\n";

        (void) snprintf(buf, size, hdr_fmt,
            "UID", "start", "objset", "object", "level",
            "blkid", "aflags", "origin", "pid", "process");

        return (0);
}
78
79 static int
80 spa_read_history_data(char *buf, size_t size, void *data)
81 {
82         spa_read_history_t *srh = (spa_read_history_t *)data;
83
84         (void) snprintf(buf, size, "%-8llu %-16llu 0x%-6llx "
85             "%-8lli %-8lli %-8lli 0x%-6x %-24s %-8i %-16s\n",
86             (u_longlong_t)srh->uid, srh->start,
87             (longlong_t)srh->objset, (longlong_t)srh->object,
88             (longlong_t)srh->level, (longlong_t)srh->blkid,
89             srh->aflags, srh->origin, srh->pid, srh->comm);
90
91         return (0);
92 }
93
94 /*
95  * Calculate the address for the next spa_stats_history_t entry.  The
96  * ssh->lock will be held until ksp->ks_ndata entries are processed.
97  */
98 static void *
99 spa_read_history_addr(kstat_t *ksp, loff_t n)
100 {
101         spa_t *spa = ksp->ks_private;
102         spa_stats_history_t *ssh = &spa->spa_stats.read_history;
103
104         ASSERT(MUTEX_HELD(&ssh->lock));
105
106         if (n == 0)
107                 ssh->private = list_tail(&ssh->list);
108         else if (ssh->private)
109                 ssh->private = list_prev(&ssh->list, ssh->private);
110
111         return (ssh->private);
112 }
113
114 /*
115  * When the kstat is written discard all spa_read_history_t entries.  The
116  * ssh->lock will be held until ksp->ks_ndata entries are processed.
117  */
118 static int
119 spa_read_history_update(kstat_t *ksp, int rw)
120 {
121         spa_t *spa = ksp->ks_private;
122         spa_stats_history_t *ssh = &spa->spa_stats.read_history;
123
124         if (rw == KSTAT_WRITE) {
125                 spa_read_history_t *srh;
126
127                 while ((srh = list_remove_head(&ssh->list))) {
128                         ssh->size--;
129                         kmem_free(srh, sizeof (spa_read_history_t));
130                 }
131
132                 ASSERT3U(ssh->size, ==, 0);
133         }
134
135         ksp->ks_ndata = ssh->size;
136         ksp->ks_data_size = ssh->size * sizeof (spa_read_history_t);
137
138         return (0);
139 }
140
141 static void
142 spa_read_history_init(spa_t *spa)
143 {
144         spa_stats_history_t *ssh = &spa->spa_stats.read_history;
145         char name[KSTAT_STRLEN];
146         kstat_t *ksp;
147
148         mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
149         list_create(&ssh->list, sizeof (spa_read_history_t),
150             offsetof(spa_read_history_t, srh_link));
151
152         ssh->count = 0;
153         ssh->size = 0;
154         ssh->private = NULL;
155
156         (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
157
158         ksp = kstat_create(name, 0, "reads", "misc",
159             KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
160         ssh->kstat = ksp;
161
162         if (ksp) {
163                 ksp->ks_lock = &ssh->lock;
164                 ksp->ks_data = NULL;
165                 ksp->ks_private = spa;
166                 ksp->ks_update = spa_read_history_update;
167                 kstat_set_raw_ops(ksp, spa_read_history_headers,
168                     spa_read_history_data, spa_read_history_addr);
169                 kstat_install(ksp);
170         }
171 }
172
173 static void
174 spa_read_history_destroy(spa_t *spa)
175 {
176         spa_stats_history_t *ssh = &spa->spa_stats.read_history;
177         spa_read_history_t *srh;
178         kstat_t *ksp;
179
180         ksp = ssh->kstat;
181         if (ksp)
182                 kstat_delete(ksp);
183
184         mutex_enter(&ssh->lock);
185         while ((srh = list_remove_head(&ssh->list))) {
186                 ssh->size--;
187                 kmem_free(srh, sizeof (spa_read_history_t));
188         }
189
190         ASSERT3U(ssh->size, ==, 0);
191         list_destroy(&ssh->list);
192         mutex_exit(&ssh->lock);
193
194         mutex_destroy(&ssh->lock);
195 }
196
197 void
198 spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb, uint32_t aflags)
199 {
200         spa_stats_history_t *ssh = &spa->spa_stats.read_history;
201         spa_read_history_t *srh, *rm;
202
203         ASSERT3P(spa, !=, NULL);
204         ASSERT3P(zb,  !=, NULL);
205
206         if (zfs_read_history == 0 && ssh->size == 0)
207                 return;
208
209         if (zfs_read_history_hits == 0 && (aflags & ARC_FLAG_CACHED))
210                 return;
211
212         srh = kmem_zalloc(sizeof (spa_read_history_t), KM_SLEEP);
213         strlcpy(srh->comm, getcomm(), sizeof (srh->comm));
214         srh->start  = gethrtime();
215         srh->objset = zb->zb_objset;
216         srh->object = zb->zb_object;
217         srh->level  = zb->zb_level;
218         srh->blkid  = zb->zb_blkid;
219         srh->aflags = aflags;
220         srh->pid    = getpid();
221
222         mutex_enter(&ssh->lock);
223
224         srh->uid = ssh->count++;
225         list_insert_head(&ssh->list, srh);
226         ssh->size++;
227
228         while (ssh->size > zfs_read_history) {
229                 ssh->size--;
230                 rm = list_remove_tail(&ssh->list);
231                 kmem_free(rm, sizeof (spa_read_history_t));
232         }
233
234         mutex_exit(&ssh->lock);
235 }
236
237 /*
238  * ==========================================================================
239  * SPA TXG History Routines
240  * ==========================================================================
241  */
242
/*
 * Txg statistics - Information exported regarding each txg sync
 *
 * One entry is allocated by spa_txg_history_add() per recorded txg and
 * linked into the per-spa txg_history list through sth_link.
 */
typedef struct spa_txg_history {
        uint64_t        txg;            /* txg id */
        txg_state_t     state;          /* active txg state */
        uint64_t        nread;          /* number of bytes read */
        uint64_t        nwritten;       /* number of bytes written */
        uint64_t        reads;          /* number of read operations */
        uint64_t        writes;         /* number of write operations */
        uint64_t        ndirty;         /* number of dirty bytes */
        /* One slot per state below TXG_STATE_COMMITTED, indexed by state. */
        hrtime_t        times[TXG_STATE_COMMITTED]; /* completion times */
        list_node_t     sth_link;       /* linkage on the history list */
} spa_txg_history_t;
258
/*
 * Write the column header line of the "txgs" kstat into buf, which holds
 * at most size bytes.  The column widths match the rows produced by
 * spa_txg_history_data().  Always returns 0.
 */
static int
spa_txg_history_headers(char *buf, size_t size)
{
        static const char hdr_fmt[] =
            "%-8s %-16s %-5s %-12s %-12s %-12s "
            "%-8s %-8s %-12s %-12s %-12s %-12s\n";

        (void) snprintf(buf, size, hdr_fmt,
            "txg", "birth", "state", "ndirty", "nread", "nwritten",
            "reads", "writes", "otime", "qtime", "wtime", "stime");

        return (0);
}
269
270 static int
271 spa_txg_history_data(char *buf, size_t size, void *data)
272 {
273         spa_txg_history_t *sth = (spa_txg_history_t *)data;
274         uint64_t open = 0, quiesce = 0, wait = 0, sync = 0;
275         char state;
276
277         switch (sth->state) {
278                 case TXG_STATE_BIRTH:           state = 'B';    break;
279                 case TXG_STATE_OPEN:            state = 'O';    break;
280                 case TXG_STATE_QUIESCED:        state = 'Q';    break;
281                 case TXG_STATE_WAIT_FOR_SYNC:   state = 'W';    break;
282                 case TXG_STATE_SYNCED:          state = 'S';    break;
283                 case TXG_STATE_COMMITTED:       state = 'C';    break;
284                 default:                        state = '?';    break;
285         }
286
287         if (sth->times[TXG_STATE_OPEN])
288                 open = sth->times[TXG_STATE_OPEN] -
289                     sth->times[TXG_STATE_BIRTH];
290
291         if (sth->times[TXG_STATE_QUIESCED])
292                 quiesce = sth->times[TXG_STATE_QUIESCED] -
293                     sth->times[TXG_STATE_OPEN];
294
295         if (sth->times[TXG_STATE_WAIT_FOR_SYNC])
296                 wait = sth->times[TXG_STATE_WAIT_FOR_SYNC] -
297                     sth->times[TXG_STATE_QUIESCED];
298
299         if (sth->times[TXG_STATE_SYNCED])
300                 sync = sth->times[TXG_STATE_SYNCED] -
301                     sth->times[TXG_STATE_WAIT_FOR_SYNC];
302
303         (void) snprintf(buf, size, "%-8llu %-16llu %-5c %-12llu "
304             "%-12llu %-12llu %-8llu %-8llu %-12llu %-12llu %-12llu %-12llu\n",
305             (longlong_t)sth->txg, sth->times[TXG_STATE_BIRTH], state,
306             (u_longlong_t)sth->ndirty,
307             (u_longlong_t)sth->nread, (u_longlong_t)sth->nwritten,
308             (u_longlong_t)sth->reads, (u_longlong_t)sth->writes,
309             (u_longlong_t)open, (u_longlong_t)quiesce, (u_longlong_t)wait,
310             (u_longlong_t)sync);
311
312         return (0);
313 }
314
315 /*
316  * Calculate the address for the next spa_stats_history_t entry.  The
317  * ssh->lock will be held until ksp->ks_ndata entries are processed.
318  */
319 static void *
320 spa_txg_history_addr(kstat_t *ksp, loff_t n)
321 {
322         spa_t *spa = ksp->ks_private;
323         spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
324
325         ASSERT(MUTEX_HELD(&ssh->lock));
326
327         if (n == 0)
328                 ssh->private = list_tail(&ssh->list);
329         else if (ssh->private)
330                 ssh->private = list_prev(&ssh->list, ssh->private);
331
332         return (ssh->private);
333 }
334
335 /*
336  * When the kstat is written discard all spa_txg_history_t entries.  The
337  * ssh->lock will be held until ksp->ks_ndata entries are processed.
338  */
339 static int
340 spa_txg_history_update(kstat_t *ksp, int rw)
341 {
342         spa_t *spa = ksp->ks_private;
343         spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
344
345         ASSERT(MUTEX_HELD(&ssh->lock));
346
347         if (rw == KSTAT_WRITE) {
348                 spa_txg_history_t *sth;
349
350                 while ((sth = list_remove_head(&ssh->list))) {
351                         ssh->size--;
352                         kmem_free(sth, sizeof (spa_txg_history_t));
353                 }
354
355                 ASSERT3U(ssh->size, ==, 0);
356         }
357
358         ksp->ks_ndata = ssh->size;
359         ksp->ks_data_size = ssh->size * sizeof (spa_txg_history_t);
360
361         return (0);
362 }
363
364 static void
365 spa_txg_history_init(spa_t *spa)
366 {
367         spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
368         char name[KSTAT_STRLEN];
369         kstat_t *ksp;
370
371         mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
372         list_create(&ssh->list, sizeof (spa_txg_history_t),
373             offsetof(spa_txg_history_t, sth_link));
374
375         ssh->count = 0;
376         ssh->size = 0;
377         ssh->private = NULL;
378
379         (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
380
381         ksp = kstat_create(name, 0, "txgs", "misc",
382             KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
383         ssh->kstat = ksp;
384
385         if (ksp) {
386                 ksp->ks_lock = &ssh->lock;
387                 ksp->ks_data = NULL;
388                 ksp->ks_private = spa;
389                 ksp->ks_update = spa_txg_history_update;
390                 kstat_set_raw_ops(ksp, spa_txg_history_headers,
391                     spa_txg_history_data, spa_txg_history_addr);
392                 kstat_install(ksp);
393         }
394 }
395
396 static void
397 spa_txg_history_destroy(spa_t *spa)
398 {
399         spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
400         spa_txg_history_t *sth;
401         kstat_t *ksp;
402
403         ksp = ssh->kstat;
404         if (ksp)
405                 kstat_delete(ksp);
406
407         mutex_enter(&ssh->lock);
408         while ((sth = list_remove_head(&ssh->list))) {
409                 ssh->size--;
410                 kmem_free(sth, sizeof (spa_txg_history_t));
411         }
412
413         ASSERT3U(ssh->size, ==, 0);
414         list_destroy(&ssh->list);
415         mutex_exit(&ssh->lock);
416
417         mutex_destroy(&ssh->lock);
418 }
419
420 /*
421  * Add a new txg to historical record.
422  */
423 void
424 spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time)
425 {
426         spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
427         spa_txg_history_t *sth, *rm;
428
429         if (zfs_txg_history == 0 && ssh->size == 0)
430                 return;
431
432         sth = kmem_zalloc(sizeof (spa_txg_history_t), KM_SLEEP);
433         sth->txg = txg;
434         sth->state = TXG_STATE_OPEN;
435         sth->times[TXG_STATE_BIRTH] = birth_time;
436
437         mutex_enter(&ssh->lock);
438
439         list_insert_head(&ssh->list, sth);
440         ssh->size++;
441
442         while (ssh->size > zfs_txg_history) {
443                 ssh->size--;
444                 rm = list_remove_tail(&ssh->list);
445                 kmem_free(rm, sizeof (spa_txg_history_t));
446         }
447
448         mutex_exit(&ssh->lock);
449 }
450
451 /*
452  * Set txg state completion time and increment current state.
453  */
454 int
455 spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
456     hrtime_t completed_time)
457 {
458         spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
459         spa_txg_history_t *sth;
460         int error = ENOENT;
461
462         if (zfs_txg_history == 0)
463                 return (0);
464
465         mutex_enter(&ssh->lock);
466         for (sth = list_head(&ssh->list); sth != NULL;
467             sth = list_next(&ssh->list, sth)) {
468                 if (sth->txg == txg) {
469                         sth->times[completed_state] = completed_time;
470                         sth->state++;
471                         error = 0;
472                         break;
473                 }
474         }
475         mutex_exit(&ssh->lock);
476
477         return (error);
478 }
479
480 /*
481  * Set txg IO stats.
482  */
483 static int
484 spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
485     uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty)
486 {
487         spa_stats_history_t *ssh = &spa->spa_stats.txg_history;
488         spa_txg_history_t *sth;
489         int error = ENOENT;
490
491         if (zfs_txg_history == 0)
492                 return (0);
493
494         mutex_enter(&ssh->lock);
495         for (sth = list_head(&ssh->list); sth != NULL;
496             sth = list_next(&ssh->list, sth)) {
497                 if (sth->txg == txg) {
498                         sth->nread = nread;
499                         sth->nwritten = nwritten;
500                         sth->reads = reads;
501                         sth->writes = writes;
502                         sth->ndirty = ndirty;
503                         error = 0;
504                         break;
505                 }
506         }
507         mutex_exit(&ssh->lock);
508
509         return (error);
510 }
511
512 txg_stat_t *
513 spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp)
514 {
515         txg_stat_t *ts;
516
517         if (zfs_txg_history == 0)
518                 return (NULL);
519
520         ts = kmem_alloc(sizeof (txg_stat_t), KM_SLEEP);
521
522         spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
523         vdev_get_stats(spa->spa_root_vdev, &ts->vs1);
524         spa_config_exit(spa, SCL_ALL, FTAG);
525
526         ts->txg = txg;
527         ts->ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
528
529         spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC, gethrtime());
530
531         return (ts);
532 }
533
534 void
535 spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts)
536 {
537         if (ts == NULL)
538                 return;
539
540         if (zfs_txg_history == 0) {
541                 kmem_free(ts, sizeof (txg_stat_t));
542                 return;
543         }
544
545         spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
546         vdev_get_stats(spa->spa_root_vdev, &ts->vs2);
547         spa_config_exit(spa, SCL_ALL, FTAG);
548
549         spa_txg_history_set(spa, ts->txg, TXG_STATE_SYNCED, gethrtime());
550         spa_txg_history_set_io(spa, ts->txg,
551             ts->vs2.vs_bytes[ZIO_TYPE_READ] - ts->vs1.vs_bytes[ZIO_TYPE_READ],
552             ts->vs2.vs_bytes[ZIO_TYPE_WRITE] - ts->vs1.vs_bytes[ZIO_TYPE_WRITE],
553             ts->vs2.vs_ops[ZIO_TYPE_READ] - ts->vs1.vs_ops[ZIO_TYPE_READ],
554             ts->vs2.vs_ops[ZIO_TYPE_WRITE] - ts->vs1.vs_ops[ZIO_TYPE_WRITE],
555             ts->ndirty);
556
557         kmem_free(ts, sizeof (txg_stat_t));
558 }
559
560 /*
561  * ==========================================================================
562  * SPA TX Assign Histogram Routines
563  * ==========================================================================
564  */
565
566 /*
567  * Tx statistics - Information exported regarding dmu_tx_assign time.
568  */
569
570 /*
571  * When the kstat is written zero all buckets.  When the kstat is read
572  * count the number of trailing buckets set to zero and update ks_ndata
573  * such that they are not output.
574  */
575 static int
576 spa_tx_assign_update(kstat_t *ksp, int rw)
577 {
578         spa_t *spa = ksp->ks_private;
579         spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
580         int i;
581
582         if (rw == KSTAT_WRITE) {
583                 for (i = 0; i < ssh->count; i++)
584                         ((kstat_named_t *)ssh->private)[i].value.ui64 = 0;
585         }
586
587         for (i = ssh->count; i > 0; i--)
588                 if (((kstat_named_t *)ssh->private)[i-1].value.ui64 != 0)
589                         break;
590
591         ksp->ks_ndata = i;
592         ksp->ks_data_size = i * sizeof (kstat_named_t);
593
594         return (0);
595 }
596
597 static void
598 spa_tx_assign_init(spa_t *spa)
599 {
600         spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
601         char name[KSTAT_STRLEN];
602         kstat_named_t *ks;
603         kstat_t *ksp;
604         int i;
605
606         mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
607
608         ssh->count = 42; /* power of two buckets for 1ns to 2,199s */
609         ssh->size = ssh->count * sizeof (kstat_named_t);
610         ssh->private = kmem_alloc(ssh->size, KM_SLEEP);
611
612         (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
613
614         for (i = 0; i < ssh->count; i++) {
615                 ks = &((kstat_named_t *)ssh->private)[i];
616                 ks->data_type = KSTAT_DATA_UINT64;
617                 ks->value.ui64 = 0;
618                 (void) snprintf(ks->name, KSTAT_STRLEN, "%llu ns",
619                     (u_longlong_t)1 << i);
620         }
621
622         ksp = kstat_create(name, 0, "dmu_tx_assign", "misc",
623             KSTAT_TYPE_NAMED, 0, KSTAT_FLAG_VIRTUAL);
624         ssh->kstat = ksp;
625
626         if (ksp) {
627                 ksp->ks_lock = &ssh->lock;
628                 ksp->ks_data = ssh->private;
629                 ksp->ks_ndata = ssh->count;
630                 ksp->ks_data_size = ssh->size;
631                 ksp->ks_private = spa;
632                 ksp->ks_update = spa_tx_assign_update;
633                 kstat_install(ksp);
634         }
635 }
636
637 static void
638 spa_tx_assign_destroy(spa_t *spa)
639 {
640         spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
641         kstat_t *ksp;
642
643         ksp = ssh->kstat;
644         if (ksp)
645                 kstat_delete(ksp);
646
647         kmem_free(ssh->private, ssh->size);
648         mutex_destroy(&ssh->lock);
649 }
650
651 void
652 spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
653 {
654         spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
655         uint64_t idx = 0;
656
657         while (((1ULL << idx) < nsecs) && (idx < ssh->size - 1))
658                 idx++;
659
660         atomic_inc_64(&((kstat_named_t *)ssh->private)[idx].value.ui64);
661 }
662
663 /*
664  * ==========================================================================
665  * SPA IO History Routines
666  * ==========================================================================
667  */
668 static int
669 spa_io_history_update(kstat_t *ksp, int rw)
670 {
671         if (rw == KSTAT_WRITE)
672                 memset(ksp->ks_data, 0, ksp->ks_data_size);
673
674         return (0);
675 }
676
677 static void
678 spa_io_history_init(spa_t *spa)
679 {
680         spa_stats_history_t *ssh = &spa->spa_stats.io_history;
681         char name[KSTAT_STRLEN];
682         kstat_t *ksp;
683
684         mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
685
686         (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
687
688         ksp = kstat_create(name, 0, "io", "disk", KSTAT_TYPE_IO, 1, 0);
689         ssh->kstat = ksp;
690
691         if (ksp) {
692                 ksp->ks_lock = &ssh->lock;
693                 ksp->ks_private = spa;
694                 ksp->ks_update = spa_io_history_update;
695                 kstat_install(ksp);
696         }
697 }
698
699 static void
700 spa_io_history_destroy(spa_t *spa)
701 {
702         spa_stats_history_t *ssh = &spa->spa_stats.io_history;
703
704         if (ssh->kstat)
705                 kstat_delete(ssh->kstat);
706
707         mutex_destroy(&ssh->lock);
708 }
709
710 /*
711  * ==========================================================================
712  * SPA MMP History Routines
713  * ==========================================================================
714  */
715
/*
 * MMP statistics - Information exported regarding each MMP update
 *
 * One entry is allocated by spa_mmp_history_add() per recorded MMP write
 * and linked into the per-spa mmp_history list through smh_link.  The
 * io_error and duration fields are filled in afterwards by
 * spa_mmp_history_set(), matched on mmp_kstat_id.
 */
typedef struct spa_mmp_history {
        uint64_t        mmp_kstat_id;   /* unique # for updates */
        uint64_t        txg;            /* txg of last sync */
        uint64_t        timestamp;      /* UTC time of last sync */
        uint64_t        mmp_delay;      /* nanosec since last MMP write */
        uint64_t        vdev_guid;      /* unique ID of leaf vdev */
        char            *vdev_path;     /* strdup()ed vdev path, or NULL */
        uint64_t        vdev_label;     /* vdev label */
        int             io_error;       /* error status of MMP write */
        hrtime_t        duration;       /* time from submission to completion */
        list_node_t     smh_link;       /* linkage on the history list */
} spa_mmp_history_t;
732
/*
 * Write the column header line of the "multihost" kstat into buf, which
 * holds at most size bytes.  The column widths match the rows produced by
 * spa_mmp_history_data().  Always returns 0.
 */
static int
spa_mmp_history_headers(char *buf, size_t size)
{
        static const char hdr_fmt[] =
            "%-10s %-10s %-10s %-6s %-10s %-12s %-24s %-10s %s\n";

        (void) snprintf(buf, size, hdr_fmt,
            "id", "txg", "timestamp", "error", "duration",
            "mmp_delay", "vdev_guid", "vdev_label", "vdev_path");

        return (0);
}
741
742 static int
743 spa_mmp_history_data(char *buf, size_t size, void *data)
744 {
745         spa_mmp_history_t *smh = (spa_mmp_history_t *)data;
746
747         (void) snprintf(buf, size, "%-10llu %-10llu %-10llu %-6lld %-10lld "
748             "%-12llu %-24llu %-10llu %s\n",
749             (u_longlong_t)smh->mmp_kstat_id, (u_longlong_t)smh->txg,
750             (u_longlong_t)smh->timestamp, (longlong_t)smh->io_error,
751             (longlong_t)smh->duration, (u_longlong_t)smh->mmp_delay,
752             (u_longlong_t)smh->vdev_guid, (u_longlong_t)smh->vdev_label,
753             (smh->vdev_path ? smh->vdev_path : "-"));
754
755         return (0);
756 }
757
758 /*
759  * Calculate the address for the next spa_stats_history_t entry.  The
760  * ssh->lock will be held until ksp->ks_ndata entries are processed.
761  */
762 static void *
763 spa_mmp_history_addr(kstat_t *ksp, loff_t n)
764 {
765         spa_t *spa = ksp->ks_private;
766         spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
767
768         ASSERT(MUTEX_HELD(&ssh->lock));
769
770         if (n == 0)
771                 ssh->private = list_tail(&ssh->list);
772         else if (ssh->private)
773                 ssh->private = list_prev(&ssh->list, ssh->private);
774
775         return (ssh->private);
776 }
777
778 /*
779  * When the kstat is written discard all spa_mmp_history_t entries.  The
780  * ssh->lock will be held until ksp->ks_ndata entries are processed.
781  */
782 static int
783 spa_mmp_history_update(kstat_t *ksp, int rw)
784 {
785         spa_t *spa = ksp->ks_private;
786         spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
787
788         ASSERT(MUTEX_HELD(&ssh->lock));
789
790         if (rw == KSTAT_WRITE) {
791                 spa_mmp_history_t *smh;
792
793                 while ((smh = list_remove_head(&ssh->list))) {
794                         ssh->size--;
795                         if (smh->vdev_path)
796                                 strfree(smh->vdev_path);
797                         kmem_free(smh, sizeof (spa_mmp_history_t));
798                 }
799
800                 ASSERT3U(ssh->size, ==, 0);
801         }
802
803         ksp->ks_ndata = ssh->size;
804         ksp->ks_data_size = ssh->size * sizeof (spa_mmp_history_t);
805
806         return (0);
807 }
808
809 static void
810 spa_mmp_history_init(spa_t *spa)
811 {
812         spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
813         char name[KSTAT_STRLEN];
814         kstat_t *ksp;
815
816         mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
817         list_create(&ssh->list, sizeof (spa_mmp_history_t),
818             offsetof(spa_mmp_history_t, smh_link));
819
820         ssh->count = 0;
821         ssh->size = 0;
822         ssh->private = NULL;
823
824         (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
825
826         ksp = kstat_create(name, 0, "multihost", "misc",
827             KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
828         ssh->kstat = ksp;
829
830         if (ksp) {
831                 ksp->ks_lock = &ssh->lock;
832                 ksp->ks_data = NULL;
833                 ksp->ks_private = spa;
834                 ksp->ks_update = spa_mmp_history_update;
835                 kstat_set_raw_ops(ksp, spa_mmp_history_headers,
836                     spa_mmp_history_data, spa_mmp_history_addr);
837                 kstat_install(ksp);
838         }
839 }
840
841 static void
842 spa_mmp_history_destroy(spa_t *spa)
843 {
844         spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
845         spa_mmp_history_t *smh;
846         kstat_t *ksp;
847
848         ksp = ssh->kstat;
849         if (ksp)
850                 kstat_delete(ksp);
851
852         mutex_enter(&ssh->lock);
853         while ((smh = list_remove_head(&ssh->list))) {
854                 ssh->size--;
855                 if (smh->vdev_path)
856                         strfree(smh->vdev_path);
857                 kmem_free(smh, sizeof (spa_mmp_history_t));
858         }
859
860         ASSERT3U(ssh->size, ==, 0);
861         list_destroy(&ssh->list);
862         mutex_exit(&ssh->lock);
863
864         mutex_destroy(&ssh->lock);
865 }
866
867 /*
868  * Set MMP write duration and error status in existing record.
869  */
870 int
871 spa_mmp_history_set(spa_t *spa, uint64_t mmp_kstat_id, int io_error,
872     hrtime_t duration)
873 {
874         spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
875         spa_mmp_history_t *smh;
876         int error = ENOENT;
877
878         if (zfs_multihost_history == 0 && ssh->size == 0)
879                 return (0);
880
881         mutex_enter(&ssh->lock);
882         for (smh = list_head(&ssh->list); smh != NULL;
883             smh = list_next(&ssh->list, smh)) {
884                 if (smh->mmp_kstat_id == mmp_kstat_id) {
885                         smh->io_error = io_error;
886                         smh->duration = duration;
887                         error = 0;
888                         break;
889                 }
890         }
891         mutex_exit(&ssh->lock);
892
893         return (error);
894 }
895
896 /*
897  * Add a new MMP write to historical record.
898  */
899 void
900 spa_mmp_history_add(uint64_t txg, uint64_t timestamp, uint64_t mmp_delay,
901     vdev_t *vd, int label, uint64_t mmp_kstat_id)
902 {
903         spa_t *spa = vd->vdev_spa;
904         spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
905         spa_mmp_history_t *smh, *rm;
906
907         if (zfs_multihost_history == 0 && ssh->size == 0)
908                 return;
909
910         smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP);
911         smh->txg = txg;
912         smh->timestamp = timestamp;
913         smh->mmp_delay = mmp_delay;
914         smh->vdev_guid = vd->vdev_guid;
915         if (vd->vdev_path)
916                 smh->vdev_path = strdup(vd->vdev_path);
917         smh->vdev_label = label;
918         smh->mmp_kstat_id = mmp_kstat_id;
919
920         mutex_enter(&ssh->lock);
921
922         list_insert_head(&ssh->list, smh);
923         ssh->size++;
924
925         while (ssh->size > zfs_multihost_history) {
926                 ssh->size--;
927                 rm = list_remove_tail(&ssh->list);
928                 if (rm->vdev_path)
929                         strfree(rm->vdev_path);
930                 kmem_free(rm, sizeof (spa_mmp_history_t));
931         }
932
933         mutex_exit(&ssh->lock);
934 }
935
/*
 * Set up every per-spa statistics facility defined in this file: the read
 * history, the txg history, the dmu_tx_assign histogram, the io kstat and
 * the MMP history.
 */
void
spa_stats_init(spa_t *spa)
{
        spa_read_history_init(spa);
        spa_txg_history_init(spa);
        spa_tx_assign_init(spa);
        spa_io_history_init(spa);
        spa_mmp_history_init(spa);
}
945
/*
 * Tear down every per-spa statistics facility that spa_stats_init() set
 * up, releasing their kstats, recorded entries and locks.
 */
void
spa_stats_destroy(spa_t *spa)
{
        spa_tx_assign_destroy(spa);
        spa_txg_history_destroy(spa);
        spa_read_history_destroy(spa);
        spa_io_history_destroy(spa);
        spa_mmp_history_destroy(spa);
}
955
#if defined(_KERNEL) && defined(HAVE_SPL)
/*
 * Expose the history tunables defined at the top of this file as module
 * parameters, each registered as an int with mode 0644.
 */
/* CSTYLED */
module_param(zfs_read_history, int, 0644);
MODULE_PARM_DESC(zfs_read_history,
        "Historical statistics for the last N reads");

module_param(zfs_read_history_hits, int, 0644);
MODULE_PARM_DESC(zfs_read_history_hits,
        "Include cache hits in read history");

module_param(zfs_txg_history, int, 0644);
MODULE_PARM_DESC(zfs_txg_history,
        "Historical statistics for the last N txgs");

module_param(zfs_multihost_history, int, 0644);
MODULE_PARM_DESC(zfs_multihost_history,
        "Historical statistics for last N multihost writes");
/* END CSTYLED */
#endif