4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2016, Intel Corporation.
28 * This file implements the minimal FMD module API required to support the
29 * fault logic modules in ZED. This support includes module registration,
30 * memory allocation, module property accessors, basic case management,
31 * one-shot timers and SERD engines.
33 * In the ZED runtime, the modules are called from a single thread so no
34 * locking is required in this emulated FMD environment.
37 #include <sys/types.h>
38 #include <sys/fm/protocol.h>
39 #include <uuid/uuid.h>
47 #include "zfs_agents.h"
48 #include "../zed_log.h"
50 typedef struct fmd_modstat {
51 fmd_stat_t ms_accepted; /* total events accepted by module */
52 fmd_stat_t ms_caseopen; /* cases currently open */
53 fmd_stat_t ms_casesolved; /* total cases solved by module */
54 fmd_stat_t ms_caseclosed; /* total cases closed by module */
57 typedef struct fmd_module {
58 const char *mod_name; /* basename of module (ro) */
59 const fmd_hdl_info_t *mod_info; /* module info registered with handle */
60 void *mod_spec; /* fmd_hdl_get/setspecific data value */
61 fmd_stat_t *mod_ustat; /* module specific custom stats */
62 uint_t mod_ustat_cnt; /* count of ustat stats */
63 fmd_modstat_t mod_stats; /* fmd built-in per-module statistics */
64 fmd_serd_hash_t mod_serds; /* hash of serd engs owned by module */
65 char *mod_vers; /* a copy of module version string */
69 * ZED has two FMD hardwired module instances
71 fmd_module_t zfs_retire_module;
72 fmd_module_t zfs_diagnosis_module;
/*
 * On DEBUG builds, supply libumem with a sensible set of debugging
 * defaults.  The returned strings stand in for the $UMEM_DEBUG and
 * $UMEM_LOGGING settings.
 */

#ifdef DEBUG
const char *
_umem_debug_init(void)
{
	static const char umem_debug[] = "default,verbose";

	return (umem_debug);	/* $UMEM_DEBUG setting */
}

const char *
_umem_logging_init(void)
{
	static const char umem_logging[] = "fail,contents";

	return (umem_logging);	/* $UMEM_LOGGING setting */
}
#endif
93 * Register a module with fmd and finish module initialization.
94 * Returns an integer indicating whether it succeeded (zero) or
98 fmd_hdl_register(fmd_hdl_t *hdl, int version, const fmd_hdl_info_t *mip)
100 fmd_module_t *mp = (fmd_module_t *)hdl;
103 mp->mod_name = mip->fmdi_desc + 4; /* drop 'ZFS ' prefix */
106 /* bare minimum module stats */
107 (void) strcpy(mp->mod_stats.ms_accepted.fmds_name, "fmd.accepted");
108 (void) strcpy(mp->mod_stats.ms_caseopen.fmds_name, "fmd.caseopen");
109 (void) strcpy(mp->mod_stats.ms_casesolved.fmds_name, "fmd.casesolved");
110 (void) strcpy(mp->mod_stats.ms_caseclosed.fmds_name, "fmd.caseclosed");
112 fmd_serd_hash_create(&mp->mod_serds);
114 fmd_hdl_debug(hdl, "register module");
120 fmd_hdl_unregister(fmd_hdl_t *hdl)
122 fmd_module_t *mp = (fmd_module_t *)hdl;
123 fmd_modstat_t *msp = &mp->mod_stats;
124 const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
126 /* dump generic module stats */
127 fmd_hdl_debug(hdl, "%s: %llu", msp->ms_accepted.fmds_name,
128 msp->ms_accepted.fmds_value.ui64);
129 if (ops->fmdo_close != NULL) {
130 fmd_hdl_debug(hdl, "%s: %llu", msp->ms_caseopen.fmds_name,
131 msp->ms_caseopen.fmds_value.ui64);
132 fmd_hdl_debug(hdl, "%s: %llu", msp->ms_casesolved.fmds_name,
133 msp->ms_casesolved.fmds_value.ui64);
134 fmd_hdl_debug(hdl, "%s: %llu", msp->ms_caseclosed.fmds_name,
135 msp->ms_caseclosed.fmds_value.ui64);
138 /* dump module specific stats */
139 if (mp->mod_ustat != NULL) {
142 for (i = 0; i < mp->mod_ustat_cnt; i++) {
143 fmd_hdl_debug(hdl, "%s: %llu",
144 mp->mod_ustat[i].fmds_name,
145 mp->mod_ustat[i].fmds_value.ui64);
149 fmd_serd_hash_destroy(&mp->mod_serds);
151 fmd_hdl_debug(hdl, "unregister module");
155 * fmd_hdl_setspecific() is used to associate a data pointer with
156 * the specified handle for the duration of the module's lifetime.
157 * This pointer can be retrieved using fmd_hdl_getspecific().
160 fmd_hdl_setspecific(fmd_hdl_t *hdl, void *spec)
162 fmd_module_t *mp = (fmd_module_t *)hdl;
168 * Return the module-specific data pointer previously associated
169 * with the handle using fmd_hdl_setspecific().
172 fmd_hdl_getspecific(fmd_hdl_t *hdl)
174 fmd_module_t *mp = (fmd_module_t *)hdl;
176 return (mp->mod_spec);
180 fmd_hdl_alloc(fmd_hdl_t *hdl, size_t size, int flags)
182 return (umem_alloc(size, flags));
186 fmd_hdl_zalloc(fmd_hdl_t *hdl, size_t size, int flags)
188 return (umem_zalloc(size, flags));
192 fmd_hdl_free(fmd_hdl_t *hdl, void *data, size_t size)
194 umem_free(data, size);
198 * Record a module debug message using the specified format.
201 fmd_hdl_debug(fmd_hdl_t *hdl, const char *format, ...)
205 fmd_module_t *mp = (fmd_module_t *)hdl;
207 va_start(vargs, format);
208 (void) vsnprintf(message, sizeof (message), format, vargs);
211 /* prefix message with module name */
212 zed_log_msg(LOG_INFO, "%s: %s", mp->mod_name, message);
215 /* Property Retrieval */
218 fmd_prop_get_int32(fmd_hdl_t *hdl, const char *name)
221 * These can be looked up in mp->modinfo->fmdi_props
222 * For now we just hard code for phase 2. In the
223 * future, there can be a ZED based override.
225 if (strcmp(name, "spare_on_remove") == 0)
228 if (strcmp(name, "io_N") == 0 || strcmp(name, "checksum_N") == 0)
229 return (10); /* N = 10 events */
235 fmd_prop_get_int64(fmd_hdl_t *hdl, const char *name)
238 * These can be looked up in mp->modinfo->fmdi_props
239 * For now we just hard code for phase 2. In the
240 * future, there can be a ZED based override.
242 if (strcmp(name, "remove_timeout") == 0)
243 return (15ULL * 1000ULL * 1000ULL * 1000ULL); /* 15 sec */
245 if (strcmp(name, "io_T") == 0 || strcmp(name, "checksum_T") == 0)
246 return (1000ULL * 1000ULL * 1000ULL * 600ULL); /* 10 min */
254 fmd_stat_create(fmd_hdl_t *hdl, uint_t flags, uint_t nstats, fmd_stat_t *statv)
256 fmd_module_t *mp = (fmd_module_t *)hdl;
258 if (flags == FMD_STAT_NOALLOC) {
259 mp->mod_ustat = statv;
260 mp->mod_ustat_cnt = nstats;
266 /* Case Management */
269 fmd_case_open(fmd_hdl_t *hdl, void *data)
271 fmd_module_t *mp = (fmd_module_t *)hdl;
276 cp = fmd_hdl_zalloc(hdl, sizeof (fmd_case_t), FMD_SLEEP);
278 cp->ci_state = FMD_CASE_UNSOLVED;
279 cp->ci_flags = FMD_CF_DIRTY;
281 cp->ci_bufptr = NULL;
285 uuid_unparse(uuid, cp->ci_uuid);
287 fmd_hdl_debug(hdl, "case opened (%s)", cp->ci_uuid);
288 mp->mod_stats.ms_caseopen.fmds_value.ui64++;
294 fmd_case_solve(fmd_hdl_t *hdl, fmd_case_t *cp)
296 fmd_module_t *mp = (fmd_module_t *)hdl;
299 * For ZED, the event was already sent from fmd_case_add_suspect()
302 if (cp->ci_state >= FMD_CASE_SOLVED)
303 fmd_hdl_debug(hdl, "case is already solved or closed");
305 cp->ci_state = FMD_CASE_SOLVED;
307 fmd_hdl_debug(hdl, "case solved (%s)", cp->ci_uuid);
308 mp->mod_stats.ms_casesolved.fmds_value.ui64++;
312 fmd_case_close(fmd_hdl_t *hdl, fmd_case_t *cp)
314 fmd_module_t *mp = (fmd_module_t *)hdl;
315 const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
317 fmd_hdl_debug(hdl, "case closed (%s)", cp->ci_uuid);
319 if (ops->fmdo_close != NULL)
320 ops->fmdo_close(hdl, cp);
322 mp->mod_stats.ms_caseopen.fmds_value.ui64--;
323 mp->mod_stats.ms_caseclosed.fmds_value.ui64++;
325 if (cp->ci_bufptr != NULL && cp->ci_bufsiz > 0)
326 fmd_hdl_free(hdl, cp->ci_bufptr, cp->ci_bufsiz);
328 fmd_hdl_free(hdl, cp, sizeof (fmd_case_t));
332 fmd_case_uuresolved(fmd_hdl_t *hdl, const char *uuid)
334 fmd_hdl_debug(hdl, "case resolved by uuid (%s)", uuid);
338 fmd_case_solved(fmd_hdl_t *hdl, fmd_case_t *cp)
340 return ((cp->ci_state >= FMD_CASE_SOLVED) ? FMD_B_TRUE : FMD_B_FALSE);
344 fmd_case_add_ereport(fmd_hdl_t *hdl, fmd_case_t *cp, fmd_event_t *ep)
349 zed_log_fault(nvlist_t *nvl, const char *uuid, const char *code)
356 zed_log_msg(LOG_INFO, "\nzed_fault_event:");
359 zed_log_msg(LOG_INFO, "\t%s: %s", FM_SUSPECT_UUID, uuid);
360 if (nvlist_lookup_string(nvl, FM_CLASS, &strval) == 0)
361 zed_log_msg(LOG_INFO, "\t%s: %s", FM_CLASS, strval);
363 zed_log_msg(LOG_INFO, "\t%s: %s", FM_SUSPECT_DIAG_CODE, code);
364 if (nvlist_lookup_uint8(nvl, FM_FAULT_CERTAINTY, &byte) == 0)
365 zed_log_msg(LOG_INFO, "\t%s: %llu", FM_FAULT_CERTAINTY, byte);
366 if (nvlist_lookup_nvlist(nvl, FM_FAULT_RESOURCE, &rsrc) == 0) {
367 if (nvlist_lookup_string(rsrc, FM_FMRI_SCHEME, &strval) == 0)
368 zed_log_msg(LOG_INFO, "\t%s: %s", FM_FMRI_SCHEME,
370 if (nvlist_lookup_uint64(rsrc, FM_FMRI_ZFS_POOL, &guid) == 0)
371 zed_log_msg(LOG_INFO, "\t%s: %llu", FM_FMRI_ZFS_POOL,
373 if (nvlist_lookup_uint64(rsrc, FM_FMRI_ZFS_VDEV, &guid) == 0)
374 zed_log_msg(LOG_INFO, "\t%s: %llu \n", FM_FMRI_ZFS_VDEV,
380 fmd_fault_mkcode(nvlist_t *fault)
382 char *class, *code = "-";
385 * Note: message codes come from: openzfs/usr/src/cmd/fm/dicts/ZFS.po
387 if (nvlist_lookup_string(fault, FM_CLASS, &class) == 0) {
388 if (strcmp(class, "fault.fs.zfs.vdev.io") == 0)
389 code = "ZFS-8000-FD";
390 else if (strcmp(class, "fault.fs.zfs.vdev.checksum") == 0)
391 code = "ZFS-8000-GH";
392 else if (strcmp(class, "fault.fs.zfs.io_failure_wait") == 0)
393 code = "ZFS-8000-HC";
394 else if (strcmp(class, "fault.fs.zfs.io_failure_continue") == 0)
395 code = "ZFS-8000-JQ";
396 else if (strcmp(class, "fault.fs.zfs.log_replay") == 0)
397 code = "ZFS-8000-K4";
398 else if (strcmp(class, "fault.fs.zfs.pool") == 0)
399 code = "ZFS-8000-CS";
400 else if (strcmp(class, "fault.fs.zfs.device") == 0)
401 code = "ZFS-8000-D3";
408 fmd_case_add_suspect(fmd_hdl_t *hdl, fmd_case_t *cp, nvlist_t *fault)
411 const char *code = fmd_fault_mkcode(fault);
416 * payload derived from fmd_protocol_list()
419 (void) gettimeofday(&cp->ci_tv, NULL);
420 tod[0] = cp->ci_tv.tv_sec;
421 tod[1] = cp->ci_tv.tv_usec;
423 nvl = fmd_nvl_alloc(hdl, FMD_SLEEP);
425 err |= nvlist_add_uint8(nvl, FM_VERSION, FM_SUSPECT_VERSION);
426 err |= nvlist_add_string(nvl, FM_CLASS, FM_LIST_SUSPECT_CLASS);
427 err |= nvlist_add_string(nvl, FM_SUSPECT_UUID, cp->ci_uuid);
428 err |= nvlist_add_string(nvl, FM_SUSPECT_DIAG_CODE, code);
429 err |= nvlist_add_int64_array(nvl, FM_SUSPECT_DIAG_TIME, tod, 2);
430 err |= nvlist_add_uint32(nvl, FM_SUSPECT_FAULT_SZ, 1);
431 err |= nvlist_add_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, &fault, 1);
434 zed_log_die("failed to populate nvlist");
436 zed_log_fault(fault, cp->ci_uuid, code);
437 zfs_agent_post_event(FM_LIST_SUSPECT_CLASS, NULL, nvl);
444 fmd_case_setspecific(fmd_hdl_t *hdl, fmd_case_t *cp, void *data)
450 fmd_case_getspecific(fmd_hdl_t *hdl, fmd_case_t *cp)
452 return (cp->ci_data);
456 fmd_buf_create(fmd_hdl_t *hdl, fmd_case_t *cp, const char *name, size_t size)
458 assert(strcmp(name, "data") == 0);
459 assert(cp->ci_bufptr == NULL);
460 assert(size < (1024 * 1024));
462 cp->ci_bufptr = fmd_hdl_alloc(hdl, size, FMD_SLEEP);
463 cp->ci_bufsiz = size;
467 fmd_buf_read(fmd_hdl_t *hdl, fmd_case_t *cp,
468 const char *name, void *buf, size_t size)
470 assert(strcmp(name, "data") == 0);
471 assert(cp->ci_bufptr != NULL);
472 assert(size <= cp->ci_bufsiz);
474 bcopy(cp->ci_bufptr, buf, size);
478 fmd_buf_write(fmd_hdl_t *hdl, fmd_case_t *cp,
479 const char *name, const void *buf, size_t size)
481 assert(strcmp(name, "data") == 0);
482 assert(cp->ci_bufptr != NULL);
483 assert(cp->ci_bufsiz >= size);
485 bcopy(buf, cp->ci_bufptr, size);
491 fmd_serd_create(fmd_hdl_t *hdl, const char *name, uint_t n, hrtime_t t)
493 fmd_module_t *mp = (fmd_module_t *)hdl;
495 if (fmd_serd_eng_lookup(&mp->mod_serds, name) != NULL) {
496 zed_log_msg(LOG_ERR, "failed to create SERD engine '%s': "
497 " name already exists", name);
501 (void) fmd_serd_eng_insert(&mp->mod_serds, name, n, t);
505 fmd_serd_destroy(fmd_hdl_t *hdl, const char *name)
507 fmd_module_t *mp = (fmd_module_t *)hdl;
509 fmd_serd_eng_delete(&mp->mod_serds, name);
511 fmd_hdl_debug(hdl, "serd_destroy %s", name);
515 fmd_serd_exists(fmd_hdl_t *hdl, const char *name)
517 fmd_module_t *mp = (fmd_module_t *)hdl;
519 return (fmd_serd_eng_lookup(&mp->mod_serds, name) != NULL);
523 fmd_serd_reset(fmd_hdl_t *hdl, const char *name)
525 fmd_module_t *mp = (fmd_module_t *)hdl;
528 if ((sgp = fmd_serd_eng_lookup(&mp->mod_serds, name)) == NULL) {
529 zed_log_msg(LOG_ERR, "serd engine '%s' does not exist", name);
533 fmd_serd_eng_reset(sgp);
535 fmd_hdl_debug(hdl, "serd_reset %s", name);
539 fmd_serd_record(fmd_hdl_t *hdl, const char *name, fmd_event_t *ep)
541 fmd_module_t *mp = (fmd_module_t *)hdl;
545 if ((sgp = fmd_serd_eng_lookup(&mp->mod_serds, name)) == NULL) {
546 zed_log_msg(LOG_ERR, "failed to add record to SERD engine '%s'",
548 return (FMD_B_FALSE);
550 err = fmd_serd_eng_record(sgp, ep->ev_hrt);
558 _timer_notify(union sigval sv)
560 fmd_timer_t *ftp = sv.sival_ptr;
561 fmd_hdl_t *hdl = ftp->ft_hdl;
562 fmd_module_t *mp = (fmd_module_t *)hdl;
563 const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
564 struct itimerspec its;
566 fmd_hdl_debug(hdl, "timer fired (%p)", ftp->ft_tid);
568 /* disarm the timer */
569 bzero(&its, sizeof (struct itimerspec));
570 timer_settime(ftp->ft_tid, 0, &its, NULL);
572 /* Note that the fmdo_timeout can remove this timer */
573 if (ops->fmdo_timeout != NULL)
574 ops->fmdo_timeout(hdl, ftp, ftp->ft_arg);
578 * Install a new timer which will fire at least delta nanoseconds after the
579 * current time. After the timeout has expired, the module's fmdo_timeout
580 * entry point is called.
583 fmd_timer_install(fmd_hdl_t *hdl, void *arg, fmd_event_t *ep, hrtime_t delta)
586 struct itimerspec its;
589 ftp = fmd_hdl_alloc(hdl, sizeof (fmd_timer_t), FMD_SLEEP);
593 its.it_value.tv_sec = delta / 1000000000;
594 its.it_value.tv_nsec = delta % 1000000000;
595 its.it_interval.tv_sec = its.it_value.tv_sec;
596 its.it_interval.tv_nsec = its.it_value.tv_nsec;
598 sev.sigev_notify = SIGEV_THREAD;
599 sev.sigev_notify_function = _timer_notify;
600 sev.sigev_notify_attributes = NULL;
601 sev.sigev_value.sival_ptr = ftp;
603 timer_create(CLOCK_REALTIME, &sev, &ftp->ft_tid);
604 timer_settime(ftp->ft_tid, 0, &its, NULL);
606 fmd_hdl_debug(hdl, "installing timer for %d secs (%p)",
607 (int)its.it_value.tv_sec, ftp->ft_tid);
613 fmd_timer_remove(fmd_hdl_t *hdl, fmd_timer_t *ftp)
615 fmd_hdl_debug(hdl, "removing timer (%p)", ftp->ft_tid);
617 timer_delete(ftp->ft_tid);
619 fmd_hdl_free(hdl, ftp, sizeof (fmd_timer_t));
622 /* Name-Value Pair Lists */
625 fmd_nvl_create_fault(fmd_hdl_t *hdl, const char *class, uint8_t certainty,
626 nvlist_t *asru, nvlist_t *fru, nvlist_t *resource)
631 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
632 zed_log_die("failed to xalloc fault nvlist");
634 err |= nvlist_add_uint8(nvl, FM_VERSION, FM_FAULT_VERSION);
635 err |= nvlist_add_string(nvl, FM_CLASS, class);
636 err |= nvlist_add_uint8(nvl, FM_FAULT_CERTAINTY, certainty);
639 err |= nvlist_add_nvlist(nvl, FM_FAULT_ASRU, asru);
641 err |= nvlist_add_nvlist(nvl, FM_FAULT_FRU, fru);
642 if (resource != NULL)
643 err |= nvlist_add_nvlist(nvl, FM_FAULT_RESOURCE, resource);
646 zed_log_die("failed to populate nvlist: %s\n", strerror(err));
/*
 * sourced from fmd_string.c
 *
 * Match string s against pattern p, where '*' in the pattern matches any
 * run of characters (including none) and every other character matches
 * itself.  Returns 1 on a match and 0 otherwise.  A NULL pattern never
 * matches; a NULL string is treated as the empty string.
 */
static int
fmd_strmatch(const char *s, const char *p)
{
	if (p == NULL)
		return (0);

	if (s == NULL)
		s = "";

	for (;;) {
		char pc = *p++;

		/* end of pattern: match only if the string ended as well */
		if (pc == '\0')
			return (*s == '\0');

		if (pc == '*') {
			/* consecutive *'s can be collapsed */
			while (*p == '*')
				p++;

			/* a trailing '*' swallows whatever is left */
			if (*p == '\0')
				return (1);

			/* try the rest of the pattern at every position */
			for (; *s != '\0'; s++) {
				if (fmd_strmatch(s, p) != 0)
					return (1);
			}

			return (0);
		}

		if (pc != *s++)
			return (0);
	}
}
689 fmd_nvl_class_match(fmd_hdl_t *hdl, nvlist_t *nvl, const char *pattern)
693 return (nvl != NULL &&
694 nvlist_lookup_string(nvl, FM_CLASS, &class) == 0 &&
695 fmd_strmatch(class, pattern));
699 fmd_nvl_alloc(fmd_hdl_t *hdl, int flags)
701 nvlist_t *nvl = NULL;
703 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
711 * ZED Agent specific APIs
715 fmd_module_hdl(const char *name)
717 if (strcmp(name, "zfs-retire") == 0)
718 return ((fmd_hdl_t *)&zfs_retire_module);
719 if (strcmp(name, "zfs-diagnosis") == 0)
720 return ((fmd_hdl_t *)&zfs_diagnosis_module);
726 fmd_module_initialized(fmd_hdl_t *hdl)
728 fmd_module_t *mp = (fmd_module_t *)hdl;
730 return (mp->mod_info != NULL);
734 * fmd_module_recv is called for each event that is received by
735 * the fault manager that has a class that matches one of the
736 * module's subscriptions.
739 fmd_module_recv(fmd_hdl_t *hdl, nvlist_t *nvl, const char *class)
741 fmd_module_t *mp = (fmd_module_t *)hdl;
742 const fmd_hdl_ops_t *ops = mp->mod_info->fmdi_ops;
743 fmd_event_t faux_event = {0};
748 * Will need to normalized this if we persistently store the case data
750 if (nvlist_lookup_int64_array(nvl, FM_EREPORT_TIME, &tv, &n) == 0)
751 faux_event.ev_hrt = tv[0] * NANOSEC + tv[1];
753 faux_event.ev_hrt = 0;
755 ops->fmdo_recv(hdl, &faux_event, nvl, class);
757 mp->mod_stats.ms_accepted.fmds_value.ui64++;
759 /* TBD - should we initiate fm_module_gc() periodically? */