]> granicus.if.org Git - zfs/commitdiff
Add Solaris FMA style support
authorBrian Behlendorf <behlendorf1@llnl.gov>
Thu, 29 Apr 2010 17:37:15 +0000 (10:37 -0700)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Thu, 29 Apr 2010 17:37:15 +0000 (10:37 -0700)
module/zfs/fm.c [new file with mode: 0644]
module/zfs/include/sys/fm/fs/zfs.h [moved from module/zcommon/include/sys/fm/fs/zfs.h with 100% similarity]
module/zfs/include/sys/fm/protocol.h [new file with mode: 0644]
module/zfs/include/sys/fm/util.h [new file with mode: 0644]
scripts/zfs-update.sh

diff --git a/module/zfs/fm.c b/module/zfs/fm.c
new file mode 100644 (file)
index 0000000..3cc979d
--- /dev/null
@@ -0,0 +1,1266 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Fault Management Architecture (FMA) Resource and Protocol Support
+ *
+ * The routines contained herein provide services to support kernel subsystems
+ * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
+ *
+ * Name-Value Pair Lists
+ *
+ * The embodiment of an FMA protocol element (event, fmri or authority) is a
+ * name-value pair list (nvlist_t).  FMA-specific nvlist constructor and
+ * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
+ * to create an nvpair list using custom allocators.  Callers may choose to
+ * allocate either from the kernel memory allocator, or from a preallocated
+ * buffer, useful in constrained contexts like high-level interrupt routines.
+ *
+ * Protocol Event and FMRI Construction
+ *
+ * Convenience routines are provided to construct nvlist events according to
+ * the FMA Event Protocol and Naming Schema specification for ereports and
+ * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
+ *
+ * ENA Manipulation
+ *
+ * Routines to generate ENA formats 0, 1 and 2 are available as well as
+ * routines to increment formats 1 and 2.  Individual fields within the
+ * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
+ * fm_ena_format_get() and fm_ena_gen_get().
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/sysevent.h>
+#include <sys/sysevent_impl.h>
+#include <sys/nvpair.h>
+#include <sys/cmn_err.h>
+#include <sys/cpuvar.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/ddifm.h>
+#include <sys/ddifm_impl.h>
+#include <sys/spl.h>
+#include <sys/dumphdr.h>
+#include <sys/compress.h>
+#include <sys/cpuvar.h>
+#include <sys/console.h>
+#include <sys/panic.h>
+#include <sys/kobj.h>
+#include <sys/sunddi.h>
+#include <sys/systeminfo.h>
+#include <sys/sysevent/eventdefs.h>
+#include <sys/fm/util.h>
+#include <sys/fm/protocol.h>
+
+/*
+ * URL and SUNW-MSG-ID value to display for fm_panic(), defined below.  These
+ * values must be kept in sync with the FMA source code in usr/src/cmd/fm.
+ */
+static const char *fm_url = "http://www.sun.com/msg";
+static const char *fm_msgid = "SUNOS-8000-0G";
+static char *volatile fm_panicstr = NULL;      /* set by fm_panic() */
+
+errorq_t *ereport_errorq;      /* created in fm_init() */
+void *ereport_dumpbuf;         /* ereport_size bytes, from fm_init() */
+size_t ereport_dumplen;                /* size of ereport_dumpbuf */
+
+static uint_t ereport_chanlen = ERPT_EVCH_MAX; /* applied in fm_init() */
+static evchan_t *ereport_chan = NULL;  /* bound in fm_init() */
+static ulong_t ereport_qlen = 0;       /* 0: fm_init() picks a default */
+static size_t ereport_size = 0;        /* 0: fm_init() picks a default */
+static int ereport_cols = 80;  /* wrap width passed by fm_nvprint() */
+
+/*
+ * Common fault management kstats to record ereport generation failures.
+ * fm_init() installs erpt_kstat_data as the data of the "fm" named kstat.
+ */
+
+struct erpt_kstat {
+       kstat_named_t   erpt_dropped;           /* num erpts dropped on post */
+       kstat_named_t   erpt_set_failed;        /* num erpt set failures */
+       kstat_named_t   fmri_set_failed;        /* num fmri set failures */
+       kstat_named_t   payload_set_failed;     /* num payload set failures */
+};
+
+static struct erpt_kstat erpt_kstat_data = {
+       { "erpt-dropped", KSTAT_DATA_UINT64 },
+       { "erpt-set-failed", KSTAT_DATA_UINT64 },
+       { "fmri-set-failed", KSTAT_DATA_UINT64 },
+       { "payload-set-failed", KSTAT_DATA_UINT64 }
+};
+
+/*ARGSUSED*/
+static void
+fm_drain(void *private, void *data, errorq_elem_t *eep)
+{
+       nvlist_t *erpt = errorq_elem_nvl(ereport_errorq, eep);
+       /* Queue drain callback: print when panicking, otherwise post. */
+       if (panicstr)
+               fm_nvprint(erpt);
+       else
+               (void) fm_ereport_post(erpt, EVCH_TRYHARD);
+}
+
+void
+fm_init(void)
+{
+       kstat_t *fm_ksp;
+
+       /* Bind the ereport channel, then apply the configured length. */
+       (void) sysevent_evc_bind(FM_ERROR_CHAN,
+           &ereport_chan, EVCH_CREAT | EVCH_HOLD_PEND);
+       (void) sysevent_evc_control(ereport_chan,
+           EVCH_SET_CHAN_LEN, &ereport_chanlen);
+
+       /* Zero means "not tuned": fall back to the built-in defaults. */
+       if (ereport_qlen == 0)
+               ereport_qlen = ERPT_MAX_ERRS * MAX(max_ncpus, 4);
+       if (ereport_size == 0)
+               ereport_size = ERPT_DATA_SZ;
+
+       /* The error queue is required, so failing to create it is fatal. */
+       ereport_errorq = errorq_nvcreate("fm_ereport_queue",
+           (errorq_func_t)fm_drain, NULL, ereport_qlen, ereport_size,
+           FM_ERR_PIL, ERRORQ_VITAL);
+       if (ereport_errorq == NULL)
+               panic("failed to create required ereport error queue");
+
+       ereport_dumpbuf = kmem_alloc(ereport_size, KM_SLEEP);
+       ereport_dumplen = ereport_size;
+
+       /* Initialize ereport allocation and generation kstats */
+       fm_ksp = kstat_create("unix", 0, "fm", "misc", KSTAT_TYPE_NAMED,
+           sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
+           KSTAT_FLAG_VIRTUAL);
+       if (fm_ksp == NULL) {
+               cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
+               return;
+       }
+
+       fm_ksp->ks_data = &erpt_kstat_data;
+       kstat_install(fm_ksp);
+}
+
+/*
+ * Console output helper for fm_nvprintr().  A chunk that would reach column
+ * 'cols' is moved to a fresh console line; the ending column is returned.
+ */
+/*PRINTFLIKE4*/
+static int
+fm_printf(int depth, int c, int cols, const char *format, ...)
+{
+       va_list args;
+       char scratch;
+       int len;
+
+       /* Size-only pass: the 1-byte buffer just lets vsnprintf() count. */
+       va_start(args, format);
+       len = vsnprintf(&scratch, sizeof (scratch), format, args);
+       va_end(args);
+
+       if (cols <= c + len) {
+               console_printf("\n\r");
+               c = (format[0] != ' ' && depth > 0) ? 1 : 0;
+               if (c != 0)
+                       console_printf(" ");
+       }
+
+       va_start(args, format);
+       console_vprintf(format, args);
+       va_end(args);
+
+       /* Report the column the output ended in. */
+       return ((c + len) % cols);
+}
+
+/*
+ * Recursively print a nvlist in the specified column width and return the
+ * column we end up in.  This function is called by fm_nvprint(), below, and
+ * recurses into nested nvlists.  We generically format the entire nvpair
+ * using hexadecimal integers and strings, and elide any integer arrays (used
+ * for cache dumps right now) so as not to overwhelm the console output we
+ * produce at panic time; unhandled types print as "<unknown>".  This can be
+ * further enhanced as FMA technology grows based upon the needs of consumers.
+ * All FMA telemetry is logged using the dump device transport, so the console
+ * output serves only as a fallback in case this procedure is unsuccessful.
+ */
+static int
+fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
+{
+       nvpair_t *nvp;
+
+       for (nvp = nvlist_next_nvpair(nvl, NULL);
+           nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {
+
+               data_type_t type = nvpair_type(nvp);
+               const char *name = nvpair_name(nvp);
+
+               boolean_t b;
+               uint8_t i8;
+               uint16_t i16;
+               uint32_t i32;
+               uint64_t i64;
+               char *str;
+               nvlist_t *cnv;
+
+               if (strcmp(name, FM_CLASS) == 0)
+                       continue; /* already printed by caller */
+
+               c = fm_printf(d, c, cols, " %s=", name);
+
+               switch (type) {
+               case DATA_TYPE_BOOLEAN:
+                       c = fm_printf(d + 1, c, cols, " 1");
+                       break;
+
+               case DATA_TYPE_BOOLEAN_VALUE:
+                       (void) nvpair_value_boolean_value(nvp, &b);
+                       c = fm_printf(d + 1, c, cols, b ? "1" : "0");
+                       break;
+
+               case DATA_TYPE_BYTE:
+                       (void) nvpair_value_byte(nvp, &i8);
+                       c = fm_printf(d + 1, c, cols, "%x", i8);
+                       break;
+
+               case DATA_TYPE_INT8:
+                       (void) nvpair_value_int8(nvp, (void *)&i8);
+                       c = fm_printf(d + 1, c, cols, "%x", i8);
+                       break;
+
+               case DATA_TYPE_UINT8:
+                       (void) nvpair_value_uint8(nvp, &i8);
+                       c = fm_printf(d + 1, c, cols, "%x", i8);
+                       break;
+
+               case DATA_TYPE_INT16:
+                       (void) nvpair_value_int16(nvp, (void *)&i16);
+                       c = fm_printf(d + 1, c, cols, "%x", i16);
+                       break;
+
+               case DATA_TYPE_UINT16:
+                       (void) nvpair_value_uint16(nvp, &i16);
+                       c = fm_printf(d + 1, c, cols, "%x", i16);
+                       break;
+
+               case DATA_TYPE_INT32:
+                       (void) nvpair_value_int32(nvp, (void *)&i32);
+                       c = fm_printf(d + 1, c, cols, "%x", i32);
+                       break;
+
+               case DATA_TYPE_UINT32:
+                       (void) nvpair_value_uint32(nvp, &i32);
+                       c = fm_printf(d + 1, c, cols, "%x", i32);
+                       break;
+
+               case DATA_TYPE_INT64:
+                       (void) nvpair_value_int64(nvp, (void *)&i64);
+                       c = fm_printf(d + 1, c, cols, "%llx",
+                           (u_longlong_t)i64);
+                       break;
+
+               case DATA_TYPE_UINT64:
+                       (void) nvpair_value_uint64(nvp, &i64);
+                       c = fm_printf(d + 1, c, cols, "%llx",
+                           (u_longlong_t)i64);
+                       break;
+
+               case DATA_TYPE_HRTIME:
+                       (void) nvpair_value_hrtime(nvp, (void *)&i64);
+                       c = fm_printf(d + 1, c, cols, "%llx",
+                           (u_longlong_t)i64);
+                       break;
+
+               case DATA_TYPE_STRING:
+                       (void) nvpair_value_string(nvp, &str);
+                       c = fm_printf(d + 1, c, cols, "\"%s\"",
+                           str ? str : "<NULL>");
+                       break;
+
+               case DATA_TYPE_NVLIST:
+                       c = fm_printf(d + 1, c, cols, "[");
+                       (void) nvpair_value_nvlist(nvp, &cnv);
+                       c = fm_nvprintr(cnv, d + 1, c, cols);
+                       c = fm_printf(d + 1, c, cols, " ]");
+                       break;
+
+               case DATA_TYPE_NVLIST_ARRAY: {
+                       nvlist_t **val;
+                       uint_t i, nelem;
+
+                       c = fm_printf(d + 1, c, cols, "[");
+                       (void) nvpair_value_nvlist_array(nvp, &val, &nelem);
+                       for (i = 0; i < nelem; i++) {
+                               c = fm_nvprintr(val[i], d + 1, c, cols);
+                       }
+                       c = fm_printf(d + 1, c, cols, " ]");
+                       }
+                       break;
+
+               case DATA_TYPE_BOOLEAN_ARRAY:
+               case DATA_TYPE_BYTE_ARRAY:
+               case DATA_TYPE_INT8_ARRAY:
+               case DATA_TYPE_UINT8_ARRAY:
+               case DATA_TYPE_INT16_ARRAY:
+               case DATA_TYPE_UINT16_ARRAY:
+               case DATA_TYPE_INT32_ARRAY:
+               case DATA_TYPE_UINT32_ARRAY:
+               case DATA_TYPE_INT64_ARRAY:
+               case DATA_TYPE_UINT64_ARRAY:
+               case DATA_TYPE_STRING_ARRAY:
+                       c = fm_printf(d + 1, c, cols, "[...]");
+                       break;
+               default:        /* DATA_TYPE_UNKNOWN and anything unhandled */
+                       c = fm_printf(d + 1, c, cols, "<unknown>");
+                       break;
+               }
+       }
+
+       return (c);
+}
+
+void
+fm_nvprint(nvlist_t *nvl)
+{
+       char *cls;
+       int col = 0;
+
+       console_printf("\r");
+       /* The class string leads the line; fm_nvprintr() skips it later. */
+       if (nvlist_lookup_string(nvl, FM_CLASS, &cls) == 0)
+               col = fm_printf(0, col, ereport_cols, "%s", cls);
+
+       col = fm_nvprintr(nvl, 0, col, ereport_cols);
+       if (col != 0)
+               console_printf("\n");   /* finish a partial line */
+       console_printf("\n");
+}
+
+/*
+ * Wrapper for panic() that first produces an FMA-style message for admins.
+ * Normally such messages are generated by fmd(1M)'s syslog-msgs agent: this
+ * is the one exception to that rule and the only error that gets messaged.
+ * This function is intended for use by subsystems that have detected a fatal
+ * error and enqueued appropriate ereports and wish to then force a panic.
+ */
+/*PRINTFLIKE1*/
+void
+fm_panic(const char *format, ...)
+{
+       va_list ap;
+       /* Record format in fm_panicstr (tested by fm_banner()) if unset. */
+       (void) casptr((void *)&fm_panicstr, NULL, (void *)format);
+       va_start(ap, format);
+       vpanic(format, ap);
+       va_end(ap);
+}
+
+/*
+ * Emit the FMA banner that precedes the panic message.  panicsys() calls
+ * this function, which prints the message on behalf of fm_panic().
+ * Printing here means the banner appears after the system is quiesced.
+ * Only a one-line summary goes to the log (cmn_err(9F) with "!" prefix).
+ * Everything else is console-only and of no use in the log, hence
+ * console_printf().  The text is split into several calls so that no
+ * small legacy prom_printf() buffer is overflowed.
+ */
+void
+fm_banner(void)
+{
+       timespec_t wall;
+       hrtime_t hrt;
+
+       if (fm_panicstr == NULL)
+               return; /* panic was not initiated by fm_panic(); do nothing */
+
+       if (!panicstr) {
+               gethrestime(&wall);
+               hrt = gethrtime_waitfree();
+       } else {
+               wall = panic_hrestime;
+               hrt = panic_hrtime;
+       }
+
+       cmn_err(CE_NOTE, "!SUNW-MSG-ID: %s, "
+           "TYPE: Error, VER: 1, SEVERITY: Major\n", fm_msgid);
+
+       console_printf(
+"\n\rSUNW-MSG-ID: %s, TYPE: Error, VER: 1, SEVERITY: Major\n"
+"EVENT-TIME: 0x%lx.0x%lx (0x%llx)\n",
+           fm_msgid, wall.tv_sec, wall.tv_nsec, (u_longlong_t)hrt);
+
+       console_printf(
+"PLATFORM: %s, CSN: -, HOSTNAME: %s\n"
+"SOURCE: %s, REV: %s %s\n",
+           platform, utsname.nodename, utsname.sysname,
+           utsname.release, utsname.version);
+
+       console_printf(
+"DESC: Errors have been detected that require a reboot to ensure system\n"
+"integrity.  See %s/%s for more information.\n",
+           fm_url, fm_msgid);
+
+       console_printf(
+"AUTO-RESPONSE: Solaris will attempt to save and diagnose the error telemetry\n"
+"IMPACT: The system will sync files, save a crash dump if needed, and reboot\n"
+"REC-ACTION: Save the error summary below in case telemetry cannot be saved\n");
+
+       console_printf("\n");
+}
+
+/*
+ * Write every pending ereport to the dump device.  Called at normal reboot
+ * or at panic time, this simply walks the messages still in transit on the
+ * ereport sysevent channel and writes a header plus payload for each one.
+ */
+void
+fm_ereport_dump(void)
+{
+       evchanq_t *walk;
+       sysevent_t *ev;
+       erpt_dump_t hdr;
+
+       timespec_t tod;
+       hrtime_t now;
+       char *attr;
+       size_t attrlen;
+
+       if (!panicstr) {
+               if (ereport_errorq != NULL)
+                       errorq_drain(ereport_errorq);
+               gethrestime(&tod);
+               now = gethrtime_waitfree();
+       } else {
+               tod = panic_hrestime;
+               now = panic_hrtime;
+       }
+
+       /*
+        * In the panic case, sysevent_evc_walk_init() will return NULL.
+        */
+       walk = sysevent_evc_walk_init(ereport_chan, NULL);
+       if (walk == NULL && !panicstr)
+               return; /* event channel isn't initialized yet */
+
+       while ((ev = sysevent_evc_walk_step(walk)) != NULL) {
+               attr = sysevent_evc_event_attr(ev, &attrlen);
+               if (attr == NULL)
+                       break;
+               /* Header: magic, payload checksum/size, then time bases. */
+               hdr.ed_magic = ERPT_MAGIC;
+               hdr.ed_pad = 0;
+               hdr.ed_size = (uint32_t)attrlen;
+               hdr.ed_chksum = checksum32(attr, attrlen);
+               hdr.ed_hrt_base = now;
+               hdr.ed_hrt_nsec = SE_TIME(ev);
+               hdr.ed_tod_base.sec = tod.tv_sec;
+               hdr.ed_tod_base.nsec = tod.tv_nsec;
+               dumpvp_write(&hdr, sizeof (hdr));
+               dumpvp_write(attr, attrlen);
+       }
+
+       sysevent_evc_walk_fini(walk);
+}
+
+/*
+ * Publish an error report (ereport) on the sysevent error channel.  The
+ * channel is bound for the duration of the call and unbound before return.
+ * Every ereport that cannot be published bumps the erpt_dropped kstat.
+ */
+void
+fm_ereport_post(nvlist_t *ereport, int evc_flag)
+{
+       evchan_t *chan;
+       size_t encoded = 0;
+
+       /* If nvlist_size() sets nothing, the zero initializer is rejected. */
+       (void) nvlist_size(ereport, &encoded, NV_ENCODE_NATIVE);
+       if (encoded == 0 || encoded > ERPT_DATA_SZ)
+               goto dropped;
+
+       if (sysevent_evc_bind(FM_ERROR_CHAN, &chan,
+           EVCH_CREAT|EVCH_HOLD_PEND) != 0)
+               goto dropped;
+
+       /* Publish; the channel is unbound whether or not that succeeds. */
+       if (sysevent_evc_publish(chan, EC_FM, ESC_FM_ERROR,
+           SUNW_VENDOR, FM_PUB, ereport, evc_flag) != 0)
+               atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
+       sysevent_evc_unbind(chan);
+       return;
+
+       /* Rejected before a channel was bound: only count the drop. */
+dropped:
+       atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
+}
+
+/*
+ * Wrappers for FM nvlist allocators
+ */
+/* ARGSUSED */
+static void *
+i_fm_alloc(nv_alloc_t *nva, size_t size)
+{
+       return (kmem_zalloc(size, KM_SLEEP));   /* zeroed; nva is unused */
+}
+
+/* ARGSUSED */
+static void
+i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
+{
+       kmem_free(buf, size);   /* nva is unused */
+}
+
+const nv_alloc_ops_t fm_mem_alloc_ops = {
+       NULL,
+       NULL,
+       i_fm_alloc,     /* sleeping kmem_zalloc() wrapper */
+       i_fm_free,      /* kmem_free() wrapper */
+       NULL
+};
+
+/*
+ * Build an nv_alloc_t over the caller's fixed buffer, buf, of bufsz bytes.
+ * Returns the new handle, or NULL when bufsz is zero or nv_alloc_init()
+ * fails; on failure the handle allocated here is freed again.
+ */
+nv_alloc_t *
+fm_nva_xcreate(char *buf, size_t bufsz)
+{
+       nv_alloc_t *hdl = kmem_zalloc(sizeof (*hdl), KM_SLEEP);
+
+       /* Success needs a non-empty buffer and a clean nv_alloc_init(). */
+       if (bufsz != 0 && nv_alloc_init(hdl, nv_fixed_ops, buf, bufsz) == 0)
+               return (hdl);
+
+       kmem_free(hdl, sizeof (*hdl));
+       return (NULL);
+}
+
+/*
+ * Destroy a previously allocated nv_alloc structure.  The fixed buffer
+ * associated with nva must be freed by the caller.
+ */
+void
+fm_nva_xdestroy(nv_alloc_t *nva)
+{
+       nv_alloc_fini(nva);     /* finalize while the handle is still valid */
+       kmem_free(nva, sizeof (nv_alloc_t));    /* frees only the handle */
+}
+
+/*
+ * Create a new nv list.  A pointer to a new nv list structure is returned
+ * upon success or NULL is returned to indicate that the structure could
+ * not be created.  The newly created nv list is created and managed by the
+ * operations installed in nva.   If nva is NULL, the default FMA nva
+ * operations are installed and used.
+ *
+ * When called from the kernel and nva == NULL, this function must be called
+ * from passive kernel context with no locks held that can prevent a
+ * sleeping memory allocation from occurring.  Otherwise, this function may
+ * be called from other kernel contexts as long as a valid nva created via
+ * fm_nva_xcreate() is supplied.
+ */
+nvlist_t *
+fm_nvlist_create(nv_alloc_t *nva)
+{
+       int hdl_alloced = 0;
+       nvlist_t *nvl;
+       nv_alloc_t *nvhdl;
+
+       if (nva == NULL) {
+               nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);
+
+               if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
+                       kmem_free(nvhdl, sizeof (nv_alloc_t));
+                       return (NULL);
+               }
+               hdl_alloced = 1;
+       } else {
+               nvhdl = nva;
+       }
+
+       if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
+               if (hdl_alloced) {
+                       nv_alloc_fini(nvhdl);   /* fini before the free */
+                       kmem_free(nvhdl, sizeof (nv_alloc_t));
+               }
+               return (NULL);
+       }
+
+       return (nvl);
+}
+
+/*
+ * Free an nvlist obtained from fm_nvlist_create().  flag selects what
+ * happens to the nv alloc structure behind it: FM_NVA_FREE destroys it as
+ * well, while FM_NVA_RETAIN leaves it in place so that it can be re-used
+ * for future nvlist creation operations.
+ */
+void
+fm_nvlist_destroy(nvlist_t *nvl, int flag)
+{
+       /* Look the allocator up first; nvl is freed right afterwards. */
+       nv_alloc_t *hdl = nvlist_lookup_nv_alloc(nvl);
+
+       nvlist_free(nvl);
+
+       if (hdl == NULL || flag != FM_NVA_FREE)
+               return;
+       fm_nva_xdestroy(hdl);
+}
+
+int
+i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
+{
+       int nelem, ret = 0;
+       data_type_t type;
+       /* Varargs are (type, [nelem,] value) per name, ending at a NULL name. */
+       while (ret == 0 && name != NULL) {
+               type = va_arg(ap, data_type_t);
+               switch (type) {
+               case DATA_TYPE_BYTE:
+                       ret = nvlist_add_byte(payload, name,
+                           va_arg(ap, uint_t));
+                       break;
+               case DATA_TYPE_BYTE_ARRAY:
+                       nelem = va_arg(ap, int);
+                       ret = nvlist_add_byte_array(payload, name,
+                           va_arg(ap, uchar_t *), nelem);
+                       break;
+               case DATA_TYPE_BOOLEAN_VALUE:
+                       ret = nvlist_add_boolean_value(payload, name,
+                           va_arg(ap, boolean_t));
+                       break;
+               case DATA_TYPE_BOOLEAN_ARRAY:
+                       nelem = va_arg(ap, int);
+                       ret = nvlist_add_boolean_array(payload, name,
+                           va_arg(ap, boolean_t *), nelem);
+                       break;
+               case DATA_TYPE_INT8:
+                       ret = nvlist_add_int8(payload, name,
+                           va_arg(ap, int));
+                       break;
+               case DATA_TYPE_INT8_ARRAY:
+                       nelem = va_arg(ap, int);
+                       ret = nvlist_add_int8_array(payload, name,
+                           va_arg(ap, int8_t *), nelem);
+                       break;
+               case DATA_TYPE_UINT8:
+                       ret = nvlist_add_uint8(payload, name,
+                           va_arg(ap, uint_t));
+                       break;
+               case DATA_TYPE_UINT8_ARRAY:
+                       nelem = va_arg(ap, int);
+                       ret = nvlist_add_uint8_array(payload, name,
+                           va_arg(ap, uint8_t *), nelem);
+                       break;
+               case DATA_TYPE_INT16:
+                       ret = nvlist_add_int16(payload, name,
+                           va_arg(ap, int));
+                       break;
+               case DATA_TYPE_INT16_ARRAY:
+                       nelem = va_arg(ap, int);
+                       ret = nvlist_add_int16_array(payload, name,
+                           va_arg(ap, int16_t *), nelem);
+                       break;
+               case DATA_TYPE_UINT16:
+                       ret = nvlist_add_uint16(payload, name,
+                           va_arg(ap, uint_t));
+                       break;
+               case DATA_TYPE_UINT16_ARRAY:
+                       nelem = va_arg(ap, int);
+                       ret = nvlist_add_uint16_array(payload, name,
+                           va_arg(ap, uint16_t *), nelem);
+                       break;
+               case DATA_TYPE_INT32:
+                       ret = nvlist_add_int32(payload, name,
+                           va_arg(ap, int32_t));
+                       break;
+               case DATA_TYPE_INT32_ARRAY:
+                       nelem = va_arg(ap, int);
+                       ret = nvlist_add_int32_array(payload, name,
+                           va_arg(ap, int32_t *), nelem);
+                       break;
+               case DATA_TYPE_UINT32:
+                       ret = nvlist_add_uint32(payload, name,
+                           va_arg(ap, uint32_t));
+                       break;
+               case DATA_TYPE_UINT32_ARRAY:
+                       nelem = va_arg(ap, int);
+                       ret = nvlist_add_uint32_array(payload, name,
+                           va_arg(ap, uint32_t *), nelem);
+                       break;
+               case DATA_TYPE_INT64:
+                       ret = nvlist_add_int64(payload, name,
+                           va_arg(ap, int64_t));
+                       break;
+               case DATA_TYPE_INT64_ARRAY:
+                       nelem = va_arg(ap, int);
+                       ret = nvlist_add_int64_array(payload, name,
+                           va_arg(ap, int64_t *), nelem);
+                       break;
+               case DATA_TYPE_UINT64:
+                       ret = nvlist_add_uint64(payload, name,
+                           va_arg(ap, uint64_t));
+                       break;
+               case DATA_TYPE_UINT64_ARRAY:
+                       nelem = va_arg(ap, int);
+                       ret = nvlist_add_uint64_array(payload, name,
+                           va_arg(ap, uint64_t *), nelem);
+                       break;
+               case DATA_TYPE_STRING:
+                       ret = nvlist_add_string(payload, name,
+                           va_arg(ap, char *));
+                       break;
+               case DATA_TYPE_STRING_ARRAY:
+                       nelem = va_arg(ap, int);
+                       ret = nvlist_add_string_array(payload, name,
+                           va_arg(ap, char **), nelem);
+                       break;
+               case DATA_TYPE_NVLIST:
+                       ret = nvlist_add_nvlist(payload, name,
+                           va_arg(ap, nvlist_t *));
+                       break;
+               case DATA_TYPE_NVLIST_ARRAY:
+                       nelem = va_arg(ap, int);
+                       ret = nvlist_add_nvlist_array(payload, name,
+                           va_arg(ap, nvlist_t **), nelem);
+                       break;
+               default:
+                       ret = EINVAL;
+               }
+               /* On failure the arguments are out of step; stop reading. */
+               name = (ret == 0) ? va_arg(ap, char *) : NULL;
+       }
+       return (ret);
+}
+
+void
+fm_payload_set(nvlist_t *payload, ...)
+{
+       va_list args;
+       const char *first;
+       int err;
+       /* The first vararg is the first member name (NULL for no members). */
+       va_start(args, payload);
+       first = va_arg(args, char *);
+       err = i_fm_payload_set(payload, first, args);
+       va_end(args);
+
+       if (err != 0)
+               atomic_add_64(
+                   &erpt_kstat_data.payload_set_failed.value.ui64, 1);
+}
+
+/*
+ * Populate the members of an ereport event, validating the version:
+ *
+ *     Member name             Type            Value
+ *     ====================================================
+ *     class                   string          ereport
+ *     version                 uint8_t         0
+ *     ena                     uint64_t        <ena>
+ *     detector                nvlist_t        <detector>
+ *     ereport-payload         nvlist_t        <var args>
+ *
+ */
+void
+fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
+    uint64_t ena, const nvlist_t *detector, ...)
+{
+       char full_class[FM_MAX_CLASS];
+       const char *first;
+       va_list args;
+       int err;
+
+       if (version != FM_EREPORT_VERS0) {
+               atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+               return;
+       }
+
+       (void) snprintf(full_class, sizeof (full_class), "%s.%s",
+           FM_EREPORT_CLASS, erpt_class);
+       if (nvlist_add_string(ereport, FM_CLASS, full_class) != 0) {
+               atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+               return;
+       }
+
+       /* ENA and detector failures are counted but do not stop the set-up. */
+       if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena) != 0)
+               atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+
+       if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
+           (nvlist_t *)detector) != 0)
+               atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+
+       /* The remaining arguments are handed to i_fm_payload_set(). */
+       va_start(args, detector);
+       first = va_arg(args, const char *);
+       err = i_fm_payload_set(ereport, first, args);
+       va_end(args);
+
+       if (err != 0)
+               atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+}
+
+/*
+ * Set-up and validate the members of an hc fmri according to:
+ *
+ *     Member name             Type            Value
+ *     ===================================================
+ *     version                 uint8_t         0
+ *     auth                    nvlist_t        <auth>
+ *     hc-name                 string          <name>
+ *     hc-id                   string          <id>
+ *
+ * Note that auth and hc-id are optional members.
+ */
+
+#define        HC_MAXPAIRS     20      /* cap on pairs in fm_fmri_hc_set() */
+#define        HC_MAXNAMELEN   50      /* NOTE(review): no use visible here */
+
+/*
+ * Add the version, scheme and (when auth is non-NULL) authority members of
+ * an hc fmri.  Returns 1 on success; on any failure the fmri_set_failed
+ * kstat is bumped once and 0 is returned.
+ */
+static int
+fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
+{
+       /* && short-circuits, so nothing is added after the first failure. */
+       int ok = version == FM_HC_SCHEME_VERSION &&
+           nvlist_add_uint8(fmri, FM_VERSION, version) == 0 &&
+           nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) == 0 &&
+           (auth == NULL || nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
+           (nvlist_t *)auth) == 0);
+
+       if (!ok) {
+               atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+               return (0);
+       }
+
+       return (1);
+}
+
+void
+fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
+    nvlist_t *snvl, int npairs, ...)
+{
+       nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
+       nvlist_t *hcl[HC_MAXPAIRS];
+       va_list args;
+       int n;
+
+       if (fm_fmri_hc_set_common(fmri, version, auth) == 0)
+               return;
+
+       /* Never build more pairs than hcl[] can hold. */
+       if (npairs > HC_MAXPAIRS)
+               npairs = HC_MAXPAIRS;
+
+       /* Each vararg pair is a name string followed by a uint32_t id. */
+       va_start(args, npairs);
+       for (n = 0; n < npairs; n++) {
+               const char *hc_name = va_arg(args, const char *);
+               uint32_t hc_id = va_arg(args, uint32_t);
+               char id_text[11];
+
+               (void) snprintf(id_text, sizeof (id_text), "%u", hc_id);
+               hcl[n] = fm_nvlist_create(nva);
+               if (nvlist_add_string(hcl[n], FM_FMRI_HC_NAME, hc_name) != 0 ||
+                   nvlist_add_string(hcl[n], FM_FMRI_HC_ID, id_text) != 0) {
+                       atomic_add_64(
+                           &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+               }
+       }
+       va_end(args);
+
+       if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, hcl, npairs) != 0)
+               atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+
+       /* FM_NVA_RETAIN: nva was looked up from fmri and must outlive this. */
+       for (n = 0; n < npairs; n++)
+               fm_nvlist_destroy(hcl[n], FM_NVA_RETAIN);
+
+       if (snvl != NULL &&
+           nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0)
+               atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+}
+
+/*
+ * Set-up and validate the members of a dev fmri according to:
+ *
+ *     Member name             Type            Value
+ *     ====================================================
+ *     version                 uint8_t         0
+ *     auth                    nvlist_t        <auth>
+ *     device-path             string          <devpath>
+ *     devid                   string          <devid>
+ *
+ * Note that auth and devid are optional members.
+ *
+ * A bad version, or a failure to add the version or scheme, bumps the
+ * fmri_set_failed kstat and abandons the fmri.  Failures on the remaining
+ * members are counted individually but do not stop processing.
+ */
+void
+fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
+    const char *devpath, const char *devid)
+{
+       uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
+
+       /* Mandatory header: the first failure here is fatal. */
+       if (version != DEV_SCHEME_VERSION0 ||
+           nvlist_add_uint8(fmri_dev, FM_VERSION, version) != 0 ||
+           nvlist_add_string(fmri_dev, FM_FMRI_SCHEME,
+           FM_FMRI_SCHEME_DEV) != 0) {
+               atomic_add_64(failedp, 1);
+               return;
+       }
+
+       if (auth != NULL && nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
+           (nvlist_t *)auth) != 0)
+               atomic_add_64(failedp, 1);
+
+       if (nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath) != 0)
+               atomic_add_64(failedp, 1);
+
+       if (devid != NULL &&
+           nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid) != 0)
+               atomic_add_64(failedp, 1);
+}
+
+/*
+ * Set-up and validate the members of a cpu fmri according to:
+ *
+ *     Member name             Type            Value
+ *     ====================================================
+ *     version                 uint8_t         <version>
+ *     auth                    nvlist_t        <auth>
+ *     cpuid                   uint32_t        <cpu_id>
+ *     cpumask                 uint8_t         <cpu_mask>
+ *     serial                  string          <serial_id>
+ *
+ * Note that auth, cpumask, serial are optional members; pass NULL to omit
+ * any of them.  Versions below CPU_SCHEME_VERSION1 are rejected.
+ *
+ * A rejected version, or a failure to add the version or scheme, bumps the
+ * fmri_set_failed kstat and abandons the fmri.  Failures on the remaining
+ * members are counted individually but do not stop processing.
+ */
+void
+fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
+    uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
+{
+       uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
+
+       if (version < CPU_SCHEME_VERSION1) {
+               atomic_add_64(failedp, 1);
+               return;
+       }
+
+       if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
+               atomic_add_64(failedp, 1);
+               return;
+       }
+
+       if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
+           FM_FMRI_SCHEME_CPU) != 0) {
+               atomic_add_64(failedp, 1);
+               return;
+       }
+
+       if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
+           (nvlist_t *)auth) != 0)
+               atomic_add_64(failedp, 1);
+
+       if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
+               atomic_add_64(failedp, 1);
+
+       if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
+           *cpu_maskp) != 0)
+               atomic_add_64(failedp, 1);
+
+       /*
+        * The serial is optional, so an absent one is not a failure; only a
+        * failed add is counted.
+        */
+       if (serial_idp != NULL && nvlist_add_string(fmri_cpu,
+           FM_FMRI_CPU_SERIAL_ID, serial_idp) != 0)
+               atomic_add_64(failedp, 1);
+}
+
+/*
+ * Set-up and validate the members of a mem fmri according to:
+ *
+ *     Member name             Type            Value
+ *     ====================================================
+ *     version                 uint8_t         0
+ *     auth                    nvlist_t        <auth>          [optional]
+ *     unum                    string          <unum>
+ *     serial                  string array    <serial>        [optional*]
+ *     offset                  uint64_t        <offset>        [optional]
+ *
+ *     * serial is required if offset is present
+ *
+ * An offset of (uint64_t)-1 means "no offset".  The serial, when given, is
+ * stored as a one-element string array.
+ *
+ * A bad version, an offset without a serial, or a failure to add the version
+ * or scheme bumps the fmri_set_failed kstat and abandons the fmri.  Failures
+ * on the remaining members are counted individually.
+ */
+void
+fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
+    const char *unum, const char *serial, uint64_t offset)
+{
+       uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
+       int have_offset = (offset != (uint64_t)-1);
+
+       /* Argument validation and mandatory header: first failure is fatal. */
+       if (version != MEM_SCHEME_VERSION0 ||
+           (serial == NULL && have_offset) ||
+           nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
+           nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
+               atomic_add_64(failedp, 1);
+               return;
+       }
+
+       if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
+           (nvlist_t *)auth) != 0)
+               atomic_add_64(failedp, 1);
+
+       if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0)
+               atomic_add_64(failedp, 1);
+
+       /* Without a serial there is nothing further to add. */
+       if (serial == NULL)
+               return;
+
+       if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
+           (char **)&serial, 1) != 0)
+               atomic_add_64(failedp, 1);
+
+       if (have_offset &&
+           nvlist_add_uint64(fmri, FM_FMRI_MEM_OFFSET, offset) != 0)
+               atomic_add_64(failedp, 1);
+}
+
+/*
+ * Set-up and validate the members of a zfs fmri according to:
+ *
+ *     Member name             Type            Value
+ *     ====================================================
+ *     version                 uint8_t         0
+ *     pool                    uint64_t        <pool_guid>
+ *     vdev                    uint64_t        <vdev_guid>
+ *
+ * The vdev member is only added when vdev_guid is non-zero.
+ *
+ * A bad version, or a failure to add the version or scheme, bumps the
+ * fmri_set_failed kstat and abandons the fmri.  Failures on pool and vdev
+ * are counted individually.
+ */
+void
+fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
+    uint64_t vdev_guid)
+{
+       uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
+
+       /* Mandatory header: the first failure here is fatal. */
+       if (version != ZFS_SCHEME_VERSION0 ||
+           nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
+           nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
+               atomic_add_64(failedp, 1);
+               return;
+       }
+
+       if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0)
+               atomic_add_64(failedp, 1);
+
+       if (vdev_guid != 0 &&
+           nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0)
+               atomic_add_64(failedp, 1);
+}
+
+/*
+ * Return 'ena' with one unit added at the generation field's position for
+ * its format (FM_ENA_FMT1 or FM_ENA_FMT2).  Any other format yields 0.
+ *
+ * This is a plain addition: no check is made for the generation field
+ * overflowing into the bits above it.
+ */
+uint64_t
+fm_ena_increment(uint64_t ena)
+{
+       uint64_t fmt = ENA_FORMAT(ena);
+
+       if (fmt == FM_ENA_FMT1)
+               return (ena + (1 << ENA_FMT1_GEN_SHFT));
+
+       if (fmt == FM_ENA_FMT2)
+               return (ena + (1 << ENA_FMT2_GEN_SHFT));
+
+       return (0);
+}
+
+/*
+ * Compose an ENA of the requested format.
+ *
+ *     FM_ENA_FMT1:    format | cpuid field | time field.  A timestamp of 0
+ *                     is replaced by the current gethrtime_waitfree() value.
+ *     FM_ENA_FMT2:    format | time field.  The timestamp is used as given
+ *                     and cpuid is ignored.
+ *
+ * Any other format returns 0.  Values are shifted into place and then
+ * masked, so bits that do not fit their field are discarded.
+ */
+uint64_t
+fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
+{
+       uint64_t fmt = format & ENA_FORMAT_MASK;
+       uint64_t ena = 0;
+
+       switch (format) {
+       case FM_ENA_FMT1:
+               if (timestamp == 0)
+                       timestamp = (uint64_t)gethrtime_waitfree();
+
+               /*
+                * Widen cpuid before shifting so the shift is performed on
+                * an unsigned 64-bit value rather than on a signed one.
+                */
+               ena = fmt |
+                   (((uint64_t)cpuid << ENA_FMT1_CPUID_SHFT) &
+                   ENA_FMT1_CPUID_MASK) |
+                   ((timestamp << ENA_FMT1_TIME_SHFT) &
+                   ENA_FMT1_TIME_MASK);
+               break;
+       case FM_ENA_FMT2:
+               ena = fmt |
+                   ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK);
+               break;
+       default:
+               break;
+       }
+
+       return (ena);
+}
+
+/*
+ * Compose an ENA as fm_ena_generate_cpu() does, using CPU->cpu_id as the
+ * cpu id.
+ */
+uint64_t
+fm_ena_generate(uint64_t timestamp, uchar_t format)
+{
+       processorid_t cpuid = CPU->cpu_id;
+
+       return (fm_ena_generate_cpu(timestamp, cpuid, format));
+}
+
+/*
+ * Extract the generation field from an ENA of format FM_ENA_FMT1 or
+ * FM_ENA_FMT2.  Any other format yields 0.
+ */
+uint64_t
+fm_ena_generation_get(uint64_t ena)
+{
+       uint64_t fmt = ENA_FORMAT(ena);
+
+       if (fmt == FM_ENA_FMT1)
+               return ((ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT);
+
+       if (fmt == FM_ENA_FMT2)
+               return ((ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT);
+
+       return (0);
+}
+
+/*
+ * Return the format field of an ENA, as selected by ENA_FORMAT().
+ */
+uchar_t
+fm_ena_format_get(uint64_t ena)
+{
+       uchar_t fmt = ENA_FORMAT(ena);
+
+       return (fmt);
+}
+
+/*
+ * Extract the id field from an ENA of format FM_ENA_FMT1 or FM_ENA_FMT2.
+ * Any other format yields 0.
+ */
+uint64_t
+fm_ena_id_get(uint64_t ena)
+{
+       uint64_t fmt = ENA_FORMAT(ena);
+
+       if (fmt == FM_ENA_FMT1)
+               return ((ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT);
+
+       if (fmt == FM_ENA_FMT2)
+               return ((ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT);
+
+       return (0);
+}
+
+/*
+ * Extract the time field from an ENA of format FM_ENA_FMT1 or FM_ENA_FMT2.
+ * Any other format yields 0.
+ */
+uint64_t
+fm_ena_time_get(uint64_t ena)
+{
+       uint64_t fmt = ENA_FORMAT(ena);
+
+       if (fmt == FM_ENA_FMT1)
+               return ((ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT);
+
+       if (fmt == FM_ENA_FMT2)
+               return ((ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT);
+
+       return (0);
+}
+
+/*
+ * Convert a getpcstack() trace to symbolic name+offset, and add the resulting
+ * string array to a Fault Management ereport as FM_EREPORT_PAYLOAD_NAME_STACK.
+ *
+ * At most FM_STK_DEPTH frames are converted.  Each is rendered into its own
+ * FM_SYM_SZ-byte slot as "symbol+offset", or as the bare hex address when
+ * kobj_getsymname() finds no symbol.  Only the frames actually converted are
+ * added to the payload.
+ */
+void
+fm_payload_stack_add(nvlist_t *payload, const pc_t *stack, int depth)
+{
+       int i;
+       char *sym;
+       ulong_t off;
+       char *stkpp[FM_STK_DEPTH];
+       char buf[FM_STK_DEPTH * FM_SYM_SZ];
+       char *stkp = buf;
+
+       for (i = 0; i < depth && i != FM_STK_DEPTH; i++, stkp += FM_SYM_SZ) {
+               if ((sym = kobj_getsymname(stack[i], &off)) != NULL)
+                       (void) snprintf(stkp, FM_SYM_SZ, "%s+%lx", sym, off);
+               else
+                       (void) snprintf(stkp, FM_SYM_SZ, "%lx", (long)stack[i]);
+               stkpp[i] = stkp;
+       }
+
+       /*
+        * Pass the number of entries filled in (i), not the caller's depth:
+        * depth may exceed FM_STK_DEPTH (or be negative), in which case
+        * stkpp[] holds fewer than depth valid pointers.
+        */
+       fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_STACK,
+           DATA_TYPE_STRING_ARRAY, i, stkpp, NULL);
+}
+
+/*
+ * Report through uprintf() that process 'p', a member of contract 'ct_id',
+ * was killed because of a hardware error.  The message carries the process
+ * id, its command name and the contract id.
+ */
+void
+print_msg_hwerr(ctid_t ct_id, proc_t *p)
+{
+       uprintf("Killed process %d (%s) "
+           "in contract id %d due to hardware error\n",
+           p->p_pid, p->p_user.u_comm, ct_id);
+}
diff --git a/module/zfs/include/sys/fm/protocol.h b/module/zfs/include/sys/fm/protocol.h
new file mode 100644 (file)
index 0000000..767fb07
--- /dev/null
@@ -0,0 +1,336 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef        _SYS_FM_PROTOCOL_H
+#define        _SYS_FM_PROTOCOL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+#include <sys/varargs.h>
+#include <sys/nvpair.h>
+#else
+#include <libnvpair.h>
+#include <stdarg.h>
+#endif
+#include <sys/processor.h>
+
+/* FM common member names */
+#define        FM_CLASS                        "class"
+#define        FM_VERSION                      "version"
+
+/* FM event class values */
+#define        FM_EREPORT_CLASS                "ereport"
+#define        FM_FAULT_CLASS                  "fault"
+#define        FM_RSRC_CLASS                   "resource"
+#define        FM_LIST_EVENT                   "list"
+
+/* FM list.* event class values */
+#define        FM_LIST_SUSPECT_CLASS           FM_LIST_EVENT ".suspect"
+#define        FM_LIST_ISOLATED_CLASS          FM_LIST_EVENT ".isolated"
+#define        FM_LIST_REPAIRED_CLASS          FM_LIST_EVENT ".repaired"
+#define        FM_LIST_UPDATED_CLASS           FM_LIST_EVENT ".updated"
+#define        FM_LIST_RESOLVED_CLASS          FM_LIST_EVENT ".resolved"
+
+/* ereport class subcategory values */
+#define        FM_ERROR_CPU                    "cpu"
+#define        FM_ERROR_IO                     "io"
+
+/* ereport version and payload member names */
+#define        FM_EREPORT_VERS0                0
+#define        FM_EREPORT_VERSION              FM_EREPORT_VERS0
+
+/* ereport payload member names */
+#define        FM_EREPORT_DETECTOR             "detector"
+#define        FM_EREPORT_ENA                  "ena"
+
+/* list.* event payload member names */
+#define        FM_LIST_EVENT_SIZE              "list-sz"
+
+/*
+ * list.suspect, isolated, updated, repaired and resolved
+ * versions/payload member names.
+ */
+#define        FM_SUSPECT_UUID                 "uuid"
+#define        FM_SUSPECT_DIAG_CODE            "code"
+#define        FM_SUSPECT_DIAG_TIME            "diag-time"
+#define        FM_SUSPECT_DE                   "de"
+#define        FM_SUSPECT_FAULT_LIST           "fault-list"
+#define        FM_SUSPECT_FAULT_SZ             "fault-list-sz"
+#define        FM_SUSPECT_FAULT_STATUS         "fault-status"
+#define        FM_SUSPECT_MESSAGE              "message"
+#define        FM_SUSPECT_RETIRE               "retire"
+#define        FM_SUSPECT_RESPONSE             "response"
+#define        FM_SUSPECT_SEVERITY             "severity"
+
+#define        FM_SUSPECT_VERS0                0
+#define        FM_SUSPECT_VERSION              FM_SUSPECT_VERS0
+
+#define        FM_SUSPECT_FAULTY               0x1
+#define        FM_SUSPECT_UNUSABLE             0x2
+#define        FM_SUSPECT_NOT_PRESENT          0x4
+#define        FM_SUSPECT_DEGRADED             0x8
+#define        FM_SUSPECT_REPAIRED             0x10
+#define        FM_SUSPECT_REPLACED             0x20
+#define        FM_SUSPECT_ACQUITTED            0x40
+
+/* fault event versions and payload member names */
+#define        FM_FAULT_VERS0                  0
+#define        FM_FAULT_VERSION                FM_FAULT_VERS0
+
+#define        FM_FAULT_ASRU                   "asru"
+#define        FM_FAULT_FRU                    "fru"
+#define        FM_FAULT_FRU_LABEL              "fru-label"
+#define        FM_FAULT_CERTAINTY              "certainty"
+#define        FM_FAULT_RESOURCE               "resource"
+#define        FM_FAULT_LOCATION               "location"
+
+/* resource event versions and payload member names */
+#define        FM_RSRC_VERS0                   0
+#define        FM_RSRC_VERSION                 FM_RSRC_VERS0
+#define        FM_RSRC_RESOURCE                "resource"
+
+/* resource.fm.asru.* payload member names */
+#define        FM_RSRC_ASRU_UUID               "uuid"
+#define        FM_RSRC_ASRU_CODE               "code"
+#define        FM_RSRC_ASRU_FAULTY             "faulty"
+#define        FM_RSRC_ASRU_REPAIRED           "repaired"
+#define        FM_RSRC_ASRU_REPLACED           "replaced"
+#define        FM_RSRC_ASRU_ACQUITTED          "acquitted"
+#define        FM_RSRC_ASRU_UNUSABLE           "unusable"
+#define        FM_RSRC_ASRU_EVENT              "event"
+
+/* resource.fm.xprt.* versions and payload member names */
+#define        FM_RSRC_XPRT_VERS0              0
+#define        FM_RSRC_XPRT_VERSION            FM_RSRC_XPRT_VERS0
+#define        FM_RSRC_XPRT_UUID               "uuid"
+#define        FM_RSRC_XPRT_SUBCLASS           "subclass"
+#define        FM_RSRC_XPRT_FAULT_STATUS       "fault-status"
+#define        FM_RSRC_XPRT_FAULT_HAS_ASRU     "fault-has-asru"
+
+/*
+ * FM ENA Format Macros
+ *
+ * An ENA is a 64-bit value whose two low-order bits (ENA_FORMAT_MASK)
+ * select its format.  Per the masks and shifts below, the remaining bits
+ * are laid out as:
+ *
+ *     Format 1:  bits 2-9 generation, bits 10-19 cpuid, bits 20-63 time;
+ *                the id field (bits 10-63) spans both cpuid and time.
+ *     Format 2:  bits 2-9 generation, bits 10-63 time; the id field
+ *                covers the same bits as time.
+ */
+#define        ENA_FORMAT_MASK                 0x3
+#define        ENA_FORMAT(ena)                 ((ena) & ENA_FORMAT_MASK)
+
+/* ENA format types */
+#define        FM_ENA_FMT0                     0
+#define        FM_ENA_FMT1                     1
+#define        FM_ENA_FMT2                     2
+
+/* Format 1: generation, cpuid and time fields */
+#define        ENA_FMT1_GEN_MASK               0x00000000000003FCull
+#define        ENA_FMT1_ID_MASK                0xFFFFFFFFFFFFFC00ull
+#define        ENA_FMT1_CPUID_MASK             0x00000000000FFC00ull
+#define        ENA_FMT1_TIME_MASK              0xFFFFFFFFFFF00000ull
+#define        ENA_FMT1_GEN_SHFT               2
+#define        ENA_FMT1_ID_SHFT                10
+#define        ENA_FMT1_CPUID_SHFT             ENA_FMT1_ID_SHFT
+#define        ENA_FMT1_TIME_SHFT              20
+
+/* Format 2: generation and time fields only */
+#define        ENA_FMT2_GEN_MASK               0x00000000000003FCull
+#define        ENA_FMT2_ID_MASK                0xFFFFFFFFFFFFFC00ull
+#define        ENA_FMT2_TIME_MASK              ENA_FMT2_ID_MASK
+#define        ENA_FMT2_GEN_SHFT               2
+#define        ENA_FMT2_ID_SHFT                10
+#define        ENA_FMT2_TIME_SHFT              ENA_FMT2_ID_SHFT
+
+/* Common FMRI type names */
+#define        FM_FMRI_AUTHORITY               "authority"
+#define        FM_FMRI_SCHEME                  "scheme"
+#define        FM_FMRI_SVC_AUTHORITY           "svc-authority"
+#define        FM_FMRI_FACILITY                "facility"
+
+/* FMRI authority-type member names */
+#define        FM_FMRI_AUTH_CHASSIS            "chassis-id"
+#define        FM_FMRI_AUTH_PRODUCT            "product-id"
+#define        FM_FMRI_AUTH_DOMAIN             "domain-id"
+#define        FM_FMRI_AUTH_SERVER             "server-id"
+#define        FM_FMRI_AUTH_HOST               "host-id"
+
+#define        FM_AUTH_VERS0                   0
+#define        FM_FMRI_AUTH_VERSION            FM_AUTH_VERS0
+
+/* scheme name values */
+#define        FM_FMRI_SCHEME_FMD              "fmd"
+#define        FM_FMRI_SCHEME_DEV              "dev"
+#define        FM_FMRI_SCHEME_HC               "hc"
+#define        FM_FMRI_SCHEME_SVC              "svc"
+#define        FM_FMRI_SCHEME_CPU              "cpu"
+#define        FM_FMRI_SCHEME_MEM              "mem"
+#define        FM_FMRI_SCHEME_MOD              "mod"
+#define        FM_FMRI_SCHEME_PKG              "pkg"
+#define        FM_FMRI_SCHEME_LEGACY           "legacy-hc"
+#define        FM_FMRI_SCHEME_ZFS              "zfs"
+
+/* Scheme versions */
+#define        FMD_SCHEME_VERSION0             0
+#define        FM_FMD_SCHEME_VERSION           FMD_SCHEME_VERSION0
+#define        DEV_SCHEME_VERSION0             0
+#define        FM_DEV_SCHEME_VERSION           DEV_SCHEME_VERSION0
+#define        FM_HC_VERS0                     0
+#define        FM_HC_SCHEME_VERSION            FM_HC_VERS0
+#define        CPU_SCHEME_VERSION0             0
+#define        CPU_SCHEME_VERSION1             1
+#define        FM_CPU_SCHEME_VERSION           CPU_SCHEME_VERSION1
+#define        MEM_SCHEME_VERSION0             0
+#define        FM_MEM_SCHEME_VERSION           MEM_SCHEME_VERSION0
+#define        MOD_SCHEME_VERSION0             0
+#define        FM_MOD_SCHEME_VERSION           MOD_SCHEME_VERSION0
+#define        PKG_SCHEME_VERSION0             0
+#define        FM_PKG_SCHEME_VERSION           PKG_SCHEME_VERSION0
+#define        LEGACY_SCHEME_VERSION0          0
+#define        FM_LEGACY_SCHEME_VERSION        LEGACY_SCHEME_VERSION0
+#define        SVC_SCHEME_VERSION0             0
+#define        FM_SVC_SCHEME_VERSION           SVC_SCHEME_VERSION0
+#define        ZFS_SCHEME_VERSION0             0
+#define        FM_ZFS_SCHEME_VERSION           ZFS_SCHEME_VERSION0
+
+/* hc scheme member names */
+#define        FM_FMRI_HC_SERIAL_ID            "serial"
+#define        FM_FMRI_HC_PART                 "part"
+#define        FM_FMRI_HC_REVISION             "revision"
+#define        FM_FMRI_HC_ROOT                 "hc-root"
+#define        FM_FMRI_HC_LIST_SZ              "hc-list-sz"
+#define        FM_FMRI_HC_LIST                 "hc-list"
+#define        FM_FMRI_HC_SPECIFIC             "hc-specific"
+
+/* facility member names */
+#define        FM_FMRI_FACILITY_NAME           "facility-name"
+#define        FM_FMRI_FACILITY_TYPE           "facility-type"
+
+/* hc-list version and member names */
+#define        FM_FMRI_HC_NAME                 "hc-name"
+#define        FM_FMRI_HC_ID                   "hc-id"
+
+#define        HC_LIST_VERSION0                0
+#define        FM_HC_LIST_VERSION              HC_LIST_VERSION0
+
+/* hc-specific member names */
+#define        FM_FMRI_HC_SPECIFIC_OFFSET      "offset"
+#define        FM_FMRI_HC_SPECIFIC_PHYSADDR    "physaddr"
+
+/* fmd module scheme member names */
+#define        FM_FMRI_FMD_NAME                "mod-name"
+#define        FM_FMRI_FMD_VERSION             "mod-version"
+
+/* dev scheme member names */
+#define        FM_FMRI_DEV_ID                  "devid"
+#define        FM_FMRI_DEV_PATH                "device-path"
+
+/* pkg scheme member names */
+#define        FM_FMRI_PKG_BASEDIR             "pkg-basedir"
+#define        FM_FMRI_PKG_INST                "pkg-inst"
+#define        FM_FMRI_PKG_VERSION             "pkg-version"
+
+/* svc scheme member names */
+#define        FM_FMRI_SVC_NAME                "svc-name"
+#define        FM_FMRI_SVC_INSTANCE            "svc-instance"
+#define        FM_FMRI_SVC_CONTRACT_ID         "svc-contract-id"
+
+/* svc-authority member names */
+#define        FM_FMRI_SVC_AUTH_SCOPE          "scope"
+#define        FM_FMRI_SVC_AUTH_SYSTEM_FQN     "system-fqn"
+
+/* cpu scheme member names */
+#define        FM_FMRI_CPU_ID                  "cpuid"
+#define        FM_FMRI_CPU_SERIAL_ID           "serial"
+#define        FM_FMRI_CPU_MASK                "cpumask"
+#define        FM_FMRI_CPU_VID                 "cpuvid"
+#define        FM_FMRI_CPU_CPUFRU              "cpufru"
+#define        FM_FMRI_CPU_CACHE_INDEX         "cacheindex"
+#define        FM_FMRI_CPU_CACHE_WAY           "cacheway"
+#define        FM_FMRI_CPU_CACHE_BIT           "cachebit"
+#define        FM_FMRI_CPU_CACHE_TYPE          "cachetype"
+
+#define        FM_FMRI_CPU_CACHE_TYPE_L2       0
+#define        FM_FMRI_CPU_CACHE_TYPE_L3       1
+
+/* legacy-hc scheme member names */
+#define        FM_FMRI_LEGACY_HC               "component"
+#define        FM_FMRI_LEGACY_HC_PREFIX        FM_FMRI_SCHEME_HC":///" \
+    FM_FMRI_LEGACY_HC"="
+
+/* mem scheme member names */
+#define        FM_FMRI_MEM_UNUM                "unum"
+#define        FM_FMRI_MEM_SERIAL_ID           "serial"
+#define        FM_FMRI_MEM_PHYSADDR            "physaddr"
+#define        FM_FMRI_MEM_MEMCONFIG           "memconfig"
+#define        FM_FMRI_MEM_OFFSET              "offset"
+
+/* mod scheme member names */
+#define        FM_FMRI_MOD_PKG                 "mod-pkg"
+#define        FM_FMRI_MOD_NAME                "mod-name"
+#define        FM_FMRI_MOD_ID                  "mod-id"
+#define        FM_FMRI_MOD_DESC                "mod-desc"
+
+/* zfs scheme member names */
+#define        FM_FMRI_ZFS_POOL                "pool"
+#define        FM_FMRI_ZFS_VDEV                "vdev"
+
+extern nv_alloc_t *fm_nva_xcreate(char *, size_t);
+extern void fm_nva_xdestroy(nv_alloc_t *);
+
+extern nvlist_t *fm_nvlist_create(nv_alloc_t *);
+extern void fm_nvlist_destroy(nvlist_t *, int);
+
+#define        FM_NVA_FREE     0               /* free allocator on nvlist_destroy */
+#define        FM_NVA_RETAIN   1               /* keep allocator on nvlist_destroy */
+
+extern void fm_ereport_set(nvlist_t *, int, const char *, uint64_t,
+    const nvlist_t *, ...);
+extern void fm_payload_set(nvlist_t *, ...);
+extern int i_fm_payload_set(nvlist_t *, const char *, va_list);
+extern void fm_fmri_hc_set(nvlist_t *, int, const nvlist_t *, nvlist_t *,
+    int, ...);
+extern void fm_fmri_dev_set(nvlist_t *, int, const nvlist_t *, const char *,
+    const char *);
+extern void fm_fmri_de_set(nvlist_t *, int, const nvlist_t *, const char *);
+extern void fm_fmri_cpu_set(nvlist_t *, int, const nvlist_t *, uint32_t,
+    uint8_t *, const char *);
+extern void fm_fmri_mem_set(nvlist_t *, int, const nvlist_t *, const char *,
+    const char *, uint64_t);
+extern void fm_authority_set(nvlist_t *, int, const char *, const char *,
+    const char *, const char *);
+extern void fm_fmri_zfs_set(nvlist_t *, int, uint64_t, uint64_t);
+
+extern uint64_t fm_ena_increment(uint64_t);
+extern uint64_t fm_ena_generate(uint64_t, uchar_t);
+extern uint64_t fm_ena_generate_cpu(uint64_t, processorid_t, uchar_t);
+extern uint64_t fm_ena_generation_get(uint64_t);
+extern uchar_t fm_ena_format_get(uint64_t);
+extern uint64_t fm_ena_id_get(uint64_t);
+extern uint64_t fm_ena_time_get(uint64_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FM_PROTOCOL_H */
diff --git a/module/zfs/include/sys/fm/util.h b/module/zfs/include/sys/fm/util.h
new file mode 100644 (file)
index 0000000..4934814
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef        _SYS_FM_UTIL_H
+#define        _SYS_FM_UTIL_H
+
+#pragma ident  "%Z%%M% %I%     %E% SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/nvpair.h>
+#include <sys/errorq.h>
+
+/*
+ * Shared user/kernel definitions for class length, error channel name,
+ * and kernel event publisher string.
+ */
+#define        FM_MAX_CLASS 100
+#define        FM_ERROR_CHAN   "com.sun:fm:error"
+#define        FM_PUB          "fm"
+
+/*
+ * ereport dump device transport support
+ *
+ * Ereports are written out to the dump device at a prescribed offset from the
+ * end, similar to in-transit log messages.  Each ereport is represented as an
+ * erpt_dump_t header followed by ed_size bytes of packed native nvlist data.
+ * A header whose ed_magic is zero marks the end of the sequence.
+ *
+ * NOTE: All of these constants and the header must be defined so they have the
+ * same representation for *both* 32-bit and 64-bit producers and consumers.
+ */
+#define        ERPT_MAGIC      0xf00d4eddU
+#define        ERPT_MAX_ERRS   16
+#define        ERPT_DATA_SZ    (6 * 1024)
+#define        ERPT_EVCH_MAX   256
+#define        ERPT_HIWAT      64
+
+typedef struct erpt_dump {
+       uint32_t ed_magic;      /* ERPT_MAGIC or zero to indicate end */
+       uint32_t ed_chksum;     /* checksum32() of packed nvlist data */
+       uint32_t ed_size;       /* size of the packed nvlist data that follows */
+       uint32_t ed_pad;        /* reserved for future use */
+       hrtime_t ed_hrt_nsec;   /* hrtime of this ereport */
+       hrtime_t ed_hrt_base;   /* hrtime sample corresponding to ed_tod_base */
+       struct {
+               uint64_t sec;   /* seconds since gettimeofday() Epoch */
+               uint64_t nsec;  /* nanoseconds past ed_tod_base.sec */
+       } ed_tod_base;
+} erpt_dump_t;
+
+#ifdef _KERNEL
+#include <sys/systm.h>
+
+#define        FM_STK_DEPTH    20      /* maximum stack depth */
+#define        FM_SYM_SZ       64      /* maximum symbol size */
+#define        FM_ERR_PIL      2       /* PIL for ereport_errorq drain processing */
+
+#define        FM_EREPORT_PAYLOAD_NAME_STACK           "stack"
+
+extern errorq_t *ereport_errorq;
+extern void *ereport_dumpbuf;
+extern size_t ereport_dumplen;
+
+extern void fm_init(void);
+extern void fm_nvprint(nvlist_t *);
+extern void fm_panic(const char *, ...);
+extern void fm_banner(void);
+
+extern void fm_ereport_dump(void);
+extern void fm_ereport_post(nvlist_t *, int);
+
+extern void fm_payload_stack_add(nvlist_t *, const pc_t *, int);
+
+#endif  /* _KERNEL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_FM_UTIL_H */
index 03023eaefa728c81279fd52edd8681c17626a533..83525fe03015817cac86f18f596e4ff4a0951ebe 100755 (executable)
@@ -94,17 +94,19 @@ cp ${SRC_UCM}/sys/u8_textprep_data.h                ${DST_MOD}/unicode/include/sys/
 
 echo "* module/zcommon"
 mkdir -p ${DST_MOD}/zcommon/include/sys/fs/
-mkdir -p ${DST_MOD}/zcommon/include/sys/fm/fs/
 cp ${SRC_CM}/zfs/*.c                           ${DST_MOD}/zcommon/
 cp ${SRC_CM}/zfs/*.h                           ${DST_MOD}/zcommon/include/
 cp ${SRC_UCM}/sys/fs/zfs.h                     ${DST_MOD}/zcommon/include/sys/fs/
-cp ${SRC_UCM}/sys/fm/fs/zfs.h                  ${DST_MOD}/zcommon/include/sys/fm/fs/
 
 echo "* module/zfs"
-mkdir -p ${DST_MOD}/zpool/include/sys/
+mkdir -p ${DST_MOD}/zfs/include/sys/fm/fs/
 cp ${SRC_UTS}/intel/zfs/spa_boot.c             ${DST_MOD}/zfs/
 cp ${SRC_ZLIB}/*.c                             ${DST_MOD}/zfs/
 cp ${SRC_ZLIB}/sys/*.h                         ${DST_MOD}/zfs/include/sys/
+cp ${SRC_UCM}/os/fm.c                          ${DST_MOD}/zfs/
+cp ${SRC_UCM}/sys/fm/protocol.h                        ${DST_MOD}/zfs/include/sys/fm/
+cp ${SRC_UCM}/sys/fm/util.h                    ${DST_MOD}/zfs/include/sys/fm/
+cp ${SRC_UCM}/sys/fm/fs/zfs.h                  ${DST_MOD}/zfs/include/sys/fm/fs/
 rm ${DST_MOD}/zfs/vdev_disk.c
 rm ${DST_MOD}/zfs/zvol.c
 rm ${DST_MOD}/zfs/include/sys/vdev_disk.h