X-Git-Url: https://granicus.if.org/sourcecode?a=blobdiff_plain;ds=sidebyside;f=module%2Fzfs%2Fzfs_fm.c;h=df37fed2b9729742eb4f83bf76f84e472a40e39f;hb=6568379eea8f01ad87a6cbcb66111112a1b5665f;hp=7801837f104b8166d09888c5c15e69eff147d192;hpb=5ffb9d1d05d7c512b987dff51f587466d537770f;p=zfs diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index 7801837f1..df37fed2b 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -112,6 +112,34 @@ zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector) fm_nvlist_destroy(detector, FM_NVA_FREE); } +/* + * We want to rate limit ZIO delay and checksum events so as to not + * flood ZED when a disk is acting up. + * + * Returns 1 if we're ratelimiting, 0 if not. + */ +static int +zfs_is_ratelimiting_event(const char *subclass, vdev_t *vd) +{ + int rc = 0; + /* + * zfs_ratelimit() returns 1 if we're *not* ratelimiting and 0 if we + * are. Invert it to get our return value. + */ + if (strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) { + rc = !zfs_ratelimit(&vd->vdev_delay_rl); + } else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0) { + rc = !zfs_ratelimit(&vd->vdev_checksum_rl); + } + + if (rc) { + /* We're rate limiting */ + fm_erpt_dropped_increment(); + } + + return (rc); +} + static void zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, @@ -190,6 +218,12 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, return; } + if ((strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) && + (zio != NULL) && (!zio->io_timestamp)) { + /* Ignore bogus delay events */ + return; + } + /* * Serialize ereport generation */ @@ -250,6 +284,12 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, if (vd != NULL) { vdev_t *pvd = vd->vdev_parent; + vdev_queue_t *vq = &vd->vdev_queue; + vdev_stat_t *vs = &vd->vdev_stat; + vdev_t *spare_vd; + uint64_t *spare_guids; + char **spare_paths; + int i, spare_count; fm_payload_set(ereport, 
FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, DATA_TYPE_UINT64, vd->vdev_guid, @@ -267,6 +307,33 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU, DATA_TYPE_STRING, vd->vdev_fru, NULL); + if (vd->vdev_enc_sysfs_path != NULL) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH, + DATA_TYPE_STRING, vd->vdev_enc_sysfs_path, NULL); + if (vd->vdev_ashift) + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_ASHIFT, + DATA_TYPE_UINT64, vd->vdev_ashift, NULL); + + if (vq != NULL) { + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_COMP_TS, + DATA_TYPE_UINT64, vq->vq_io_complete_ts, NULL); + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_DELTA_TS, + DATA_TYPE_UINT64, vq->vq_io_delta_ts, NULL); + } + + if (vs != NULL) { + fm_payload_set(ereport, + FM_EREPORT_PAYLOAD_ZFS_VDEV_READ_ERRORS, + DATA_TYPE_UINT64, vs->vs_read_errors, + FM_EREPORT_PAYLOAD_ZFS_VDEV_WRITE_ERRORS, + DATA_TYPE_UINT64, vs->vs_write_errors, + FM_EREPORT_PAYLOAD_ZFS_VDEV_CKSUM_ERRORS, + DATA_TYPE_UINT64, vs->vs_checksum_errors, NULL); + } if (pvd != NULL) { fm_payload_set(ereport, @@ -284,6 +351,28 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, FM_EREPORT_PAYLOAD_ZFS_PARENT_DEVID, DATA_TYPE_STRING, pvd->vdev_devid, NULL); } + + spare_count = spa->spa_spares.sav_count; + spare_paths = kmem_zalloc(sizeof (char *) * spare_count, + KM_SLEEP); + spare_guids = kmem_zalloc(sizeof (uint64_t) * spare_count, + KM_SLEEP); + + for (i = 0; i < spare_count; i++) { + spare_vd = spa->spa_spares.sav_vdevs[i]; + if (spare_vd) { + spare_paths[i] = spare_vd->vdev_path; + spare_guids[i] = spare_vd->vdev_guid; + } + } + + fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_VDEV_SPARE_PATHS, + DATA_TYPE_STRING_ARRAY, spare_count, spare_paths, + FM_EREPORT_PAYLOAD_ZFS_VDEV_SPARE_GUIDS, + DATA_TYPE_UINT64_ARRAY, spare_count, spare_guids, NULL); + + kmem_free(spare_guids, sizeof (uint64_t) * spare_count); + 
kmem_free(spare_paths, sizeof (char *) * spare_count); } if (zio != NULL) { @@ -294,8 +383,16 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out, DATA_TYPE_INT32, zio->io_error, NULL); fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_FLAGS, DATA_TYPE_INT32, zio->io_flags, NULL); + fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_STAGE, + DATA_TYPE_UINT32, zio->io_stage, NULL); + fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_PIPELINE, + DATA_TYPE_UINT32, zio->io_pipeline, NULL); fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_DELAY, DATA_TYPE_UINT64, zio->io_delay, NULL); + fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_TIMESTAMP, + DATA_TYPE_UINT64, zio->io_timestamp, NULL); + fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_ZIO_DELTA, + DATA_TYPE_UINT64, zio->io_delta, NULL); /* * If the 'size' parameter is non-zero, it indicates this is a @@ -393,7 +490,8 @@ update_histogram(uint64_t value_arg, uint16_t *hist, uint32_t *count) /* We store the bits in big-endian (largest-first) order */ for (i = 0; i < 64; i++) { if (value & (1ull << i)) { - hist[63 - i]++; + if (hist[63 - i] < UINT16_MAX) + hist[63 - i]++; ++bits; } } @@ -551,7 +649,6 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, if (badbuf == NULL || goodbuf == NULL) return (eip); - ASSERT3U(nui64s, <=, UINT16_MAX); ASSERT3U(size, ==, nui64s * sizeof (uint64_t)); ASSERT3U(size, <=, SPA_MAXBLOCKSIZE); ASSERT3U(size, <=, UINT32_MAX); @@ -678,6 +775,9 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio, if (ereport == NULL) return; + if (zfs_is_ratelimiting_event(subclass, vd)) + return; + /* Cleanup is handled by the callback function */ zfs_zevent_post(ereport, detector, zfs_zevent_post_cb); #endif @@ -688,7 +788,15 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio, uint64_t offset, uint64_t length, void *arg, zio_bad_cksum_t *info) { - zio_cksum_report_t *report = kmem_zalloc(sizeof (*report), KM_SLEEP); + 
zio_cksum_report_t *report; + + +#ifdef _KERNEL + if (zfs_is_ratelimiting_event(FM_EREPORT_ZFS_CHECKSUM, vd)) + return; +#endif + + report = kmem_zalloc(sizeof (*report), KM_SLEEP); if (zio->io_vsd != NULL) zio->io_vsd_ops->vsd_cksum_report(zio, report, arg); @@ -709,12 +817,7 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, FM_EREPORT_ZFS_CHECKSUM, spa, vd, zio, offset, length); if (report->zcr_ereport == NULL) { - report->zcr_free(report->zcr_cbdata, report->zcr_cbinfo); - if (report->zcr_ckinfo != NULL) { - kmem_free(report->zcr_ckinfo, - sizeof (*report->zcr_ckinfo)); - } - kmem_free(report, sizeof (*report)); + zfs_ereport_free_checksum(report); return; } #endif @@ -730,13 +833,15 @@ zfs_ereport_finish_checksum(zio_cksum_report_t *report, const void *good_data, const void *bad_data, boolean_t drop_if_identical) { #ifdef _KERNEL - zfs_ecksum_info_t *info = NULL; + zfs_ecksum_info_t *info; + info = annotate_ecksum(report->zcr_ereport, report->zcr_ckinfo, good_data, bad_data, report->zcr_length, drop_if_identical); - if (info != NULL) zfs_zevent_post(report->zcr_ereport, report->zcr_detector, zfs_zevent_post_cb); + else + zfs_zevent_post_cb(report->zcr_ereport, report->zcr_detector); report->zcr_ereport = report->zcr_detector = NULL; if (info != NULL) @@ -763,13 +868,6 @@ zfs_ereport_free_checksum(zio_cksum_report_t *rpt) kmem_free(rpt, sizeof (*rpt)); } -void -zfs_ereport_send_interim_checksum(zio_cksum_report_t *report) -{ -#ifdef _KERNEL - zfs_zevent_post(report->zcr_ereport, report->zcr_detector, NULL); -#endif -} void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, @@ -798,7 +896,8 @@ zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd, } static void -zfs_post_common(spa_t *spa, vdev_t *vd, const char *name) +zfs_post_common(spa_t *spa, vdev_t *vd, const char *type, const char *name, + nvlist_t *aux) { #ifdef _KERNEL nvlist_t *resource; @@ -810,17 +909,40 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *name) if ((resource = fm_nvlist_create(NULL)) 
== NULL) return; - (void) snprintf(class, sizeof (class), "%s.%s.%s", FM_RSRC_RESOURCE, + (void) snprintf(class, sizeof (class), "%s.%s.%s", type, ZFS_ERROR_CLASS, name); - VERIFY(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION) == 0); - VERIFY(nvlist_add_string(resource, FM_CLASS, class) == 0); - VERIFY(nvlist_add_uint64(resource, - FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)) == 0); + VERIFY0(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION)); + VERIFY0(nvlist_add_string(resource, FM_CLASS, class)); + VERIFY0(nvlist_add_uint64(resource, + FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa))); + VERIFY0(nvlist_add_int32(resource, + FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, spa_load_state(spa))); + if (vd) { - VERIFY(nvlist_add_uint64(resource, - FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid) == 0); - VERIFY(nvlist_add_uint64(resource, - FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, vd->vdev_state) == 0); + VERIFY0(nvlist_add_uint64(resource, + FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid)); + VERIFY0(nvlist_add_uint64(resource, + FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, vd->vdev_state)); + if (vd->vdev_path != NULL) + VERIFY0(nvlist_add_string(resource, + FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH, vd->vdev_path)); + if (vd->vdev_devid != NULL) + VERIFY0(nvlist_add_string(resource, + FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID, vd->vdev_devid)); + if (vd->vdev_fru != NULL) + VERIFY0(nvlist_add_string(resource, + FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU, vd->vdev_fru)); + if (vd->vdev_enc_sysfs_path != NULL) + VERIFY0(nvlist_add_string(resource, + FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH, + vd->vdev_enc_sysfs_path)); + /* also copy any optional payload data */ + if (aux) { + nvpair_t *elem = NULL; + + while ((elem = nvlist_next_nvpair(aux, elem)) != NULL) + (void) nvlist_add_nvpair(resource, elem); + } } zfs_zevent_post(resource, NULL, zfs_zevent_post_cb); @@ -836,7 +958,7 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *name) void zfs_post_remove(spa_t *spa, vdev_t *vd) { - 
zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_REMOVED); + zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_REMOVED, NULL); } /* @@ -847,7 +969,7 @@ zfs_post_remove(spa_t *spa, vdev_t *vd) void zfs_post_autoreplace(spa_t *spa, vdev_t *vd) { - zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_AUTOREPLACE); + zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_AUTOREPLACE, NULL); } /* @@ -857,9 +979,49 @@ zfs_post_autoreplace(spa_t *spa, vdev_t *vd) * open because the device was not found (fault.fs.zfs.device). */ void -zfs_post_state_change(spa_t *spa, vdev_t *vd) +zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate) +{ +#ifdef _KERNEL + nvlist_t *aux; + + /* + * Add optional supplemental keys to payload + */ + aux = fm_nvlist_create(NULL); + if (vd && aux) { + if (vd->vdev_physpath) { + (void) nvlist_add_string(aux, + FM_EREPORT_PAYLOAD_ZFS_VDEV_PHYSPATH, + vd->vdev_physpath); + } + if (vd->vdev_enc_sysfs_path) { + (void) nvlist_add_string(aux, + FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH, + vd->vdev_enc_sysfs_path); + } + + (void) nvlist_add_uint64(aux, + FM_EREPORT_PAYLOAD_ZFS_VDEV_LASTSTATE, laststate); + } + + zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_STATECHANGE, + aux); + + if (aux) + fm_nvlist_destroy(aux, FM_NVA_FREE); +#endif +} + +/* + * The 'sysevent.fs.zfs.*' events are signals posted to notify user space of + * change in the pool. All sysevents are listed in sys/sysevent/eventdefs.h + * and are designed to be consumed by the ZFS Event Daemon (ZED). For + * additional details refer to the zed(8) man page. 
+ */ +void +zfs_post_sysevent(spa_t *spa, vdev_t *vd, const char *name) { - zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_STATECHANGE); + zfs_post_common(spa, vd, FM_SYSEVENT_CLASS, name, NULL); } #if defined(_KERNEL) && defined(HAVE_SPL) @@ -868,4 +1030,5 @@ EXPORT_SYMBOL(zfs_ereport_post_checksum); EXPORT_SYMBOL(zfs_post_remove); EXPORT_SYMBOL(zfs_post_autoreplace); EXPORT_SYMBOL(zfs_post_state_change); +EXPORT_SYMBOL(zfs_post_sysevent); #endif /* _KERNEL */