/*
* Copyright (c) 2016, Intel Corporation.
+ * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
*/
#include <libnvpair.h>
libzfs_handle_t *g_zfs_hdl;
/* guid search data */
+typedef enum device_type {
+ DEVICE_TYPE_L2ARC, /* l2arc device */
+ DEVICE_TYPE_SPARE, /* spare device */
+ DEVICE_TYPE_PRIMARY /* any primary pool storage device */
+} device_type_t;
+
typedef struct guid_search {
uint64_t gs_pool_guid;
uint64_t gs_vdev_guid;
char *gs_devid;
+ device_type_t gs_vdev_type;
+ uint64_t gs_vdev_expandtime; /* vdev expansion time */
} guid_search_t;
-static void
+/*
+ * Walks the vdev tree recursively looking for a matching devid.
+ * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.
+ */
+static boolean_t
zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
{
guid_search_t *gsp = arg;
	char *path = NULL;
	uint_t c, children;
	nvlist_t **child;

	/*
	 * First iterate over any children.
	 */
if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
&child, &children) == 0) {
- for (c = 0; c < children; c++)
- zfs_agent_iter_vdev(zhp, child[c], gsp);
- return;
+ for (c = 0; c < children; c++) {
+ if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
+ gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
+ return (B_TRUE);
+ }
+ }
}
/*
- * On a devid match, grab the vdev guid
+ * Iterate over any spares and cache devices
*/
- if ((gsp->gs_vdev_guid == 0) &&
- (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
+ if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++) {
+ if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
+ gsp->gs_vdev_type = DEVICE_TYPE_L2ARC;
+ return (B_TRUE);
+ }
+ }
+ }
+ if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++) {
+ if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
+ gsp->gs_vdev_type = DEVICE_TYPE_SPARE;
+ return (B_TRUE);
+ }
+ }
+ }
+ /*
+ * On a devid match, grab the vdev guid and expansion time, if any.
+ */
+ if ((nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
(strcmp(gsp->gs_devid, path) == 0)) {
(void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
&gsp->gs_vdev_guid);
+ (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
+ &gsp->gs_vdev_expandtime);
+ return (B_TRUE);
}
+
+ return (B_FALSE);
}
static int
if ((config = zpool_get_config(zhp, NULL)) != NULL) {
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvl) == 0) {
- zfs_agent_iter_vdev(zhp, nvl, gsp);
+ (void) zfs_agent_iter_vdev(zhp, nvl, gsp);
}
}
/*
struct timeval tv;
int64_t tod[2];
uint64_t pool_guid = 0, vdev_guid = 0;
+ guid_search_t search = { 0 };
+ device_type_t devtype = DEVICE_TYPE_PRIMARY;
class = "resource.fs.zfs.removed";
subclass = "";
(void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
(void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
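+	/*
+	 * Capture the event time up front: it is added to the payload and
+	 * also used below to ignore events for recently expanded vdevs.
+	 */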
+ (void) gettimeofday(&tv, NULL);
+ tod[0] = tv.tv_sec;
+ tod[1] = tv.tv_usec;
+ (void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);
+
/*
- * For multipath, ZFS_EV_VDEV_GUID is missing so find it.
+ * For multipath, spare, and l2arc devices, ZFS_EV_VDEV_GUID or
+ * ZFS_EV_POOL_GUID may be missing, so find them.
*/
- if (vdev_guid == 0) {
- guid_search_t search = { 0 };
-
- (void) nvlist_lookup_string(nvl, DEV_IDENTIFIER,
- &search.gs_devid);
+ (void) nvlist_lookup_string(nvl, DEV_IDENTIFIER,
+ &search.gs_devid);
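+	/*
+	 * Walk every imported pool looking for a vdev with a matching devid;
+	 * the search records the pool and vdev guids, the device class and
+	 * the vdev's last expansion time, if any.
+	 */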
+ (void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search);
+ pool_guid = search.gs_pool_guid;
+ vdev_guid = search.gs_vdev_guid;
+ devtype = search.gs_vdev_type;
- (void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool,
- &search);
- pool_guid = search.gs_pool_guid;
- vdev_guid = search.gs_vdev_guid;
+ /*
+ * We want to avoid reporting "remove" events coming from
+ * libudev for VDEVs which were expanded recently (10s) and
+ * avoid activating spares in response to partitions being
+ * deleted and created in rapid succession.
+ */
+ if (search.gs_vdev_expandtime != 0 &&
+ search.gs_vdev_expandtime + 10 > tv.tv_sec) {
+ zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
+ "for recently expanded device '%s'", EC_DEV_REMOVE,
+ search.gs_devid);
+ goto out;
}
(void) nvlist_add_uint64(payload,
FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
(void) nvlist_add_uint64(payload,
FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
-
- (void) gettimeofday(&tv, NULL);
- tod[0] = tv.tv_sec;
- tod[1] = tv.tv_usec;
- (void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);
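+	/*
+	 * Record the device class in the payload so the retire agent can
+	 * tell primary storage apart from cache and spare devices (cache
+	 * devices are offlined rather than replaced with a spare).
+	 */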
+ switch (devtype) {
+ case DEVICE_TYPE_L2ARC:
+ (void) nvlist_add_string(payload,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
+ VDEV_TYPE_L2CACHE);
+ break;
+ case DEVICE_TYPE_SPARE:
+ (void) nvlist_add_string(payload,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
+ break;
+ case DEVICE_TYPE_PRIMARY:
+ (void) nvlist_add_string(payload,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
+ break;
+ }
zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
EC_DEV_REMOVE, class);
list_insert_tail(&agent_events, event);
(void) pthread_mutex_unlock(&agent_lock);
+out:
(void) pthread_cond_signal(&agent_cond);
}
&child, &children) == 0) {
for (c = 0; c < children; c++)
zfs_iter_vdev(zhp, child[c], data);
- return;
+ }
+
+ /*
+ * Iterate over any spares and cache devices
+ */
+ if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++)
+ zfs_iter_vdev(zhp, child[c], data);
+ }
+ if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
+ &child, &children) == 0) {
+ for (c = 0; c < children; c++)
+ zfs_iter_vdev(zhp, child[c], data);
}
/* once a vdev was matched and processed there is nothing left to do */
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
*
* Copyright (c) 2016, Intel Corporation.
+ * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
*/
/*
return (ret);
}
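+	/*
+	 * Also search any hot spares attached to this pool so that events
+	 * targeting a removed or faulted spare can still be matched to it.
+	 */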
+ if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
+ &child, &children) != 0)
+ return (NULL);
+
+ for (c = 0; c < children; c++) {
+ if ((ret = find_vdev(zhdl, child[c], search_guid)) != NULL)
+ return (ret);
+ }
+
return (NULL);
}
/*
* Given a vdev, attempt to replace it with every known spare until one
- * succeeds.
+ * succeeds or we run out of devices to try.
+ * Returns B_TRUE if the replacement succeeded, B_FALSE otherwise.
*/
-static void
+static boolean_t
replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev)
{
nvlist_t *config, *nvroot, *replacement;
config = zpool_get_config(zhp, NULL);
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
&nvroot) != 0)
- return;
+ return (B_FALSE);
/*
* Find out if there are any hot spares available in the pool.
*/
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
&spares, &nspares) != 0)
- return;
+ return (B_FALSE);
/*
* lookup "ashift" pool property, we may need it for the replacement
dev_name, basename(spare_name));
if (zpool_vdev_attach(zhp, dev_name, spare_name,
- replacement, B_TRUE) == 0)
- break;
+ replacement, B_TRUE) == 0) {
+ free(dev_name);
+ nvlist_free(replacement);
+ return (B_TRUE);
+ }
}
free(dev_name);
nvlist_free(replacement);
+
+ return (B_FALSE);
}
/*
fmd_hdl_debug(hdl, "zfs_retire_recv: '%s'", class);
/*
- * If this is a resource notifying us of device removal, then simply
- * check for an available spare and continue.
+ * If this is a resource notifying us of device removal, then simply
+ * check for an available spare and continue, unless the device is an
+ * l2arc vdev, in which case we just offline it.
*/
if (strcmp(class, "resource.fs.zfs.removed") == 0) {
+ char *devtype;
+ char *devname;
+
if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
&pool_guid) != 0 ||
		    nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID,
		    &vdev_guid) != 0)
			return;

		if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid,
		    &vdev)) == NULL)
			return;
- if (fmd_prop_get_int32(hdl, "spare_on_remove"))
- replace_with_spare(hdl, zhp, vdev);
+ devname = zpool_vdev_name(NULL, zhp, vdev, B_FALSE);
+
+ /* Can't replace l2arc with a spare: offline the device */
+ if (nvlist_lookup_string(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
+ &devtype) == 0 && strcmp(devtype, VDEV_TYPE_L2CACHE) == 0) {
+ fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", devname);
+ zpool_vdev_offline(zhp, devname, B_TRUE);
+ } else if (!fmd_prop_get_int32(hdl, "spare_on_remove") ||
+ replace_with_spare(hdl, zhp, vdev) == B_FALSE) {
+ /* Could not handle with spare: offline the device */
+ fmd_hdl_debug(hdl, "zpool_vdev_offline '%s'", devname);
+ zpool_vdev_offline(zhp, devname, B_TRUE);
+ }
+
+ free(devname);
zpool_close(zhp);
return;
}
/*
* Attempt to substitute a hot spare.
*/
- replace_with_spare(hdl, zhp, vdev);
+ (void) replace_with_spare(hdl, zhp, vdev);
zpool_close(zhp);
}
#define ZPOOL_CONFIG_MMP_HOSTNAME "mmp_hostname" /* not stored on disk */
#define ZPOOL_CONFIG_MMP_HOSTID "mmp_hostid" /* not stored on disk */
#define ZPOOL_CONFIG_ALLOCATION_BIAS "alloc_bias" /* not stored on disk */
+#define ZPOOL_CONFIG_EXPANSION_TIME "expansion_time" /* not stored */
/*
* The persistent vdev state is stored as separate values rather than a single
uint64_t vdev_leaf_zap;
hrtime_t vdev_mmp_pending; /* 0 if write finished */
uint64_t vdev_mmp_kstat_id; /* to find kstat entry */
+ uint64_t vdev_expansion_time; /* vdev's last expansion time */
/*
* For DTrace to work in userland (libzpool) context, these fields must
for (pvd = vd; pvd != rvd; pvd = pvd->vdev_parent)
pvd->vdev_expanding = !!((flags & ZFS_ONLINE_EXPAND) ||
spa->spa_autoexpand);
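+		/*
+		 * Remember when this vdev was last expanded; the ZED uses it
+		 * to ignore the spurious remove events that can be generated
+		 * while the partition table is being rewritten.
+		 */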
+ vd->vdev_expansion_time = gethrestime_sec();
}
vdev_reopen(tvd);
if (vd->vdev_crtxg)
fnvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, vd->vdev_crtxg);
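+	/*
+	 * Export the last expansion time so it shows up in the config that
+	 * user space reads (the ZED consults it when handling remove events).
+	 */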
+ if (vd->vdev_expansion_time)
+ fnvlist_add_uint64(nv, ZPOOL_CONFIG_EXPANSION_TIME,
+ vd->vdev_expansion_time);
+
if (flags & VDEV_CONFIG_MOS) {
if (vd->vdev_leaf_zap != 0) {
ASSERT(vd->vdev_ops->vdev_op_leaf);
tags = ['functional', 'exec']
[tests/functional/fault]
-tests = ['auto_online_001_pos', 'auto_replace_001_pos', 'auto_spare_001_pos',
- 'auto_spare_002_pos', 'auto_spare_ashift', 'auto_spare_multiple',
- 'auto_spare_shared', 'scrub_after_resilver', 'decrypt_fault',
- 'decompress_fault','zpool_status_-s']
+tests = ['auto_offline_001_pos', 'auto_online_001_pos', 'auto_replace_001_pos',
+ 'auto_spare_001_pos', 'auto_spare_002_pos', 'auto_spare_ashift',
+ 'auto_spare_multiple', 'auto_spare_shared', 'scrub_after_resilver',
+ 'decrypt_fault', 'decompress_fault', 'zpool_status_-s']
tags = ['functional', 'fault']
[tests/functional/features/async_destroy]
fi
typeset prev_size=$(get_pool_prop size $TESTPOOL1)
- typeset zfs_prev_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
- awk '{print $3}')
+ typeset zfs_prev_size=$(get_prop avail $TESTPOOL1)
# Expand each device as appropriate being careful to add an artificial
# delay to ensure we get a single history entry for each. This makes
log_must zpool online -e $TESTPOOL1 $FILE_RAW
typeset expand_size=$(get_pool_prop size $TESTPOOL1)
- typeset zfs_expand_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
- awk '{print $3}')
+ typeset zfs_expand_size=$(get_prop avail $TESTPOOL1)
log_note "$TESTPOOL1 $type has previous size: $prev_size and " \
"expanded size: $expand_size"
dist_pkgdata_SCRIPTS = \
setup.ksh \
cleanup.ksh \
+ auto_offline_001_pos.ksh \
auto_online_001_pos.ksh \
auto_replace_001_pos.ksh \
auto_spare_001_pos.ksh \
--- /dev/null
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/events/events_common.kshlib
+. $STF_SUITE/tests/functional/fault/fault.cfg
+
+#
+# DESCRIPTION:
+# Testing Fault Management Agent ZED Logic - Physically removed device is
+# offlined and onlined when reattached
+#
+# STRATEGY:
+# 1. Create a pool
+# 2. Simulate physical removal of one device
+# 3. Verify the device is offlined
+# 4. Reattach the device
+# 5. Verify the device is onlined
+# 6. Repeat the same tests with a spare device: zed will use the spare to handle
+# the removed data device
+# 7. Repeat the same tests again with a faulted spare device: zed should offline
+# the removed data device if no spare is available
+#
+# NOTE: the use of 'block_device_wait' throughout the test helps avoid race
+# conditions caused by mixing creation/removal events from partitioning the
+# disk (zpool create) and events from physically removing it (remove_disk).
+#
+verify_runnable "both"
+
+if is_linux; then
+ # Add one 512b scsi_debug device (4Kn would generate IO errors)
+ # NOTE: must be larger than other "file" vdevs and minimum SPA devsize:
+ # add 32m of fudge
+ load_scsi_debug $(($SPA_MINDEVSIZE/1024/1024+32)) 1 1 1 '512b'
+else
+ log_unsupported "scsi debug module unsupported"
+fi
+
+function cleanup
+{
+ destroy_pool $TESTPOOL
+ rm -f $filedev1
+ rm -f $filedev2
+ rm -f $filedev3
+ rm -f $sparedev
+ unload_scsi_debug
+}
+
+log_assert "ZED detects physically removed devices"
+
+log_onexit cleanup
+
+filedev1="$TEST_BASE_DIR/file-vdev-1"
+filedev2="$TEST_BASE_DIR/file-vdev-2"
+filedev3="$TEST_BASE_DIR/file-vdev-3"
+sparedev="$TEST_BASE_DIR/file-vdev-spare"
+removedev=$(get_debug_device)
+
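+# Pool configurations to exercise: the scsi_debug device acts either as a
+# data vdev (mirror/raidz*) or as a cache device.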
+typeset poolconfs=("mirror $filedev1 $removedev"
+ "raidz $filedev1 $removedev"
+ "raidz2 $filedev1 $filedev2 $removedev"
+ "raidz3 $filedev1 $filedev2 $filedev3 $removedev"
+ "$filedev1 cache $removedev"
+ "mirror $filedev1 $filedev2 cache $removedev"
+ "raidz $filedev1 $filedev2 $filedev3 cache $removedev"
+)
+
+log_must truncate -s $SPA_MINDEVSIZE $filedev1
+log_must truncate -s $SPA_MINDEVSIZE $filedev2
+log_must truncate -s $SPA_MINDEVSIZE $filedev3
+log_must truncate -s $SPA_MINDEVSIZE $sparedev
+
+for conf in "${poolconfs[@]}"
+do
+ # 1. Create a pool
+ log_must zpool create -f $TESTPOOL $conf
+ block_device_wait
+
+ # 2. Simulate physical removal of one device
+ remove_disk $removedev
+
+ # 3. Verify the device is offlined
+ log_must wait_vdev_state $TESTPOOL $removedev "OFFLINE"
+
+ # 4. Reattach the device
+ insert_disk $removedev
+
+ # 5. Verify the device is onlined
+ log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"
+
+ # cleanup
+ destroy_pool $TESTPOOL
+ log_must parted "/dev/${removedev}" -s -- mklabel msdos
+ block_device_wait
+done
+
+# 6. Repeat the same tests with a spare device: zed will use the spare to handle
+# the removed data device
+for conf in "${poolconfs[@]}"
+do
+ # 1. Create a pool with a spare
+ log_must zpool create -f $TESTPOOL $conf
+ block_device_wait
+ log_must zpool add $TESTPOOL spare $sparedev
+
+	# 2. Simulate physical removal of one device
+ remove_disk $removedev
+
+	# 3. Verify the device is handled by the spare unless it is an l2arc
+	#    disk, which can only be offlined
+ if [[ $(echo "$conf" | grep -c 'cache') -eq 0 ]]; then
+ log_must wait_hotspare_state $TESTPOOL $sparedev "INUSE"
+ else
+ log_must wait_vdev_state $TESTPOOL $removedev "OFFLINE"
+ fi
+
+	# 4. Reattach the device
+ insert_disk $removedev
+
+	# 5. Verify the device is onlined
+ log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"
+
+ # cleanup
+ destroy_pool $TESTPOOL
+ log_must parted "/dev/${removedev}" -s -- mklabel msdos
+ block_device_wait
+done
+
+# 7. Repeat the same tests again with a faulted spare device: zed should offline
+# the removed data device if no spare is available
+for conf in "${poolconfs[@]}"
+do
+ # 1. Create a pool with a spare
+ log_must zpool create -f $TESTPOOL $conf
+ block_device_wait
+ log_must zpool add $TESTPOOL spare $sparedev
+
+ # 2. Fault the spare device making it unavailable
+ log_must zpool offline -f $TESTPOOL $sparedev
+ log_must wait_hotspare_state $TESTPOOL $sparedev "FAULTED"
+
+ # 3. Simulate physical removal of one device
+ remove_disk $removedev
+
+ # 4. Verify the device is offlined
+ log_must wait_vdev_state $TESTPOOL $removedev "OFFLINE"
+
+ # 5. Reattach the device
+ insert_disk $removedev
+
+ # 6. Verify the device is onlined
+ log_must wait_vdev_state $TESTPOOL $removedev "ONLINE"
+
+ # cleanup
+ destroy_pool $TESTPOOL
+ log_must parted "/dev/${removedev}" -s -- mklabel msdos
+ block_device_wait
+done
+
+log_pass "ZED detects physically removed devices"