]> granicus.if.org Git - zfs/commitdiff
Fix 'zpool add' handling of nested interior VDEVs
authorLOLi <loli10K@users.noreply.github.com>
Thu, 28 Dec 2017 18:15:32 +0000 (19:15 +0100)
committerTony Hutter <hutter2@llnl.gov>
Tue, 30 Jan 2018 16:27:31 +0000 (10:27 -0600)
When replacing a faulted device which was previously handled by a spare
multiple levels of nested interior VDEVs will be present in the pool
configuration; the following example illustrates one of the possible
situations:

   NAME                          STATE     READ WRITE CKSUM
   testpool                      DEGRADED     0     0     0
     raidz1-0                    DEGRADED     0     0     0
       spare-0                   DEGRADED     0     0     0
         replacing-0             DEGRADED     0     0     0
           /var/tmp/fault-dev    UNAVAIL      0     0     0  cannot open
           /var/tmp/replace-dev  ONLINE       0     0     0
         /var/tmp/spare-dev1     ONLINE       0     0     0
       /var/tmp/safe-dev         ONLINE       0     0     0
   spares
     /var/tmp/spare-dev1         INUSE     currently in use

This is safe and allowed, but get_replication() needs to handle this
situation gracefully to let zpool add new devices to the pool.

Reviewed-by: George Melikov <mail@gmelikov.ru>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: loli10K <ezomori.nozomu@gmail.com>
Closes #6678
Closes #6996

cmd/zpool/zpool_vdev.c
tests/runfiles/linux.run
tests/zfs-tests/include/libtest.shlib
tests/zfs-tests/tests/functional/cli_root/zpool_add/Makefile.am
tests/zfs-tests/tests/functional/cli_root/zpool_add/add_nested_replacing_spare.ksh [new file with mode: 0755]

index 97faa5f9bee121644ddcf4fcac50c27a4e14475b..fd6bd9e7677d1554da2c7e2ca5662987876d1993 100644 (file)
@@ -860,9 +860,11 @@ get_replication(nvlist_t *nvroot, boolean_t fatal)
 
                                /*
                                 * If this is a replacing or spare vdev, then
-                                * get the real first child of the vdev.
+                                * get the real first child of the vdev: do this
+                                * in a loop because replacing and spare vdevs
+                                * can be nested.
                                 */
-                               if (strcmp(childtype,
+                               while (strcmp(childtype,
                                    VDEV_TYPE_REPLACING) == 0 ||
                                    strcmp(childtype, VDEV_TYPE_SPARE) == 0) {
                                        nvlist_t **rchild;
index f872c0cbf1d6e1397db3b01dd134fbcea55e256c..303c275299d839afc12496f2746f045e88f26b86 100644 (file)
@@ -228,7 +228,7 @@ tests = ['zpool_add_001_pos', 'zpool_add_002_pos', 'zpool_add_003_pos',
     'zpool_add_004_pos', 'zpool_add_005_pos', 'zpool_add_006_pos',
     'zpool_add_007_neg', 'zpool_add_008_neg', 'zpool_add_009_neg',
     'zpool_add_010_pos',
-    'add-o_ashift', 'add_prop_ashift']
+    'add-o_ashift', 'add_prop_ashift', 'add_nested_replacing_spare']
 tags = ['functional', 'cli_root', 'zpool_add']
 
 [tests/functional/cli_root/zpool_attach]
index 345d1903de8daf42cc630695f59356a2cd66f563..86f172a6d348d767d911f4e6d21d79f25b53c926 100644 (file)
@@ -1988,6 +1988,31 @@ function check_hotspare_state # pool disk state{inuse,avail}
        return 0
 }
 
+#
+# Wait until a hotspare transitions to a given state or times out.
+#
+# Return 0 when  pool/disk matches expected state, 1 on timeout.
+#
+function wait_hotspare_state # pool disk state timeout
+{
+       typeset pool=$1
+       typeset disk=${2#$/DEV_DSKDIR/}
+       typeset state=$3
+       typeset timeout=${4:-60}
+       typeset -i i=0
+
+       while [[ $i -lt $timeout ]]; do
+               if check_hotspare_state $pool $disk $state; then
+                       return 0
+               fi
+
+               i=$((i+1))
+               sleep 1
+       done
+
+       return 1
+}
+
 #
 # Verify a given slog disk is inuse or avail
 #
@@ -2026,6 +2051,31 @@ function check_vdev_state # pool disk state{online,offline,unavail}
        return 0
 }
 
+#
+# Wait until a vdev transitions to a given state or times out.
+#
+# Return 0 when  pool/disk matches expected state, 1 on timeout.
+#
+function wait_vdev_state # pool disk state timeout
+{
+       typeset pool=$1
+       typeset disk=${2#$/DEV_DSKDIR/}
+       typeset state=$3
+       typeset timeout=${4:-60}
+       typeset -i i=0
+
+       while [[ $i -lt $timeout ]]; do
+               if check_vdev_state $pool $disk $state; then
+                       return 0
+               fi
+
+               i=$((i+1))
+               sleep 1
+       done
+
+       return 1
+}
+
 #
 # Check the output of 'zpool status -v <pool>',
 # and to see if the content of <token> contain the <keyword> specified.
@@ -3394,13 +3444,25 @@ function zed_stop
        if [[ -f ${ZEDLET_DIR}/zed.pid ]]; then
                zedpid=$(cat ${ZEDLET_DIR}/zed.pid)
                kill $zedpid
-               wait $zedpid
+               while ps -p $zedpid > /dev/null; do
+                       sleep 1
+               done
                rm -f ${ZEDLET_DIR}/zed.pid
        fi
-
        return 0
 }
 
+#
+# Drain all zevents
+#
+function zed_events_drain
+{
+       while [ $(zpool events -H | wc -l) -ne 0 ]; do
+              sleep 1
+              zpool events -c >/dev/null
+       done
+}
+
 #
 # Check is provided device is being active used as a swap device.
 #
index 4b6b533fecca407d06525c6a4af2a9268e1a9958..0620282992d5a0d78f94bf3d40c5360fb6913e2c 100644 (file)
@@ -15,4 +15,5 @@ dist_pkgdata_SCRIPTS = \
        zpool_add_009_neg.ksh \
        zpool_add_010_pos.ksh \
        add-o_ashift.ksh \
-       add_prop_ashift.ksh
+       add_prop_ashift.ksh \
+       add_nested_replacing_spare.ksh
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_add/add_nested_replacing_spare.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_add/add_nested_replacing_spare.ksh
new file mode 100755 (executable)
index 0000000..b380798
--- /dev/null
@@ -0,0 +1,111 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2017, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/cli_root/zpool_create/zpool_create.shlib
+
+#
+# DESCRIPTION:
+#      'zpool add' works with nested replacing/spare vdevs
+#
+# STRATEGY:
+#      1. Create a redundant pool with a spare device
+#      2. Manually fault a device, wait for the hot-spare and then replace it:
+#         this creates a situation where replacing and spare vdevs are nested.
+#      3. Verify 'zpool add' is able to add new devices to the pool.
+#
+
+verify_runnable "global"
+
+function cleanup
+{
+       zed_stop
+       zed_cleanup
+       log_must zinject -c all
+       destroy_pool $TESTPOOL
+       log_must rm -f $DATA_DEVS $SPARE_DEVS
+}
+
+log_assert "'zpool add' works with nested replacing/spare vdevs"
+log_onexit cleanup
+
+FAULT_DEV="$TEST_BASE_DIR/fault-dev"
+SAFE_DEV1="$TEST_BASE_DIR/safe-dev1"
+SAFE_DEV2="$TEST_BASE_DIR/safe-dev2"
+SAFE_DEV3="$TEST_BASE_DIR/safe-dev3"
+SAFE_DEVS="$SAFE_DEV1 $SAFE_DEV2 $SAFE_DEV3"
+REPLACE_DEV="$TEST_BASE_DIR/replace-dev"
+ADD_DEV="$TEST_BASE_DIR/add-dev"
+DATA_DEVS="$FAULT_DEV $SAFE_DEVS $REPLACE_DEV $ADD_DEV"
+SPARE_DEV1="$TEST_BASE_DIR/spare-dev1"
+SPARE_DEV2="$TEST_BASE_DIR/spare-dev2"
+SPARE_DEVS="$SPARE_DEV1 $SPARE_DEV2"
+
+# We need ZED running to work with spares
+zed_setup
+zed_start
+# Clear events from previous runs
+zed_events_drain
+
+for type in "mirror" "raidz1" "raidz2" "raidz3"
+do
+       # 1. Create a redundant pool with a spare device
+       truncate -s $SPA_MINDEVSIZE $DATA_DEVS $SPARE_DEVS
+       log_must zpool create $TESTPOOL $type $FAULT_DEV $SAFE_DEVS
+       log_must zpool add $TESTPOOL spare $SPARE_DEV1
+
+       # 2.1 Fault a device, verify the spare is kicked in
+       log_must zinject -d $FAULT_DEV -e nxio -T all -f 100 $TESTPOOL
+       log_must zpool scrub $TESTPOOL
+       log_must wait_vdev_state $TESTPOOL $FAULT_DEV "UNAVAIL" 60
+       log_must wait_vdev_state $TESTPOOL $SPARE_DEV1 "ONLINE" 60
+       log_must wait_hotspare_state $TESTPOOL $SPARE_DEV1 "INUSE"
+       log_must check_state $TESTPOOL "" "DEGRADED"
+
+       # 2.2 Replace the faulted device: this creates a replacing vdev inside a
+       #     spare vdev
+       log_must zpool replace $TESTPOOL $FAULT_DEV $REPLACE_DEV
+       log_must wait_vdev_state $TESTPOOL $REPLACE_DEV "ONLINE" 60
+       zpool status | awk -v poolname="$TESTPOOL" -v type="$type" 'BEGIN {s=""}
+           $1 ~ poolname {c=4}; (c && c--) { s=s$1":" }
+           END { if (s != poolname":"type"-0:spare-0:replacing-0:") exit 1; }'
+       if [[ $? -ne 0 ]]; then
+               log_fail "Pool does not contain nested replacing/spare vdevs"
+       fi
+
+       # 3. Verify 'zpool add' is able to add new devices
+       log_must zpool add $TESTPOOL spare $SPARE_DEV2
+       log_must wait_hotspare_state $TESTPOOL $SPARE_DEV2 "AVAIL"
+       log_must zpool add -f $TESTPOOL $ADD_DEV
+       log_must wait_vdev_state $TESTPOOL $ADD_DEV "ONLINE" 60
+
+       # Cleanup
+       log_must zinject -c all
+       destroy_pool $TESTPOOL
+       log_must rm -f $DATA_DEVS $SPARE_DEVS
+done
+
+log_pass "'zpool add' works with nested replacing/spare vdevs"