From: Serapheim Dimitropoulos Date: Tue, 12 Jun 2018 22:34:20 +0000 (-0700) Subject: OpenZFS 9591 - ms_shift can be incorrectly changed X-Git-Tag: zfs-0.8.0-rc1~109 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7637ef8d235f34876c8600e3006e0bb9763b24d8;p=zfs OpenZFS 9591 - ms_shift can be incorrectly changed ms_shift can be incorrectly changed in MOS config for indirect vdevs that have been historically expanded. According to spa_config_update() we expect new vdevs to have vdev_ms_array equal to 0 and then we go ahead and set their metaslab size. The problem is that indirect vdevs also have vdev_ms_array == 0 because their metaslabs are destroyed once their removal is done. As a result, a vdev that was expanded and then removed may have its ms_shift changed if another vdev was added after its removal. Fortunately this behavior does not cause any type of crash or bad behavior in the kernel but it can confuse zdb and anyone doing any kind of analysis of the history of the pools. Authored by: Serapheim Dimitropoulos Reviewed by: Matthew Ahrens Reviewed by: George Wilson Reviewed by: John Kennedy Reviewed by: Prashanth Sreenivasa Reviewed by: Brian Behlendorf Signed-off-by: Tim Chase Ported-by: Tim Chase OpenZFS-commit: https://github.com/openzfs/openzfs/pull/651 OpenZFS-issue: https://illumos.org/issues/9591a External-issue: DLPX-58879 Closes #7644 --- diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index b94f0fc84..8616abda3 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -22,7 +22,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 2011, 2015 by Delphix. All rights reserved. + * Copyright (c) 2011, 2018 by Delphix. All rights reserved. * Copyright 2017 Joyent, Inc. 
*/ @@ -576,6 +576,18 @@ spa_config_update(spa_t *spa, int what) */ for (c = 0; c < rvd->vdev_children; c++) { vdev_t *tvd = rvd->vdev_child[c]; + + /* + * Explicitly skip vdevs that are indirect or + * log vdevs that are being removed. The reason + * is that both of those can have vdev_ms_array + * set to 0 and we wouldn't want to change their + * metaslab size nor call vdev_expand() on them. + */ + if (!vdev_is_concrete(tvd) || + (tvd->vdev_islog && tvd->vdev_removing)) + continue; + if (tvd->vdev_ms_array == 0) vdev_metaslab_set_size(tvd); vdev_expand(tvd, txg); diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 5b67e5f5f..37bb5a0c5 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -4172,11 +4172,11 @@ vdev_expand(vdev_t *vd, uint64_t txg) { ASSERT(vd->vdev_top == vd); ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_WRITER) == SCL_ALL); + ASSERT(vdev_is_concrete(vd)); vdev_set_deflate_ratio(vd); - if ((vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count && - vdev_is_concrete(vd)) { + if ((vd->vdev_asize >> vd->vdev_ms_shift) > vd->vdev_ms_count) { VERIFY(vdev_metaslab_init(vd, txg) == 0); vdev_config_dirty(vd); } diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index af646f777..3d3ef0afa 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -683,15 +683,16 @@ tags = ['functional', 'refreserv'] [tests/functional/removal] pre = -tests = ['removal_sanity', 'removal_all_vdev', 'removal_check_space', - 'removal_condense_export', - 'removal_multiple_indirection', 'removal_remap', - 'removal_remap_deadlists', - 'removal_with_add', 'removal_with_create_fs', 'removal_with_dedup', - 'removal_with_export', 'removal_with_ganging', 'removal_with_remap', - 'removal_with_remove', 'removal_with_scrub', 'removal_with_send', - 'removal_with_send_recv', 'removal_with_snapshot', 'removal_with_write', - 'removal_with_zdb', 'removal_resume_export', +tests = ['removal_all_vdev', 'removal_check_space', + 'removal_condense_export', 
'removal_multiple_indirection', + 'removal_remap', 'removal_remap_deadlists', + 'removal_resume_export', 'removal_sanity', 'removal_with_add', + 'removal_with_create_fs', 'removal_with_dedup', + 'removal_with_export', 'removal_with_ganging', + 'removal_with_remap', 'removal_with_remove', + 'removal_with_scrub', 'removal_with_send', + 'removal_with_send_recv', 'removal_with_snapshot', + 'removal_with_write', 'removal_with_zdb', 'remove_expanded', 'remove_mirror', 'remove_mirror_sanity', 'remove_raidz'] tags = ['functional', 'removal'] diff --git a/tests/zfs-tests/tests/functional/removal/Makefile.am b/tests/zfs-tests/tests/functional/removal/Makefile.am index eac82a2f1..c2b333a00 100644 --- a/tests/zfs-tests/tests/functional/removal/Makefile.am +++ b/tests/zfs-tests/tests/functional/removal/Makefile.am @@ -27,6 +27,6 @@ dist_pkgdata_SCRIPTS = \ removal_with_send.ksh removal_with_send_recv.ksh \ removal_with_snapshot.ksh removal_with_write.ksh \ removal_with_zdb.ksh remove_mirror.ksh remove_mirror_sanity.ksh \ - remove_raidz.ksh removal.kshlib + remove_raidz.ksh remove_expanded.ksh removal.kshlib pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/removal diff --git a/tests/zfs-tests/tests/functional/removal/remove_expanded.ksh b/tests/zfs-tests/tests/functional/removal/remove_expanded.ksh new file mode 100755 index 000000000..e7e63b705 --- /dev/null +++ b/tests/zfs-tests/tests/functional/removal/remove_expanded.ksh @@ -0,0 +1,89 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Delphix. All rights reserved. 
+# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/removal/removal.kshlib + +# +# BACKGROUND: +# +# ztest hit an issue where it ran zdb and zdb failed because +# it couldn't access some indirect mappings at the end of a +# vdev. The issue was that the vdev's ms_shift had changed after +# it was removed by the addition of another vdev. This test is +# a regression test for ensuring this case doesn't come up again. +# + + +TMPDIR=${TMPDIR:-$TEST_BASE_DIR} +DISK0=$TMPDIR/dsk0 +DISK1=$TMPDIR/dsk1 +DISK2=$TMPDIR/dsk2 + +log_must truncate -s $MINVDEVSIZE $DISK0 +log_must truncate -s $(($MINVDEVSIZE * 3)) $DISK1 +log_must truncate -s $MINVDEVSIZE $DISK2 + +function cleanup +{ + default_cleanup_noexit + log_must rm -f $DISK0 $DISK1 $DISK2 +} + +# +# Setup the pool with one disk . +# +log_must default_setup_noexit "$DISK0" +log_onexit cleanup + +# +# Expand vdev. +# +log_must truncate -s $(($MINVDEVSIZE * 2)) $DISK0 +log_must zpool reopen $TESTPOOL +log_must zpool online -e $TESTPOOL $DISK0 + +# +# Fill up the whole vdev. +# +dd if=/dev/urandom of=$TESTDIR/$TESTFILE0 bs=8M + +# +# Add another vdev and remove the first vdev creating indirect +# mappings for nearly all the allocatable space from the first +# vdev. Wait for removal to finish. +# +log_must zpool add $TESTPOOL $DISK1 +log_must zpool remove $TESTPOOL $DISK0 +log_must wait_for_removal $TESTPOOL + +# +# Add a new vdev that will trigger a change in the config. +# Run sync once to ensure that the config actually changed. +# +log_must zpool add $TESTPOOL $DISK2 +log_must sync + +# +# Ensure that zdb does not find any problems with this. +# +log_must zdb $TESTPOOL + +log_pass "Removal of expanded vdev doesn't cause any problems."