From 9d3f7b87919b7d0d869153ca72844f565cd0bf52 Mon Sep 17 00:00:00 2001 From: Olaf Faaland Date: Tue, 2 May 2017 13:55:24 -0700 Subject: [PATCH] Write label 2,3 uberblocks when vdev expands When vdev_psize increases, the location of labels 2 and 3 changes because their location is relative to the end of the device. The configs for labels 2 and 3 are written during the next spa_sync() because the vdev is added to the dirty config list. However, the uberblock rings are not re-written in their new location, leaving the device vulnerable to the beginning of the device being overwritten or damaged. This patch copies the uberblock ring from label 0 to labels 2 and 3, in their new locations, at the next sync after vdev_psize increases. Also, add a test zpool_expand_004_pos.ksh to confirm the uberblocks are copied. Reviewed-by: BearBabyLiu Reviewed-by: Andreas Dilger Reviewed-by: Brian Behlendorf Signed-off-by: Olaf Faaland Closes #5108 --- include/sys/vdev_impl.h | 1 + module/zfs/vdev.c | 7 ++ module/zfs/vdev_label.c | 61 +++++++++++ tests/runfiles/linux.run | 2 +- .../cli_root/zpool_expand/Makefile.am | 3 +- .../zpool_expand/zpool_expand_004_pos.ksh | 102 ++++++++++++++++++ 6 files changed, 174 insertions(+), 2 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h index d7f11a2b8..c9e9bede9 100644 --- a/include/sys/vdev_impl.h +++ b/include/sys/vdev_impl.h @@ -229,6 +229,7 @@ struct vdev { boolean_t vdev_cant_write; /* vdev is failing all writes */ boolean_t vdev_isspare; /* was a hot spare */ boolean_t vdev_isl2cache; /* was a l2cache device */ + boolean_t vdev_copy_uberblocks; /* post expand copy uberblocks */ vdev_queue_t vdev_queue; /* I/O deadline schedule queue */ vdev_cache_t vdev_cache; /* physical block cache */ spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */ diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index da0639102..dfb1ef522 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -1330,6 +1330,13 @@ vdev_open(vdev_t *vd) max_asize = max_osize; } + /* + * If the vdev was expanded, record this so that we can re-create the + * uberblock rings in labels {2,3}, during the next sync. + */ + if ((psize > vd->vdev_psize) && (vd->vdev_psize != 0)) + vd->vdev_copy_uberblocks = B_TRUE; + vd->vdev_psize = psize; /* diff --git a/module/zfs/vdev_label.c b/module/zfs/vdev_label.c index b50a297bc..20c0ac86a 100644 --- a/module/zfs/vdev_label.c +++ b/module/zfs/vdev_label.c @@ -1132,6 +1132,60 @@ vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config) spa_config_exit(spa, SCL_ALL, FTAG); } +/* + * For use when a leaf vdev is expanded. + * The location of labels 2 and 3 changed, and at the new location the + * uberblock rings are either empty or contain garbage. The sync will write + * new configs there because the vdev is dirty, but expansion also needs the + * uberblock rings copied. Read them from label 0 which did not move. + * + * Since the point is to populate labels {2,3} with valid uberblocks, + * we zero uberblocks we fail to read or which are not valid. + */ + +static void +vdev_copy_uberblocks(vdev_t *vd) +{ + abd_t *ub_abd; + zio_t *write_zio; + int locks = (SCL_L2ARC | SCL_ZIO); + int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | + ZIO_FLAG_SPECULATIVE; + + ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_READER) == + SCL_STATE); + ASSERT(vd->vdev_ops->vdev_op_leaf); + + spa_config_enter(vd->vdev_spa, locks, FTAG, RW_READER); + + ub_abd = abd_alloc(VDEV_UBERBLOCK_SIZE(vd), B_TRUE); + + write_zio = zio_root(vd->vdev_spa, NULL, NULL, flags); + for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) { + const int src_label = 0; + zio_t *zio; + + zio = zio_root(vd->vdev_spa, NULL, NULL, flags); + vdev_label_read(zio, vd, src_label, ub_abd, + VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd), + NULL, NULL, flags); + + if (zio_wait(zio) || uberblock_verify(abd_to_buf(ub_abd))) + abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd)); + + for (int l = 2; l < VDEV_LABELS; l++) + vdev_label_write(write_zio, vd, l, ub_abd, + VDEV_UBERBLOCK_OFFSET(vd, n), + VDEV_UBERBLOCK_SIZE(vd), NULL, NULL, + flags | ZIO_FLAG_DONT_PROPAGATE); + } + (void) zio_wait(write_zio); + + spa_config_exit(vd->vdev_spa, locks, FTAG); + + abd_free(ub_abd); +} + /* * On success, increment root zio's count of good writes. * We only get credit for writes to known-visible vdevs; see spa_vdev_add(). @@ -1163,6 +1217,13 @@ vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags) if (!vdev_writeable(vd)) return; + /* If the vdev was expanded, need to copy uberblock rings. */ + if (vd->vdev_state == VDEV_STATE_HEALTHY && + vd->vdev_copy_uberblocks == B_TRUE) { + vdev_copy_uberblocks(vd); + vd->vdev_copy_uberblocks = B_FALSE; + } + n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1); /* Copy the uberblock_t into the ABD */ diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index 406491aa1..8439f8e66 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -276,7 +276,7 @@ tests = ['zpool_detach_001_neg'] # zpool_expand_001_pos - https://github.com/zfsonlinux/zfs/issues/2437 # zpool_expand_003_pos - https://github.com/zfsonlinux/zfs/issues/2437 [tests/functional/cli_root/zpool_expand] -tests = ['zpool_expand_002_pos'] +tests = ['zpool_expand_002_pos', 'zpool_expand_004_pos'] # DISABLED: # zpool_export_004_pos - https://github.com/zfsonlinux/zfs/issues/3484 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am index dc90f9f01..672261718 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/Makefile.am @@ -5,4 +5,5 @@ dist_pkgdata_SCRIPTS = \ cleanup.ksh \ zpool_expand_001_pos.ksh \ zpool_expand_002_pos.ksh \ - zpool_expand_003_neg.ksh + zpool_expand_003_neg.ksh \ + zpool_expand_004_pos.ksh diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh new file mode 100755 index 000000000..69481ba1a --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_004_pos.ksh @@ -0,0 +1,102 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2012, 2015 by Delphix. All rights reserved. +# Copyright (c) 2017 Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/cli_root/zpool_expand/zpool_expand.cfg + +# +# DESCRIPTION: +# After vdev expansion, all 4 labels have the same set of uberblocks. +# +# +# STRATEGY: +# 1) Create 3 files +# 2) Create a pool backed by the files +# 3) Expand the files' size with truncate +# 4) Use zpool online -e to expand the vdevs +# 5) Check that for all the devices, all 4 labels have the same uberblocks +# + +verify_runnable "global" + +function cleanup +{ + if poolexists $TESTPOOL1; then + log_must zpool destroy $TESTPOOL1 + fi + + for i in 1 2 3; do + [ -e ${TEMPFILE}.$i ] && log_must rm ${TEMPFILE}.$i + done +} + +log_onexit cleanup + +log_assert "After vdev expansion, all 4 labels have the same set of uberblocks." + +for type in " " mirror raidz raidz2; do + for i in 1 2 3; do + log_must truncate -s $org_size ${TEMPFILE}.$i + done + + log_must zpool create $TESTPOOL1 $type $TEMPFILE.1 \ + $TEMPFILE.2 $TEMPFILE.3 + + sync_pool $TESTPOOL1 + + for i in 1 2 3; do + log_must truncate -s $exp_size ${TEMPFILE}.$i + done + + for i in 1 2 3; do + log_must zpool online -e $TESTPOOL1 ${TEMPFILE}.$i + done + + sync_pool $TESTPOOL1 + + + for i in 1 2 3; do + non_uniform=$(zdb -lu ${TEMPFILE}.$i | \ + grep 'labels = ' | \ + grep -c -v 'labels = 0 1 2 3') + + log_note "non-uniform label count: $non_uniform" + + if [[ $non_uniform -ne 0 ]]; then + log_fail "After vdev expansion, all labels contents are not identical" + fi + done + + log_must zpool destroy $TESTPOOL1 +done + +log_pass "After vdev expansion, all 4 labels have the same set of uberblocks." -- 2.40.0