]> granicus.if.org Git - zfs/commitdiff
Fix issues with raw receive_write_byref()
authorTom Caputi <tcaputi@datto.com>
Mon, 20 Aug 2018 18:03:56 +0000 (14:03 -0400)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Mon, 20 Aug 2018 18:03:56 +0000 (11:03 -0700)
This patch fixes 2 issues with raw, deduplicated send streams. The
first is that datasets who had been completely received earlier in
the stream were not still marked as raw receives. This caused
problems when newly received datasets attempted to fetch raw data
from these datasets without this flag set.

The second problem was that the arc freeze checksum code was not
consistent about which locks needed to be held while performing
its asserts. The proper locking needed to run these asserts is
actually fairly nuanced, since the asserts touch the linked list
of buffers (requiring the header lock), the arc_state (requiring
the b_evict_lock), and the b_freeze_cksum (requiring the
b_freeze_lock). This seems like a large performance sacrifice and
a lot of unneeded complexity to verify that this relatively small
debug feature is working as intended, so this patch simply removes
these asserts instead.

Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #7701

module/zfs/arc.c
module/zfs/dbuf.c
module/zfs/dmu.c
module/zfs/dmu_send.c
tests/runfiles/linux.run
tests/zfs-tests/tests/functional/rsend/Makefile.am
tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh [new file with mode: 0755]

index 7f2929c17e1a026e42849c1b114c97163a95fc75..f9436d7c471ba301a9757b9683fc2321a4768c18 100644 (file)
@@ -1561,6 +1561,9 @@ arc_cksum_free(arc_buf_hdr_t *hdr)
 static boolean_t
 arc_hdr_has_uncompressed_buf(arc_buf_hdr_t *hdr)
 {
+       ASSERT(hdr->b_l1hdr.b_state == arc_anon ||
+           MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
+
        for (arc_buf_t *b = hdr->b_l1hdr.b_buf; b != NULL; b = b->b_next) {
                if (!ARC_BUF_COMPRESSED(b)) {
                        return (B_TRUE);
@@ -1584,15 +1587,13 @@ arc_cksum_verify(arc_buf_t *buf)
        if (!(zfs_flags & ZFS_DEBUG_MODIFY))
                return;
 
-       if (ARC_BUF_COMPRESSED(buf)) {
-               ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
-                   arc_hdr_has_uncompressed_buf(hdr));
+       if (ARC_BUF_COMPRESSED(buf))
                return;
-       }
 
        ASSERT(HDR_HAS_L1HDR(hdr));
 
        mutex_enter(&hdr->b_l1hdr.b_freeze_lock);
+
        if (hdr->b_l1hdr.b_freeze_cksum == NULL || HDR_IO_ERROR(hdr)) {
                mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
                return;
@@ -1650,11 +1651,7 @@ arc_cksum_compute(arc_buf_t *buf)
        ASSERT(HDR_HAS_L1HDR(hdr));
 
        mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
-       if (hdr->b_l1hdr.b_freeze_cksum != NULL) {
-               ASSERT(arc_hdr_has_uncompressed_buf(hdr));
-               mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
-               return;
-       } else if (ARC_BUF_COMPRESSED(buf)) {
+       if (hdr->b_l1hdr.b_freeze_cksum != NULL || ARC_BUF_COMPRESSED(buf)) {
                mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
                return;
        }
@@ -1746,14 +1743,10 @@ arc_buf_thaw(arc_buf_t *buf)
        arc_cksum_verify(buf);
 
        /*
-        * Compressed buffers do not manipulate the b_freeze_cksum or
-        * allocate b_thawed.
+        * Compressed buffers do not manipulate the b_freeze_cksum.
         */
-       if (ARC_BUF_COMPRESSED(buf)) {
-               ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
-                   arc_hdr_has_uncompressed_buf(hdr));
+       if (ARC_BUF_COMPRESSED(buf))
                return;
-       }
 
        ASSERT(HDR_HAS_L1HDR(hdr));
        arc_cksum_free(hdr);
@@ -1763,26 +1756,14 @@ arc_buf_thaw(arc_buf_t *buf)
 void
 arc_buf_freeze(arc_buf_t *buf)
 {
-       arc_buf_hdr_t *hdr = buf->b_hdr;
-       kmutex_t *hash_lock;
-
        if (!(zfs_flags & ZFS_DEBUG_MODIFY))
                return;
 
-       if (ARC_BUF_COMPRESSED(buf)) {
-               ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
-                   arc_hdr_has_uncompressed_buf(hdr));
+       if (ARC_BUF_COMPRESSED(buf))
                return;
-       }
-
-       hash_lock = HDR_LOCK(hdr);
-       mutex_enter(hash_lock);
 
-       ASSERT(HDR_HAS_L1HDR(hdr));
-       ASSERT(hdr->b_l1hdr.b_freeze_cksum != NULL ||
-           hdr->b_l1hdr.b_state == arc_anon);
+       ASSERT(HDR_HAS_L1HDR(buf->b_hdr));
        arc_cksum_compute(buf);
-       mutex_exit(hash_lock);
 }
 
 /*
@@ -1901,7 +1882,7 @@ arc_hdr_authenticate(arc_buf_hdr_t *hdr, spa_t *spa, uint64_t dsobj)
        void *tmpbuf = NULL;
        abd_t *abd = hdr->b_l1hdr.b_pabd;
 
-       ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr)));
+       ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
        ASSERT(HDR_AUTHENTICATED(hdr));
        ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
 
@@ -1971,7 +1952,7 @@ arc_hdr_decrypt(arc_buf_hdr_t *hdr, spa_t *spa, const zbookmark_phys_t *zb)
        boolean_t no_crypt = B_FALSE;
        boolean_t bswap = (hdr->b_l1hdr.b_byteswap != DMU_BSWAP_NUMFUNCS);
 
-       ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr)));
+       ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
        ASSERT(HDR_ENCRYPTED(hdr));
 
        arc_hdr_alloc_abd(hdr, B_FALSE);
@@ -2090,7 +2071,7 @@ arc_buf_untransform_in_place(arc_buf_t *buf, kmutex_t *hash_lock)
 
        ASSERT(HDR_ENCRYPTED(hdr));
        ASSERT3U(hdr->b_crypt_hdr.b_ot, ==, DMU_OT_DNODE);
-       ASSERT(HDR_LOCK(hdr) == NULL || MUTEX_HELD(HDR_LOCK(hdr)));
+       ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
        ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
 
        zio_crypt_copy_dnode_bonus(hdr->b_l1hdr.b_pabd, buf->b_data,
index f0bee6c2bb59ad8d950dbd0097f41d01f7935a30..1af0d8010807a307e0cdc1001be8b03025127f24 100644 (file)
@@ -1267,7 +1267,6 @@ dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags)
            !DMU_OT_IS_ENCRYPTED(dn->dn_bonustype))))
                err = 0;
 
-
        DB_DNODE_EXIT(db);
 
        return (err);
index 9d00098c8b0dc07ce38b9c6ce24941f7febd3985..88b574d4aa04fd5f48245a82fd57851a0fa27eb9 100644 (file)
@@ -1622,6 +1622,7 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
        dmu_buf_t *dst_handle;
        dmu_buf_impl_t *dstdb;
        dmu_buf_impl_t *srcdb = (dmu_buf_impl_t *)handle;
+       dmu_object_type_t type;
        arc_buf_t *abuf;
        uint64_t datalen;
        boolean_t byteorder;
@@ -1637,11 +1638,15 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
        dstdb = (dmu_buf_impl_t *)dst_handle;
        datalen = arc_buf_size(srcdb->db_buf);
 
+       DB_DNODE_ENTER(dstdb);
+       type = DB_DNODE(dstdb)->dn_type;
+       DB_DNODE_EXIT(dstdb);
+
        /* allocated an arc buffer that matches the type of srcdb->db_buf */
        if (arc_is_encrypted(srcdb->db_buf)) {
                arc_get_raw_params(srcdb->db_buf, &byteorder, salt, iv, mac);
                abuf = arc_loan_raw_buf(os->os_spa, dmu_objset_id(os),
-                   byteorder, salt, iv, mac, DB_DNODE(dstdb)->dn_type,
+                   byteorder, salt, iv, mac, type,
                    datalen, arc_buf_lsize(srcdb->db_buf),
                    arc_get_compression(srcdb->db_buf));
        } else {
@@ -1649,7 +1654,7 @@ dmu_copy_from_buf(objset_t *os, uint64_t object, uint64_t offset,
                ASSERT3U(arc_get_compression(srcdb->db_buf),
                    ==, ZIO_COMPRESS_OFF);
                abuf = arc_loan_buf(os->os_spa,
-                   DMU_OT_IS_METADATA(DB_DNODE(dstdb)->dn_type), datalen);
+                   DMU_OT_IS_METADATA(type), datalen);
        }
 
        ASSERT3U(datalen, ==, arc_buf_size(abuf));
index 61d4ebd4ff7b1c26d36bc53eab23638286a786a0..1f4d3a1048b81ce7f5ead95a9e0746345f2cd344 100644 (file)
@@ -2343,9 +2343,14 @@ free_guid_map_onexit(void *arg)
        guid_map_entry_t *gmep;
 
        while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
-               dsl_dataset_long_rele(gmep->gme_ds, gmep);
-               dsl_dataset_rele_flags(gmep->gme_ds,
-                   (gmep->raw) ? 0 : DS_HOLD_FLAG_DECRYPT, gmep);
+               ds_hold_flags_t dsflags = DS_HOLD_FLAG_DECRYPT;
+
+               if (gmep->raw) {
+                       gmep->gme_ds->ds_objset->os_raw_receive = B_FALSE;
+                       dsflags &= ~DS_HOLD_FLAG_DECRYPT;
+               }
+
+               dsl_dataset_disown(gmep->gme_ds, dsflags, gmep);
                kmem_free(gmep, sizeof (guid_map_entry_t));
        }
        avl_destroy(ca);
@@ -4237,6 +4242,7 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj,
        dsl_pool_t *dp;
        dsl_dataset_t *snapds;
        guid_map_entry_t *gmep;
+       objset_t *os;
        ds_hold_flags_t dsflags = (raw) ? 0 : DS_HOLD_FLAG_DECRYPT;
        int err;
 
@@ -4246,13 +4252,29 @@ add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj,
        if (err != 0)
                return (err);
        gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP);
-       err = dsl_dataset_hold_obj_flags(dp, snapobj, dsflags, gmep, &snapds);
+       err = dsl_dataset_own_obj(dp, snapobj, dsflags, gmep, &snapds);
        if (err == 0) {
-               gmep->guid = dsl_dataset_phys(snapds)->ds_guid;
+               /*
+                * If this is a deduplicated raw send stream, we need
+                * to make sure that we can still read raw blocks from
+                * earlier datasets in the stream, so we set the
+                * os_raw_receive flag now.
+                */
+               if (raw) {
+                       err = dmu_objset_from_ds(snapds, &os);
+                       if (err != 0) {
+                               dsl_dataset_disown(snapds, dsflags, FTAG);
+                               dsl_pool_rele(dp, FTAG);
+                               kmem_free(gmep, sizeof (*gmep));
+                               return (err);
+                       }
+                       os->os_raw_receive = B_TRUE;
+               }
+
                gmep->raw = raw;
+               gmep->guid = dsl_dataset_phys(snapds)->ds_guid;
                gmep->gme_ds = snapds;
                avl_add(guid_map, gmep);
-               dsl_dataset_long_hold(snapds, gmep);
        } else {
                kmem_free(gmep, sizeof (*gmep));
        }
index 445463cf8d19ceda4c1520fe9d5bb8dd5b1f1002..844a02caa86f2cd2c4ec09cca8cef11296502434 100644 (file)
@@ -745,7 +745,7 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
     'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize',
     'send-c_recv_dedup', 'send_encrypted_files', 'send_encrypted_heirarchy',
     'send_encrypted_props', 'send_freeobjects', 'send_realloc_dnode_size',
-    'send_hole_birth']
+    'send_hole_birth', 'send-wDR_encrypted_zvol']
 tags = ['functional', 'rsend']
 
 [tests/functional/scrub_mirror]
index d65a9144e5891bb492e2812748256691b8d46aef..316bcb4e6774c0d207dea413a1a88fa8078e15ec 100644 (file)
@@ -41,7 +41,8 @@ dist_pkgdata_SCRIPTS = \
        send-cpL_varied_recsize.ksh \
        send_freeobjects.ksh \
        send_realloc_dnode_size.ksh \
-       send_hole_birth.ksh
+       send_hole_birth.ksh \
+       send-wDR_encrypted_zvol.ksh
 
 dist_pkgdata_DATA = \
        rsend.cfg \
diff --git a/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh b/tests/zfs-tests/tests/functional/rsend/send-wDR_encrypted_zvol.ksh
new file mode 100755 (executable)
index 0000000..49b846e
--- /dev/null
@@ -0,0 +1,93 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2018 by Datto Inc. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/rsend/rsend.kshlib
+
+#
+# DESCRIPTION:
+# Verify that zvols with dedup=on and encryption=on can be sent and received
+# with a deduplicated raw send stream.
+#
+# STRATEGY:
+# 1. Create a zvol with dedup and encryption on and put a filesystem on it
+# 2. Copy a file into the zvol a few times and take a snapshot
+# 3. Repeat step 2 a few times to create more snapshots
+# 4. Send all snapshots in a recursive, raw, deduplicated send stream
+# 5. Mount the received zvol and verify that all of the data there is correct
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+       ismounted $recvmnt ext4 && log_must umount $recvmnt
+       ismounted $mntpnt ext4 && log_must umount $mntpnt
+       [[ -d $recvmnt ]] && log_must rm -rf $keyfile
+       [[ -d $mntpnt ]] && log_must rm -rf $keyfile
+       destroy_dataset $TESTPOOL/recv "-r"
+       destroy_dataset $TESTPOOL/$TESTVOL "-r"
+       [[ -f $keyfile ]] && log_must rm $keyfile
+       [[ -f $sendfile ]] && log_must rm $sendfile
+}
+log_onexit cleanup
+
+log_assert "Verify zfs can receive raw, recursive, and deduplicated send streams"
+
+typeset keyfile=/$TESTPOOL/pkey
+typeset snap_count=5
+typeset zdev=$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL
+typeset mntpnt=$TESTDIR/$TESTVOL
+typeset recvdev=$ZVOL_DEVDIR/$TESTPOOL/recv
+typeset recvmnt=$TESTDIR/recvmnt
+typeset sendfile=$TESTDIR/sendfile
+
+log_must eval "echo 'password' > $keyfile"
+
+log_must zfs create -o dedup=on -o encryption=on -o keyformat=passphrase \
+       -o keylocation=file://$keyfile -V 128M $TESTPOOL/$TESTVOL
+log_must block_device_wait
+
+log_must eval "echo 'y' | newfs -t ext4 -v $zdev"
+log_must mkdir -p $mntpnt
+log_must mkdir -p $recvmnt
+log_must mount $zdev $mntpnt
+
+for ((i = 1; i <= $snap_count; i++)); do
+       log_must dd if=/dev/urandom of=$mntpnt/file bs=1M count=1
+       for ((j = 0; j < 10; j++)); do
+               log_must cp $mntpnt/file $mntpnt/file$j
+       done
+
+       log_must sync
+       log_must zfs snap $TESTPOOL/$TESTVOL@snap$i
+done
+
+log_must eval "zfs send -wDR $TESTPOOL/$TESTVOL@snap$snap_count > $sendfile"
+log_must eval "zfs recv $TESTPOOL/recv < $sendfile"
+log_must zfs load-key $TESTPOOL/recv
+log_must block_device_wait
+
+log_must mount $recvdev $recvmnt
+
+md5_1=$(cat $mntpnt/* | md5sum | awk '{print $1}')
+md5_2=$(cat $recvmnt/* | md5sum | awk '{print $1}')
+[[ "$md5_1" == "$md5_2" ]] || log_fail "md5 mismatch: $md5_1 != $md5_2"
+
+log_pass "zfs can receive raw, recursive, and deduplicated send streams"