]> granicus.if.org Git - zfs/commitdiff
Raw receive fix and encrypted objset security fix
authorTom Caputi <tcaputi@datto.com>
Thu, 28 Jun 2018 16:20:34 +0000 (12:20 -0400)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Thu, 28 Jun 2018 16:20:34 +0000 (09:20 -0700)
This patch fixes two problems with the encryption code. First, the
current code does not correctly prohibit the DMU from updating
dn_maxblkid during object truncation within a raw receive. This
usually only causes issues when the truncating DRR_FREE record is
aggregated with DRR_FREE records later in the receive, so it is
relatively hard to hit.

Second, this patch fixes a security issue where reading blocks
within an encrypted object did not guarantee that the dnode block
itself had ever been verified against its MAC. Usually the
verification happened anyway when the bonus buffer was read, but
some use cases (notably zvols) might never perform the check.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Signed-off-by: Tom Caputi <tcaputi@datto.com>
Closes #7632

module/zfs/arc.c
module/zfs/dbuf.c
module/zfs/dnode_sync.c

index bf514f00f07411c88bb4c7876ebbc15fa1d0a5b1..ab256484119fa1567f85cf576faddbe7ee059ccf 100644 (file)
@@ -2137,13 +2137,17 @@ arc_buf_fill(arc_buf_t *buf, spa_t *spa, const zbookmark_phys_t *zb,
        }
 
        /*
-        * Adjust encrypted and authenticated headers to accomodate the
-        * request if needed.
+        * Adjust encrypted and authenticated headers to accomodate
+        * the request if needed. Dnode blocks (ARC_FILL_IN_PLACE) are
+        * allowed to fail decryption due to keys not being loaded
+        * without being marked as an IO error.
         */
        if (HDR_PROTECTED(hdr)) {
                error = arc_fill_hdr_crypt(hdr, hash_lock, spa,
                    zb, !!(flags & ARC_FILL_NOAUTH));
-               if (error != 0) {
+               if (error == EACCES && (flags & ARC_FILL_IN_PLACE) != 0) {
+                       return (error);
+               } else if (error != 0) {
                        if (hash_lock != NULL)
                                mutex_enter(hash_lock);
                        arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR);
index e3738db525e8e0db610758798ad5ef9e1009d3ac..0a8e6d08b3076b99aa9827b544fee87ff5e7daa1 100644 (file)
@@ -1119,6 +1119,64 @@ dbuf_read_done(zio_t *zio, const zbookmark_phys_t *zb, const blkptr_t *bp,
        dbuf_rele_and_unlock(db, NULL, B_FALSE);
 }
 
+
+/*
+ * This function ensures that, when doing a decrypting read of a block,
+ * we make sure we have decrypted the dnode associated with it. We must do
+ * this so that we ensure we are fully authenticating the checksum-of-MACs
+ * tree from the root of the objset down to this block. Indirect blocks are
+ * always verified against their secure checksum-of-MACs assuming that the
+ * dnode containing them is correct. Now that we are doing a decrypting read,
+ * we can be sure that the key is loaded and verify that assumption. This is
+ * especially important considering that we always read encrypted dnode
+ * blocks as raw data (without verifying their MACs) to start, and
+ * decrypt / authenticate them when we need to read an encrypted bonus buffer.
+ */
+static int
+dbuf_read_verify_dnode_crypt(dmu_buf_impl_t *db, uint32_t flags)
+{
+       int err = 0;
+       objset_t *os = db->db_objset;
+       arc_buf_t *dnode_abuf;
+       dnode_t *dn;
+       zbookmark_phys_t zb;
+
+       ASSERT(MUTEX_HELD(&db->db_mtx));
+
+       if (!os->os_encrypted || os->os_raw_receive ||
+           (flags & DB_RF_NO_DECRYPT) != 0)
+               return (0);
+
+       DB_DNODE_ENTER(db);
+       dn = DB_DNODE(db);
+       dnode_abuf = (dn->dn_dbuf != NULL) ? dn->dn_dbuf->db_buf : NULL;
+
+       if (dnode_abuf == NULL || !arc_is_encrypted(dnode_abuf)) {
+               DB_DNODE_EXIT(db);
+               return (0);
+       }
+
+       SET_BOOKMARK(&zb, dmu_objset_id(os),
+           DMU_META_DNODE_OBJECT, 0, dn->dn_dbuf->db_blkid);
+       err = arc_untransform(dnode_abuf, os->os_spa, &zb, B_TRUE);
+
+       /*
+        * An error code of EACCES tells us that the key is still not
+        * available. This is ok if we are only reading authenticated
+        * (and therefore non-encrypted) blocks.
+        */
+       if (err == EACCES && ((db->db_blkid != DMU_BONUS_BLKID &&
+           !DMU_OT_IS_ENCRYPTED(dn->dn_type)) ||
+           (db->db_blkid == DMU_BONUS_BLKID &&
+           !DMU_OT_IS_ENCRYPTED(dn->dn_bonustype))))
+               err = 0;
+
+
+       DB_DNODE_EXIT(db);
+
+       return (err);
+}
+
 static int
 dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 {
@@ -1143,23 +1201,13 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
                 */
                int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
                int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
-               arc_buf_t *dn_buf = (dn->dn_dbuf != NULL) ?
-                   dn->dn_dbuf->db_buf : NULL;
 
                /* if the underlying dnode block is encrypted, decrypt it */
-               if (dn_buf != NULL && dn->dn_objset->os_encrypted &&
-                   DMU_OT_IS_ENCRYPTED(dn->dn_bonustype) &&
-                   (flags & DB_RF_NO_DECRYPT) == 0 &&
-                   arc_is_encrypted(dn_buf)) {
-                       SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
-                           DMU_META_DNODE_OBJECT, 0, dn->dn_dbuf->db_blkid);
-                       err = arc_untransform(dn_buf, dn->dn_objset->os_spa,
-                           &zb, B_TRUE);
-                       if (err != 0) {
-                               DB_DNODE_EXIT(db);
-                               mutex_exit(&db->db_mtx);
-                               return (err);
-                       }
+               err = dbuf_read_verify_dnode_crypt(db, flags);
+               if (err != 0) {
+                       DB_DNODE_EXIT(db);
+                       mutex_exit(&db->db_mtx);
+                       return (err);
                }
 
                ASSERT3U(bonuslen, <=, db->db.db_size);
@@ -1215,17 +1263,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
                return (0);
        }
 
-       DB_DNODE_EXIT(db);
-
-       db->db_state = DB_READ;
-       mutex_exit(&db->db_mtx);
-
-       if (DBUF_IS_L2CACHEABLE(db))
-               aflags |= ARC_FLAG_L2CACHE;
-
-       SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
-           db->db.db_object, db->db_level, db->db_blkid);
-
        /*
         * All bps of an encrypted os should have the encryption bit set.
         * If this is not true it indicates tampering and we report an error.
@@ -1234,11 +1271,31 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
                spa_log_error(db->db_objset->os_spa, &zb);
                zfs_panic_recover("unencrypted block in encrypted "
                    "object set %llu", dmu_objset_id(db->db_objset));
+               DB_DNODE_EXIT(db);
+               mutex_exit(&db->db_mtx);
                return (SET_ERROR(EIO));
        }
 
+       err = dbuf_read_verify_dnode_crypt(db, flags);
+       if (err != 0) {
+               DB_DNODE_EXIT(db);
+               mutex_exit(&db->db_mtx);
+               return (err);
+       }
+
+       DB_DNODE_EXIT(db);
+
+       db->db_state = DB_READ;
+       mutex_exit(&db->db_mtx);
+
+       if (DBUF_IS_L2CACHEABLE(db))
+               aflags |= ARC_FLAG_L2CACHE;
+
        dbuf_add_ref(db, NULL);
 
+       SET_BOOKMARK(&zb, dmu_objset_id(db->db_objset),
+           db->db.db_object, db->db_level, db->db_blkid);
+
        zio_flags = (flags & DB_RF_CANFAIL) ?
            ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED;
 
@@ -1358,14 +1415,22 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
                spa_t *spa = dn->dn_objset->os_spa;
 
                /*
-                * If the arc buf is compressed or encrypted, we need to
-                * untransform it to read the data. This could happen during
-                * the "zfs receive" of a stream which is deduplicated and
-                * either raw or compressed. We do not need to do this if the
-                * caller wants raw encrypted data.
+                * Ensure that this block's dnode has been decrypted if
+                * the caller has requested decrypted data.
                 */
-               if (db->db_buf != NULL && (flags & DB_RF_NO_DECRYPT) == 0 &&
+               err = dbuf_read_verify_dnode_crypt(db, flags);
+
+               /*
+                * If the arc buf is compressed or encrypted and the caller
+                * requested uncompressed data, we need to untransform it
+                * before returning. We also call arc_untransform() on any
+                * unauthenticated blocks, which will verify their MAC if
+                * the key is now available.
+                */
+               if (err == 0 && db->db_buf != NULL &&
+                   (flags & DB_RF_NO_DECRYPT) == 0 &&
                    (arc_is_encrypted(db->db_buf) ||
+                   arc_is_unauthenticated(db->db_buf) ||
                    arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF)) {
                        zbookmark_phys_t zb;
 
@@ -1376,7 +1441,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
                        dbuf_set_data(db, db->db_buf);
                }
                mutex_exit(&db->db_mtx);
-               if (prefetch)
+               if (err == 0 && prefetch)
                        dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
                if ((flags & DB_RF_HAVESTRUCT) == 0)
                        rw_exit(&dn->dn_struct_rwlock);
@@ -1943,6 +2008,8 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
        ddt_prefetch(os->os_spa, db->db_blkptr);
 
        if (db->db_level == 0) {
+               ASSERT(!db->db_objset->os_raw_receive ||
+                   dn->dn_maxblkid >= db->db_blkid);
                dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock);
                ASSERT(dn->dn_maxblkid >= db->db_blkid);
        }
@@ -3806,8 +3873,10 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
        if (db->db_level == 0) {
                mutex_enter(&dn->dn_mtx);
                if (db->db_blkid > dn->dn_phys->dn_maxblkid &&
-                   db->db_blkid != DMU_SPILL_BLKID)
+                   db->db_blkid != DMU_SPILL_BLKID) {
+                       ASSERT0(db->db_objset->os_raw_receive);
                        dn->dn_phys->dn_maxblkid = db->db_blkid;
+               }
                mutex_exit(&dn->dn_mtx);
 
                if (dn->dn_type == DMU_OT_DNODE) {
index 044031e4f7b11740ca9002d7666abece5cd9fa32..830da26f89b2d22c3ce396ac5389dd3eb1ebcc6c 100644 (file)
@@ -367,7 +367,12 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
                }
        }
 
-       if (trunc) {
+       /*
+        * Do not truncate the maxblkid if we are performing a raw
+        * receive. The raw receive sets the mablkid manually and
+        * must not be overriden.
+        */
+       if (trunc && !dn->dn_objset->os_raw_receive) {
                ASSERTV(uint64_t off);
                dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1;