/*
* Manually set the maxblkid on a dnode. This will adjust nlevels accordingly
- * to accommodate the change.
+ * to accommodate the change. When calling this function, the caller must
+ * ensure that the object's nlevels can sufficiently support the new maxblkid.
*/
int dmu_object_set_maxblkid(objset_t *os, uint64_t object, uint64_t maxblkid,
dmu_tx_t *tx);
struct zfetch dn_zfetch;
};
+/*
+ * We use this (otherwise unused) bit to indicate if the value of
+ * dn_next_maxblkid[txgoff] is valid to use in dnode_sync().
+ */
+#define DMU_NEXT_MAXBLKID_SET (1ULL << 63)
+
/*
* Adds a level of indirection between the dbuf and the dnode to avoid
* iterating descendent dbufs in dnode_move(). Handles are not allocated
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
void dnode_diduse_space(dnode_t *dn, int64_t space);
-void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t);
+void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx,
+ boolean_t have_read, boolean_t force);
uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid);
void dnode_init(void);
void dnode_fini(void);
if (db->db_level == 0) {
ASSERT(!db->db_objset->os_raw_receive ||
dn->dn_maxblkid >= db->db_blkid);
- dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock);
+ dnode_new_blkid(dn, db->db_blkid, tx,
+ drop_struct_lock, B_FALSE);
ASSERT(dn->dn_maxblkid >= db->db_blkid);
}
if (err)
return (err);
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
- dnode_new_blkid(dn, maxblkid, tx, B_FALSE);
+ dnode_new_blkid(dn, maxblkid, tx, B_FALSE, B_TRUE);
rw_exit(&dn->dn_struct_rwlock);
dnode_rele(dn, FTAG);
return (0);
object = drro->drr_object;
- /* nblkptr will be bounded by the bonus size and type */
+ /* nblkptr should be bounded by the bonus size and type */
if (rwa->raw && nblkptr != drro->drr_nblkptr)
return (SET_ERROR(EINVAL));
+ /*
+ * Check for indicators that the object was freed and
+ * reallocated. For all sends, these indicators are:
+ * - A changed block size
+ * - A smaller nblkptr
+ * - A changed dnode size
+ * For raw sends we also check a few other fields to
+ * ensure we are preserving the objset structure exactly
+ * as it was on the receive side:
+ * - A changed indirect block size
+ * - A smaller nlevels
+ */
if (drro->drr_blksz != doi.doi_data_block_size ||
nblkptr < doi.doi_nblkptr ||
dn_slots != doi.doi_dnodesize >> DNODE_SHIFT ||
/*
* The dmu does not currently support decreasing nlevels
- * on an object. For non-raw sends, this does not matter
- * and the new object can just use the previous one's nlevels.
- * For raw sends, however, the structure of the received dnode
- * (including nlevels) must match that of the send side.
- * Therefore, instead of using dmu_object_reclaim(), we must
- * free the object completely and call dmu_object_claim_dnsize()
- * instead.
+ * or changing the number of dnode slots on an object. For
+ * non-raw sends, this does not matter and the new object
+ * can just use the previous one's nlevels. For raw sends,
+ * however, the structure of the received dnode (including
+ * nlevels and dnode slots) must match that of the send
+ * side. Therefore, instead of using dmu_object_reclaim(),
+ * we must free the object completely and call
+ * dmu_object_claim_dnsize() instead.
*/
if ((rwa->raw && drro->drr_nlevels < doi.doi_indirection) ||
dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
txg_wait_synced(dmu_objset_pool(rwa->os), 0);
object = DMU_NEW_OBJECT;
}
+
+ /*
+ * For raw receives, free everything beyond the new incoming
+ * maxblkid. Normally this would be done with a DRR_FREE
+ * record that would come after this DRR_OBJECT record is
+ * processed. However, for raw receives we manually set the
+ * maxblkid from the drr_maxblkid and so we must first free
+ * everything above that blkid to ensure the DMU is always
+ * consistent with itself.
+ */
+ if (rwa->raw) {
+ err = dmu_free_long_range(rwa->os, drro->drr_object,
+ (drro->drr_maxblkid + 1) * drro->drr_blksz,
+ DMU_OBJECT_END);
+ if (err != 0)
+ return (SET_ERROR(EINVAL));
+ }
} else if (err == EEXIST) {
/*
* The object requested is currently an interior slot of a
/* handle more restrictive dnode structuring for raw recvs */
if (rwa->raw) {
/*
- * Set the indirect block shift and nlevels. This will not fail
- * because we ensured all of the blocks were free earlier if
- * this is a new object.
+ * Set the indirect block size, block shift, nlevels.
+ * This will not fail because we ensured all of the
+ * blocks were freed earlier if this is a new object.
+ * For non-new objects block size and indirect block
+ * shift cannot change and nlevels can only increase.
*/
VERIFY0(dmu_object_set_blocksize(rwa->os, drro->drr_object,
drro->drr_blksz, drro->drr_indblkshift, tx));
VERIFY0(dmu_object_set_nlevels(rwa->os, drro->drr_object,
drro->drr_nlevels, tx));
+
+ /*
+ * Set the maxblkid. We will never free the first block of
+ * an object here because a maxblkid of 0 could indicate
+ * an object with a single block or one with no blocks.
+ * This will always succeed because we freed all blocks
+ * beyond the new maxblkid above.
+ */
VERIFY0(dmu_object_set_maxblkid(rwa->os, drro->drr_object,
drro->drr_maxblkid, tx));
}
/* read-holding callers must not rely on the lock being continuously held */
void
-dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
+dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read,
+ boolean_t force)
{
int epbs, new_nlevels;
uint64_t sz;
}
}
- if (blkid <= dn->dn_maxblkid)
+ /*
+ * Raw sends (indicated by the force flag) require that we take the
+ * given blkid even if the value is lower than the current value.
+ */
+ if (!force && blkid <= dn->dn_maxblkid)
goto out;
+ /*
+ * We use the (otherwise unused) top bit of dn_next_maxblkid[txgoff]
+ * to indicate that this field is set. This allows us to set the
+ * maxblkid to 0 on an existing object in dnode_sync().
+ */
dn->dn_maxblkid = blkid;
- dn->dn_next_maxblkid[tx->tx_txg & TXG_MASK] = blkid;
+ dn->dn_next_maxblkid[tx->tx_txg & TXG_MASK] =
+ blkid | DMU_NEXT_MAXBLKID_SET;
/*
* Compute the number of levels necessary to support the new maxblkid.
+ * Raw sends will ensure nlevels is set correctly for us.
*/
new_nlevels = 1;
epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
ASSERT3U(new_nlevels, <=, DN_MAX_LEVELS);
- if (new_nlevels > dn->dn_nlevels)
- dnode_set_nlevels_impl(dn, new_nlevels, tx);
+ if (!force) {
+ if (new_nlevels > dn->dn_nlevels)
+ dnode_set_nlevels_impl(dn, new_nlevels, tx);
+ } else {
+ ASSERT3U(dn->dn_nlevels, >=, new_nlevels);
+ }
out:
if (have_read)
}
}
- /*
- * Do not truncate the maxblkid if we are performing a raw
- * receive. The raw receive sets the mablkid manually and
- * must not be overriden.
- */
- if (trunc && !dn->dn_objset->os_raw_receive) {
+ if (trunc) {
ASSERTV(uint64_t off);
dn->dn_phys->dn_maxblkid = blkid == 0 ? 0 : blkid - 1;
/*
* This must be done after dnode_sync_free_range()
- * and dnode_increase_indirection().
+ * and dnode_increase_indirection(). See dnode_new_blkid()
+ * for an explanation of the high bit being set.
*/
if (dn->dn_next_maxblkid[txgoff]) {
mutex_enter(&dn->dn_mtx);
- dnp->dn_maxblkid = dn->dn_next_maxblkid[txgoff];
+ dnp->dn_maxblkid =
+ dn->dn_next_maxblkid[txgoff] & ~DMU_NEXT_MAXBLKID_SET;
dn->dn_next_maxblkid[txgoff] = 0;
mutex_exit(&dn->dn_mtx);
}
mdn->dn_checksum = checksum;
rw_enter(&mdn->dn_struct_rwlock, RW_WRITER);
- dnode_new_blkid(mdn, maxblkid, tx, B_FALSE);
+ dnode_new_blkid(mdn, maxblkid, tx, B_FALSE, B_TRUE);
rw_exit(&mdn->dn_struct_rwlock);
/*
break;
case 'y':
verify = 1;
- if (phase != PHASE_ALL) {
- fprintf(stderr,
- "Error: -y and -o are incompatible.\n");
- rc = 1;
- }
break;
case 'n':
nth = strtol(optarg, NULL, 0);
PHASE_ALL, PHASE_INVAL);
rc = 1;
}
- if (verify == 1) {
- fprintf(stderr,
- "Error: -y and -o are incompatible.\n");
- rc = 1;
- }
break;
default:
rc = 1;
# 3. Add a small 512 byte file to the filesystem
# 4. Add a larger 32M file to the filesystem
# 5. Add a large sparse file to the filesystem
-# 6. Add a file truncated to 4M to the filesystem
-# 7. Add a sparse file with metadata compression disabled to the filesystem
-# 8. Add and remove 1000 empty files to the filesystem
-# 9. Add a file with a large xattr value
-# 10. Snapshot the filesystem
-# 11. Send and receive the filesystem, ensuring that it can be mounted
+# 6. Add a 3 files that are to be truncated later
+# 7. Add 1000 empty files to the filesystem
+# 8. Add a file with a large xattr value
+# 9. Use xattrtest to create files with random xattrs (with and without xattrs=on)
+# 10. Take a snapshot of the filesystem
+# 11. Truncate one of the files from 32M to 128k
+# 12. Truncate one of the files from 512k to 384k
+# 13. Truncate one of the files from 512k to 0 to 384k
+# 14. Remove the 1000 empty files to the filesystem
+# 15. Take another snapshot of the filesystem
+# 16. Send and receive both snapshots
+# 17. Mount the filesystem and check the contents
#
verify_runnable "both"
}
log_onexit cleanup
+function recursive_cksum
+{
+ find $1 -type f -exec sha256sum {} \; | \
+ sort -k 2 | awk '{ print $1 }' | sha256sum
+}
+
log_assert "Verify 'zfs send -w' works with many different file layouts"
typeset keyfile=/$TESTPOOL/pkey
typeset sendfile=/$TESTPOOL/sendfile
+typeset sendfile2=/$TESTPOOL/sendfile2
+# Create an encrypted dataset
log_must eval "echo 'password' > $keyfile"
log_must zfs create -o encryption=on -o keyformat=passphrase \
-o keylocation=file://$keyfile $TESTPOOL/$TESTFS2
+# Create files with vaired layouts on disk
log_must touch /$TESTPOOL/$TESTFS2/empty
log_must mkfile 512 /$TESTPOOL/$TESTFS2/small
log_must mkfile 32M /$TESTPOOL/$TESTFS2/full
log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS2/sparse \
- bs=512 count=1 seek=10G >/dev/null 2>&1
+ bs=512 count=1 seek=1048576 >/dev/null 2>&1
log_must mkfile 32M /$TESTPOOL/$TESTFS2/truncated
-log_must truncate -s 4M /$TESTPOOL/$TESTFS2/truncated
-sync
+log_must mkfile 524288 /$TESTPOOL/$TESTFS2/truncated2
+log_must mkfile 524288 /$TESTPOOL/$TESTFS2/truncated3
log_must mkdir -p /$TESTPOOL/$TESTFS2/dir
for i in {1..1000}; do
log_must mkfile 512 /$TESTPOOL/$TESTFS2/dir/file-$i
done
-sync
+log_must mkdir -p /$TESTPOOL/$TESTFS2/xattrondir
+log_must zfs set xattr=on $TESTPOOL/$TESTFS2
+log_must xattrtest -f 10 -x 3 -s 32768 -r -k -p /$TESTPOOL/$TESTFS2/xattrondir
+log_must mkdir -p /$TESTPOOL/$TESTFS2/xattrsadir
+log_must zfs set xattr=sa $TESTPOOL/$TESTFS2
+log_must xattrtest -f 10 -x 3 -s 32768 -r -k -p /$TESTPOOL/$TESTFS2/xattrsadir
+
+# ZoL issue #7432
+log_must zfs set compression=on xattr=sa $TESTPOOL/$TESTFS2
+log_must touch /$TESTPOOL/$TESTFS2/attrs
+log_must eval "python -c 'print \"a\" * 4096' | \
+ attr -s bigval /$TESTPOOL/$TESTFS2/attrs"
+
+log_must zfs snapshot $TESTPOOL/$TESTFS2@snap1
+
+#
+# Truncate files created in the first snapshot. The first tests
+# truncating a large file to a single block. The second tests
+# truncating one block off the end of a file without changing
+# the required nlevels to hold it. The last tests handling
+# of a maxblkid that is dropped and then raised again.
+#
+log_must truncate -s 131072 /$TESTPOOL/$TESTFS2/truncated
+log_must truncate -s 393216 /$TESTPOOL/$TESTFS2/truncated2
+log_must truncate -s 0 /$TESTPOOL/$TESTFS2/truncated3
+log_must zpool sync $TESTPOOL
+log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS2/truncated3 \
+ bs=128k count=3 iflag=fullblock
+
+# Remove the empty files created in the first snapshot
for i in {1..1000}; do
log_must rm /$TESTPOOL/$TESTFS2/dir/file-$i
done
sync
-# ZoL issue #7432
-log_must zfs set compression=on xattr=sa $TESTPOOL/$TESTFS2
-log_must touch /$TESTPOOL/$TESTFS2/attrs
-log_must eval "python -c 'print \"a\" * 4096' | attr -s bigval /$TESTPOOL/$TESTFS2/attrs"
+log_must zfs snapshot $TESTPOOL/$TESTFS2@snap2
+expected_cksum=$(recursive_cksum /$TESTPOOL/$TESTFS2)
-log_must zfs snapshot $TESTPOOL/$TESTFS2@now
-log_must eval "zfs send -wR $TESTPOOL/$TESTFS2@now > $sendfile"
+log_must eval "zfs send -wp $TESTPOOL/$TESTFS2@snap1 > $sendfile"
+log_must eval "zfs send -wp -i @snap1 $TESTPOOL/$TESTFS2@snap2 > $sendfile2"
log_must eval "zfs recv -F $TESTPOOL/recv < $sendfile"
+log_must eval "zfs recv -F $TESTPOOL/recv < $sendfile2"
log_must zfs load-key $TESTPOOL/recv
log_must zfs mount -a
+actual_cksum=$(recursive_cksum /$TESTPOOL/recv)
+[[ "$expected_cksum" != "$actual_cksum" ]] && \
+ log_fail "Recursive checksums differ ($expected_cksum != $actual_cksum)"
+
+log_must xattrtest -f 10 -o3 -y -p /$TESTPOOL/recv/xattrondir
+log_must xattrtest -f 10 -o3 -y -p /$TESTPOOL/recv/xattrsadir
log_pass "Verified 'zfs send -w' works with many different file layouts"