From 3a549dc7a1f5e5511b4c8699081f704eeb4381b7 Mon Sep 17 00:00:00 2001 From: Matthew Ahrens Date: Thu, 11 Jan 2018 08:54:38 -0800 Subject: [PATCH] OpenZFS 9442 - decrease indirect block size of spacemaps Authored by: Matthew Ahrens Reviewed by: Serapheim Dimitropoulos Reviewed by: George Wilson Reviewed by: Albert Lee Reviewed by: Igor Kozhukhov Reviewed by: George Melikov Approved by: Dan McDonald Ported-by: Brian Behlendorf Updates to indirect blocks of spacemaps can contribute significantly to write inflation. Therefore we want to reduce the indirect block size of spacemaps from 128K to 16K. Porting notes: * Refactored to allow the dmu_object_alloc(), dmu_object_alloc_ibs() and dmu_object_alloc_dnsize() functions to use a common shared dmu_object_alloc_impl() function. OpenZFS-issue: https://www.illumos.org/issues/9442 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/0c2e6408b Closes #7712 --- include/sys/dmu.h | 3 +++ module/zfs/dmu_object.c | 46 +++++++++++++++++++++++++++++------------ module/zfs/space_map.c | 19 ++++++++++++----- 3 files changed, 50 insertions(+), 18 deletions(-) diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 28756e6f7..67f4be1c2 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -389,6 +389,9 @@ typedef struct dmu_buf { */ uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx); +uint64_t dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t ot, int blocksize, + int indirect_blockshift, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx); uint64_t dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonus_type, int bonus_len, int dnodesize, dmu_tx_t *tx); diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c index 586a04b16..b9960782e 100644 --- a/module/zfs/dmu_object.c +++ b/module/zfs/dmu_object.c @@ -41,17 +41,10 @@ */ int dmu_object_alloc_chunk_shift = 7; -uint64_t -dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize, - dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) -{ - return dmu_object_alloc_dnsize(os, ot, blocksize, bonustype, bonuslen, - 0, tx); -} - -uint64_t -dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize, - dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) +static uint64_t +dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize, + int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen, + int dnodesize, dmu_tx_t *tx) { uint64_t object; uint64_t L1_dnode_count = DNODES_PER_BLOCK << @@ -182,8 +175,9 @@ dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize, * again now that we have the struct lock. */ if (dn->dn_type == DMU_OT_NONE) { - dnode_allocate(dn, ot, blocksize, 0, - bonustype, bonuslen, dn_slots, tx); + dnode_allocate(dn, ot, blocksize, + indirect_blockshift, bonustype, + bonuslen, dn_slots, tx); rw_exit(&dn->dn_struct_rwlock); dmu_tx_add_new_object(tx, dn); dnode_rele(dn, FTAG); @@ -206,6 +200,31 @@ dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize, } } +uint64_t +dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize, + dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) +{ + return dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype, + bonuslen, 0, tx); +} + +uint64_t +dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t ot, int blocksize, + int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen, + dmu_tx_t *tx) +{ + return dmu_object_alloc_impl(os, ot, blocksize, indirect_blockshift, + bonustype, bonuslen, 0, tx); +} + +uint64_t +dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize, + dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx) +{ + return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype, + bonuslen, dnodesize, tx)); +} + int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx) @@ -423,6 +442,7 @@ dmu_object_free_zapified(objset_t *mos, uint64_t object, dmu_tx_t *tx) #if defined(_KERNEL) EXPORT_SYMBOL(dmu_object_alloc); +EXPORT_SYMBOL(dmu_object_alloc_ibs); EXPORT_SYMBOL(dmu_object_alloc_dnsize); EXPORT_SYMBOL(dmu_object_claim); EXPORT_SYMBOL(dmu_object_claim_dnsize); diff --git a/module/zfs/space_map.c b/module/zfs/space_map.c index 5f67a7987..9ba6ff6ff 100644 --- a/module/zfs/space_map.c +++ b/module/zfs/space_map.c @@ -52,6 +52,14 @@ */ boolean_t zfs_force_some_double_word_sm_entries = B_FALSE; +/* + * Override the default indirect block size of 128K, instead use 16K for + * spacemaps (2^14 bytes). This dramatically reduces write inflation since + * appending to a spacemap typically has to write one data block (4KB) and one + * or two indirect blocks (16K-32K, rather than 128K). + */ +int space_map_ibs = 14; + boolean_t sm_entry_is_debug(uint64_t e) { @@ -674,8 +682,8 @@ space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype, * * [1] The feature is enabled. * [2] The offset or run is too big for a single-word entry, - * or the vdev_id is set (meaning not equal to - * SM_NO_VDEVID). + * or the vdev_id is set (meaning not equal to + * SM_NO_VDEVID). * * Note that for purposes of testing we've added the case that * we write two-word entries occasionally when the feature is @@ -837,7 +845,8 @@ space_map_truncate(space_map_t *sm, int blocksize, dmu_tx_t *tx) */ if ((spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM) && doi.doi_bonus_size != sizeof (space_map_phys_t)) || - doi.doi_data_block_size != blocksize) { + doi.doi_data_block_size != blocksize || + doi.doi_metadata_block_size != 1 << space_map_ibs) { zfs_dbgmsg("txg %llu, spa %s, sm %p, reallocating " "object[%llu]: old bonus %u, old blocksz %u", dmu_tx_get_txg(tx), spa_name(spa), sm, sm->sm_object, @@ -893,8 +902,8 @@ space_map_alloc(objset_t *os, int blocksize, dmu_tx_t *tx) bonuslen = SPACE_MAP_SIZE_V0; } - object = dmu_object_alloc(os, DMU_OT_SPACE_MAP, blocksize, - DMU_OT_SPACE_MAP_HEADER, bonuslen, tx); + object = dmu_object_alloc_ibs(os, DMU_OT_SPACE_MAP, blocksize, + space_map_ibs, DMU_OT_SPACE_MAP_HEADER, bonuslen, tx); return (object); } -- 2.40.0