granicus.if.org Git - zfs/commitdiff
OpenZFS 9442 - decrease indirect block size of spacemaps
author Matthew Ahrens <mahrens@delphix.com>
Thu, 11 Jan 2018 16:54:38 +0000 (08:54 -0800)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Wed, 25 Jul 2018 21:11:35 +0000 (14:11 -0700)
Authored by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Serapheim Dimitropoulos <serapheim.dimitro@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Albert Lee <trisk@forkgnu.org>
Reviewed by: Igor Kozhukhov <igor@dilos.org>
Reviewed by: George Melikov <mail@gmelikov.ru>
Approved by: Dan McDonald <danmcd@joyent.com>
Ported-by: Brian Behlendorf <behlendorf1@llnl.gov>
Updates to indirect blocks of spacemaps can contribute significantly to
write inflation.  Therefore we want to reduce the indirect block size of
spacemaps from 128K to 16K.

Porting notes:
* Refactored to allow the dmu_object_alloc(), dmu_object_alloc_ibs()
  and dmu_object_alloc_dnsize() functions to use a common shared
  dmu_object_alloc_impl() function.

OpenZFS-issue: https://www.illumos.org/issues/9442
OpenZFS-commit: https://github.com/openzfs/openzfs/commit/0c2e6408b
Closes #7712

include/sys/dmu.h
module/zfs/dmu_object.c
module/zfs/space_map.c

index 28756e6f7f815eb7baeb736bd849589957e58eb5..67f4be1c2438814018eedb2f08795481649424b2 100644 (file)
@@ -389,6 +389,9 @@ typedef struct dmu_buf {
  */
 uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
+uint64_t dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t ot, int blocksize,
+    int indirect_blockshift,
+    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
 uint64_t dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonus_type, int bonus_len,
     int dnodesize, dmu_tx_t *tx);
index 586a04b1653688cb9358c8f65897b02478fc4c98..b9960782efd116a746d944600bfbd6b45c58e434 100644 (file)
  */
 int dmu_object_alloc_chunk_shift = 7;
 
-uint64_t
-dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
-    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
-       return dmu_object_alloc_dnsize(os, ot, blocksize, bonustype, bonuslen,
-           0, tx);
-}
-
-uint64_t
-dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
-    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+static uint64_t
+dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize,
+    int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
+    int dnodesize, dmu_tx_t *tx)
 {
        uint64_t object;
        uint64_t L1_dnode_count = DNODES_PER_BLOCK <<
@@ -182,8 +175,9 @@ dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
                         * again now that we have the struct lock.
                         */
                        if (dn->dn_type == DMU_OT_NONE) {
-                               dnode_allocate(dn, ot, blocksize, 0,
-                                   bonustype, bonuslen, dn_slots, tx);
+                               dnode_allocate(dn, ot, blocksize,
+                                   indirect_blockshift, bonustype,
+                                   bonuslen, dn_slots, tx);
                                rw_exit(&dn->dn_struct_rwlock);
                                dmu_tx_add_new_object(tx, dn);
                                dnode_rele(dn, FTAG);
@@ -206,6 +200,31 @@ dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
        }
 }
 
+uint64_t
+dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
+    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+{
+       return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype,
+           bonuslen, 0, tx));
+}
+
+uint64_t
+dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t ot, int blocksize,
+    int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
+    dmu_tx_t *tx)
+{
+       return (dmu_object_alloc_impl(os, ot, blocksize, indirect_blockshift,
+           bonustype, bonuslen, 0, tx));
+}
+
+uint64_t
+dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+{
+       return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype,
+           bonuslen, dnodesize, tx));
+}
+
 int
 dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
@@ -423,6 +442,7 @@ dmu_object_free_zapified(objset_t *mos, uint64_t object, dmu_tx_t *tx)
 
 #if defined(_KERNEL)
 EXPORT_SYMBOL(dmu_object_alloc);
+EXPORT_SYMBOL(dmu_object_alloc_ibs);
 EXPORT_SYMBOL(dmu_object_alloc_dnsize);
 EXPORT_SYMBOL(dmu_object_claim);
 EXPORT_SYMBOL(dmu_object_claim_dnsize);
index 5f67a798727d0a103b5fd6c024f8dc1ca569baa9..9ba6ff6ff4c2ad6b05f05f0e2318e6410ed961e9 100644 (file)
  */
 boolean_t zfs_force_some_double_word_sm_entries = B_FALSE;
 
+/*
+ * Override the default indirect block size of 128K; for spacemaps, use 16K
+ * (2^14 bytes) instead.  This dramatically reduces write inflation, since
+ * appending to a spacemap typically has to write one data block (4K) and one
+ * or two indirect blocks (16K-32K, rather than 128K-256K).
+ */
+int space_map_ibs = 14;
+
 boolean_t
 sm_entry_is_debug(uint64_t e)
 {
@@ -674,8 +682,8 @@ space_map_write_impl(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
                 *
                 * [1] The feature is enabled.
                 * [2] The offset or run is too big for a single-word entry,
-                *      or the vdev_id is set (meaning not equal to
-                *      SM_NO_VDEVID).
+                *      or the vdev_id is set (meaning not equal to
+                *      SM_NO_VDEVID).
                 *
                 * Note that for purposes of testing we've added the case that
                 * we write two-word entries occasionally when the feature is
@@ -837,7 +845,8 @@ space_map_truncate(space_map_t *sm, int blocksize, dmu_tx_t *tx)
         */
        if ((spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM) &&
            doi.doi_bonus_size != sizeof (space_map_phys_t)) ||
-           doi.doi_data_block_size != blocksize) {
+           doi.doi_data_block_size != blocksize ||
+           doi.doi_metadata_block_size != 1 << space_map_ibs) {
                zfs_dbgmsg("txg %llu, spa %s, sm %p, reallocating "
                    "object[%llu]: old bonus %u, old blocksz %u",
                    dmu_tx_get_txg(tx), spa_name(spa), sm, sm->sm_object,
@@ -893,8 +902,8 @@ space_map_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
                bonuslen = SPACE_MAP_SIZE_V0;
        }
 
-       object = dmu_object_alloc(os, DMU_OT_SPACE_MAP, blocksize,
-           DMU_OT_SPACE_MAP_HEADER, bonuslen, tx);
+       object = dmu_object_alloc_ibs(os, DMU_OT_SPACE_MAP, blocksize,
+           space_map_ibs, DMU_OT_SPACE_MAP_HEADER, bonuslen, tx);
 
        return (object);
 }