]> granicus.if.org Git - zfs/commitdiff
Illumos #4374
author Matthew Ahrens <mahrens@delphix.com>
Wed, 16 Apr 2014 03:40:22 +0000 (19:40 -0800)
committer Brian Behlendorf <behlendorf1@llnl.gov>
Wed, 30 Jul 2014 16:20:35 +0000 (09:20 -0700)
4374 dn_free_ranges should use range_tree_t

Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Max Grossman <max.grossman@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Approved by: Dan McDonald <danmcd@omniti.com>

References:
  https://www.illumos.org/issues/4374
  https://github.com/illumos/illumos-gate/commit/bf16b11

Ported by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2531

19 files changed:
cmd/zdb/zdb.c
cmd/zpool/zpool_vdev.c
include/sys/dnode.h
include/sys/range_tree.h
include/sys/zfs_context.h
lib/libzpool/kernel.c
module/zfs/dbuf.c
module/zfs/ddt.c
module/zfs/dnode.c
module/zfs/dnode_sync.c
module/zfs/metaslab.c
module/zfs/range_tree.c
module/zfs/space_map.c
module/zfs/vdev.c
module/zfs/vdev_disk.c
module/zfs/zap.c
module/zfs/zap_leaf.c
module/zfs/zap_micro.c
module/zfs/zio.c

index b29cddb6e800e3ada0fd67e23ff52c9d5596df2a..66b91cd97a68859fbc9adf40bbbc1275bb9828fe 100644 (file)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  */
 
 #include <stdio.h>
@@ -2788,7 +2788,8 @@ dump_simulated_ddt(spa_t *spa)
                dds.dds_ref_psize = zdde->zdde_ref_psize;
                dds.dds_ref_dsize = zdde->zdde_ref_dsize;
 
-               ddt_stat_add(&ddh_total.ddh_stat[highbit(refcnt) - 1], &dds, 0);
+               ddt_stat_add(&ddh_total.ddh_stat[highbit64(refcnt) - 1],
+                   &dds, 0);
 
                umem_free(zdde, sizeof (*zdde));
        }
index 31ee78c86f8a474c7e43b5f52028aafc4199df6a..686782b077385ddd770e0260d13d9a9b609016d1 100644 (file)
@@ -740,7 +740,7 @@ make_leaf_vdev(nvlist_t *props, const char *arg, uint64_t is_log)
                int sector_size;
 
                if (check_sector_size_database(path, &sector_size) == B_TRUE)
-                       ashift = highbit(sector_size) - 1;
+                       ashift = highbit64(sector_size) - 1;
        }
 
        if (ashift > 0)
index 4ca2a8a1380659ff5e2b333497002335c0978944..64bf000b6d65cf8f8ba4a3bed6d5405bf9e1f79e 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  */
 
 #ifndef        _SYS_DNODE_H
@@ -220,7 +220,7 @@ typedef struct dnode {
        /* protected by dn_mtx: */
        kmutex_t dn_mtx;
        list_t dn_dirty_records[TXG_SIZE];
-       avl_tree_t dn_ranges[TXG_SIZE];
+       struct range_tree *dn_free_ranges[TXG_SIZE];
        uint64_t dn_allocated_txg;
        uint64_t dn_free_txg;
        uint64_t dn_assigned_txg;
@@ -302,8 +302,6 @@ void dnode_buf_byteswap(void *buf, size_t size);
 void dnode_verify(dnode_t *dn);
 int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
 void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
-void dnode_clear_range(dnode_t *dn, uint64_t blkid,
-    uint64_t nblks, dmu_tx_t *tx);
 void dnode_diduse_space(dnode_t *dn, int64_t space);
 void dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx);
 void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t);
index a41effe4e988ec326fe42e08d3cacf22ad53176e..9f3ead537165f3a7b8c52fe58eedef66c1b1952e 100644 (file)
@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_RANGE_TREE_H
@@ -85,6 +85,7 @@ void range_tree_stat_verify(range_tree_t *rt);
 
 void range_tree_add(void *arg, uint64_t start, uint64_t size);
 void range_tree_remove(void *arg, uint64_t start, uint64_t size);
+void range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size);
 
 void range_tree_vacate(range_tree_t *rt, range_tree_func_t *func, void *arg);
 void range_tree_walk(range_tree_t *rt, range_tree_func_t *func, void *arg);
index ec2a7dea0ed715b7373e5c378bd4861e2e7f62de..fd3e0b0d4f91a20d96b1b5bb3d8cc0178c9142ca 100644 (file)
@@ -25,7 +25,7 @@
 /*
  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_ZFS_CONTEXT_H
@@ -623,7 +623,7 @@ extern void delay(clock_t ticks);
 
 extern uint64_t physmem;
 
-extern int highbit(ulong_t i);
+extern int highbit64(uint64_t i);
 extern int random_get_bytes(uint8_t *ptr, size_t len);
 extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);
 
index 05bbd06ccbbc5421a6c2bb2b3296d952242f6ab5..ba232425d3c11c68fb96216d1de2a6b87597d151 100644 (file)
@@ -1014,17 +1014,15 @@ delay(clock_t ticks)
  * High order bit is 31 (or 63 in _LP64 kernel).
  */
 int
-highbit(ulong_t i)
+highbit64(uint64_t i)
 {
        register int h = 1;
 
        if (i == 0)
                return (0);
-#ifdef _LP64
-       if (i & 0xffffffff00000000ul) {
+       if (i & 0xffffffff00000000ULL) {
                h += 32; i >>= 32;
        }
-#endif
        if (i & 0xffff0000) {
                h += 16; i >>= 16;
        }
index f530e055bab07bb77e127cc4a8618306ba588ab1..c6e7197b6f57deee6781e8522a8bcd4cabdd070a 100644 (file)
@@ -21,7 +21,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  */
 
@@ -40,6 +40,7 @@
 #include <sys/dmu_zfetch.h>
 #include <sys/sa.h>
 #include <sys/sa_impl.h>
+#include <sys/range_tree.h>
 
 struct dbuf_hold_impl_data {
        /* Function arguments */
@@ -1234,7 +1235,10 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
        if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID &&
            db->db_blkid != DMU_SPILL_BLKID) {
                mutex_enter(&dn->dn_mtx);
-               dnode_clear_range(dn, db->db_blkid, 1, tx);
+               if (dn->dn_free_ranges[txgoff] != NULL) {
+                       range_tree_clear(dn->dn_free_ranges[txgoff],
+                           db->db_blkid, 1);
+               }
                mutex_exit(&dn->dn_mtx);
                db->db_freed_in_flight = FALSE;
        }
index 93997588da79b37fd09dd444b38380bd4b6123b7..d854e5f2341f9b0ec83d385fb91b5fca7c6e9f2d 100644 (file)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -423,7 +423,7 @@ ddt_stat_update(ddt_t *ddt, ddt_entry_t *dde, uint64_t neg)
 
        ddt_stat_generate(ddt, dde, &dds);
 
-       bucket = highbit(dds.dds_ref_blocks) - 1;
+       bucket = highbit64(dds.dds_ref_blocks) - 1;
        ASSERT(bucket >= 0);
 
        ddh = &ddt->ddt_histogram[dde->dde_type][dde->dde_class];
index 44ec80001814290f2e4979e63436e89139e75d5b..5aee10409aa95498eaa410576a1bf2ab857fb029 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -35,8 +35,7 @@
 #include <sys/spa.h>
 #include <sys/zio.h>
 #include <sys/dmu_zfetch.h>
-
-static int free_range_compar(const void *node1, const void *node2);
+#include <sys/range_tree.h>
 
 static kmem_cache_t *dnode_cache;
 /*
@@ -92,9 +91,7 @@ dnode_cons(void *arg, void *unused, int kmflag)
 
        for (i = 0; i < TXG_SIZE; i++) {
                list_link_init(&dn->dn_dirty_link[i]);
-               avl_create(&dn->dn_ranges[i], free_range_compar,
-                   sizeof (free_range_t),
-                   offsetof(struct free_range, fr_node));
+               dn->dn_free_ranges[i] = NULL;
                list_create(&dn->dn_dirty_records[i],
                    sizeof (dbuf_dirty_record_t),
                    offsetof(dbuf_dirty_record_t, dr_dirty_node));
@@ -142,7 +139,7 @@ dnode_dest(void *arg, void *unused)
 
        for (i = 0; i < TXG_SIZE; i++) {
                ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
-               avl_destroy(&dn->dn_ranges[i]);
+               ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
                list_destroy(&dn->dn_dirty_records[i]);
                ASSERT0(dn->dn_next_nblkptr[i]);
                ASSERT0(dn->dn_next_nlevels[i]);
@@ -313,19 +310,6 @@ dnode_buf_byteswap(void *vbuf, size_t size)
        }
 }
 
-static int
-free_range_compar(const void *node1, const void *node2)
-{
-       const free_range_t *rp1 = node1;
-       const free_range_t *rp2 = node2;
-
-       if (rp1->fr_blkid < rp2->fr_blkid)
-               return (-1);
-       else if (rp1->fr_blkid > rp2->fr_blkid)
-               return (1);
-       else return (0);
-}
-
 void
 dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx)
 {
@@ -374,7 +358,7 @@ dnode_setdblksz(dnode_t *dn, int size)
            1<<(sizeof (dn->dn_phys->dn_datablkszsec) * 8));
        dn->dn_datablksz = size;
        dn->dn_datablkszsec = size >> SPA_MINBLOCKSHIFT;
-       dn->dn_datablkshift = ISP2(size) ? highbit(size - 1) : 0;
+       dn->dn_datablkshift = ISP2(size) ? highbit64(size - 1) : 0;
 }
 
 static dnode_t *
@@ -530,7 +514,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
                ASSERT0(dn->dn_next_blksz[i]);
                ASSERT(!list_link_active(&dn->dn_dirty_link[i]));
                ASSERT3P(list_head(&dn->dn_dirty_records[i]), ==, NULL);
-               ASSERT0(avl_numnodes(&dn->dn_ranges[i]));
+               ASSERT3P(dn->dn_free_ranges[i], ==, NULL);
        }
 
        dn->dn_type = ot;
@@ -695,7 +679,8 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
                list_move_tail(&ndn->dn_dirty_records[i],
                    &odn->dn_dirty_records[i]);
        }
-       bcopy(&odn->dn_ranges[0], &ndn->dn_ranges[0], sizeof (odn->dn_ranges));
+       bcopy(&odn->dn_free_ranges[0], &ndn->dn_free_ranges[0],
+           sizeof (odn->dn_free_ranges));
        ndn->dn_allocated_txg = odn->dn_allocated_txg;
        ndn->dn_free_txg = odn->dn_free_txg;
        ndn->dn_assigned_txg = odn->dn_assigned_txg;
@@ -758,8 +743,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
                list_create(&odn->dn_dirty_records[i],
                    sizeof (dbuf_dirty_record_t),
                    offsetof(dbuf_dirty_record_t, dr_dirty_node));
-               odn->dn_ranges[i].avl_root = NULL;
-               odn->dn_ranges[i].avl_numnodes = 0;
+               odn->dn_free_ranges[i] = NULL;
                odn->dn_next_nlevels[i] = 0;
                odn->dn_next_indblkshift[i] = 0;
                odn->dn_next_bonustype[i] = 0;
@@ -1462,59 +1446,6 @@ out:
                rw_downgrade(&dn->dn_struct_rwlock);
 }
 
-void
-dnode_clear_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx)
-{
-       avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK];
-       avl_index_t where;
-       free_range_t *rp;
-       free_range_t rp_tofind;
-       uint64_t endblk = blkid + nblks;
-
-       ASSERT(MUTEX_HELD(&dn->dn_mtx));
-       ASSERT(nblks <= UINT64_MAX - blkid); /* no overflow */
-
-       dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n",
-           blkid, nblks, tx->tx_txg);
-       rp_tofind.fr_blkid = blkid;
-       rp = avl_find(tree, &rp_tofind, &where);
-       if (rp == NULL)
-               rp = avl_nearest(tree, where, AVL_BEFORE);
-       if (rp == NULL)
-               rp = avl_nearest(tree, where, AVL_AFTER);
-
-       while (rp && (rp->fr_blkid <= blkid + nblks)) {
-               uint64_t fr_endblk = rp->fr_blkid + rp->fr_nblks;
-               free_range_t *nrp = AVL_NEXT(tree, rp);
-
-               if (blkid <= rp->fr_blkid && endblk >= fr_endblk) {
-                       /* clear this entire range */
-                       avl_remove(tree, rp);
-                       kmem_free(rp, sizeof (free_range_t));
-               } else if (blkid <= rp->fr_blkid &&
-                   endblk > rp->fr_blkid && endblk < fr_endblk) {
-                       /* clear the beginning of this range */
-                       rp->fr_blkid = endblk;
-                       rp->fr_nblks = fr_endblk - endblk;
-               } else if (blkid > rp->fr_blkid && blkid < fr_endblk &&
-                   endblk >= fr_endblk) {
-                       /* clear the end of this range */
-                       rp->fr_nblks = blkid - rp->fr_blkid;
-               } else if (blkid > rp->fr_blkid && endblk < fr_endblk) {
-                       /* clear a chunk out of this range */
-                       free_range_t *new_rp =
-                           kmem_alloc(sizeof (free_range_t), KM_PUSHPAGE);
-
-                       new_rp->fr_blkid = endblk;
-                       new_rp->fr_nblks = fr_endblk - endblk;
-                       avl_insert_here(tree, new_rp, rp, AVL_AFTER);
-                       rp->fr_nblks = blkid - rp->fr_blkid;
-               }
-               /* there may be no overlap */
-               rp = nrp;
-       }
-}
-
 void
 dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
 {
@@ -1665,22 +1596,17 @@ done:
         * We will finish up this free operation in the syncing phase.
         */
        mutex_enter(&dn->dn_mtx);
-       dnode_clear_range(dn, blkid, nblks, tx);
        {
-               free_range_t *rp, *found;
-               avl_index_t where;
-               avl_tree_t *tree = &dn->dn_ranges[tx->tx_txg&TXG_MASK];
-
-               /* Add new range to dn_ranges */
-               rp = kmem_alloc(sizeof (free_range_t), KM_PUSHPAGE);
-               rp->fr_blkid = blkid;
-               rp->fr_nblks = nblks;
-               found = avl_find(tree, rp, &where);
-               ASSERT(found == NULL);
-               avl_insert(tree, rp, where);
-               dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n",
-                   blkid, nblks, tx->tx_txg);
+       int txgoff = tx->tx_txg & TXG_MASK;
+       if (dn->dn_free_ranges[txgoff] == NULL) {
+               dn->dn_free_ranges[txgoff] =
+                   range_tree_create(NULL, NULL, &dn->dn_mtx);
+       }
+       range_tree_clear(dn->dn_free_ranges[txgoff], blkid, nblks);
+       range_tree_add(dn->dn_free_ranges[txgoff], blkid, nblks);
        }
+       dprintf_dnode(dn, "blkid=%llu nblks=%llu txg=%llu\n",
+           blkid, nblks, tx->tx_txg);
        mutex_exit(&dn->dn_mtx);
 
        dbuf_free_range(dn, blkid, blkid + nblks - 1, tx);
@@ -1708,7 +1634,6 @@ dnode_spill_freed(dnode_t *dn)
 uint64_t
 dnode_block_freed(dnode_t *dn, uint64_t blkid)
 {
-       free_range_t range_tofind;
        void *dp = spa_get_dsl(dn->dn_objset->os_spa);
        int i;
 
@@ -1728,20 +1653,10 @@ dnode_block_freed(dnode_t *dn, uint64_t blkid)
        if (blkid == DMU_SPILL_BLKID)
                return (dnode_spill_freed(dn));
 
-       range_tofind.fr_blkid = blkid;
        mutex_enter(&dn->dn_mtx);
        for (i = 0; i < TXG_SIZE; i++) {
-               free_range_t *range_found;
-               avl_index_t idx;
-
-               range_found = avl_find(&dn->dn_ranges[i], &range_tofind, &idx);
-               if (range_found) {
-                       ASSERT(range_found->fr_nblks > 0);
-                       break;
-               }
-               range_found = avl_nearest(&dn->dn_ranges[i], idx, AVL_BEFORE);
-               if (range_found &&
-                   range_found->fr_blkid + range_found->fr_nblks > blkid)
+               if (dn->dn_free_ranges[i] != NULL &&
+                   range_tree_contains(dn->dn_free_ranges[i], blkid, 1))
                        break;
        }
        mutex_exit(&dn->dn_mtx);
index 885dc2dbca11cc08d3ccb1d36dc322c2b5153047..23892006080c194236941fb4f4b93948ecfb8e8d 100644 (file)
@@ -21,7 +21,7 @@
 
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -32,6 +32,7 @@
 #include <sys/dmu_objset.h>
 #include <sys/dsl_dataset.h>
 #include <sys/spa.h>
+#include <sys/range_tree.h>
 #include <sys/zfeature.h>
 
 static void
@@ -322,7 +323,7 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
  * and "free" all the blocks contained there.
  */
 static void
-dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks,
+dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
     dmu_tx_t *tx)
 {
        blkptr_t *bp = dn->dn_phys->dn_blkptr;
@@ -381,6 +382,22 @@ dnode_sync_free_range(dnode_t *dn, uint64_t blkid, uint64_t nblks,
        }
 }
 
+typedef struct dnode_sync_free_range_arg {
+       dnode_t *dsfra_dnode;
+       dmu_tx_t *dsfra_tx;
+} dnode_sync_free_range_arg_t;
+
+static void
+dnode_sync_free_range(void *arg, uint64_t blkid, uint64_t nblks)
+{
+       dnode_sync_free_range_arg_t *dsfra = arg;
+       dnode_t *dn = dsfra->dsfra_dnode;
+
+       mutex_exit(&dn->dn_mtx);
+       dnode_sync_free_range_impl(dn, blkid, nblks, dsfra->dsfra_tx);
+       mutex_enter(&dn->dn_mtx);
+}
+
 /*
  * Try to kick all the dnode's dbufs out of the cache...
  */
@@ -542,7 +559,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
 void
 dnode_sync(dnode_t *dn, dmu_tx_t *tx)
 {
-       free_range_t *rp;
        dnode_phys_t *dnp = dn->dn_phys;
        int txgoff = tx->tx_txg & TXG_MASK;
        list_t *list = &dn->dn_dirty_records[txgoff];
@@ -601,9 +617,9 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
                    SPA_MINBLOCKSIZE) == 0);
                ASSERT(BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
                    dn->dn_maxblkid == 0 || list_head(list) != NULL ||
-                   avl_last(&dn->dn_ranges[txgoff]) ||
                    dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT ==
-                   dnp->dn_datablkszsec);
+                   dnp->dn_datablkszsec ||
+                   range_tree_space(dn->dn_free_ranges[txgoff]) != 0);
                dnp->dn_datablkszsec =
                    dn->dn_next_blksz[txgoff] >> SPA_MINBLOCKSHIFT;
                dn->dn_next_blksz[txgoff] = 0;
@@ -661,13 +677,16 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
        }
 
        /* process all the "freed" ranges in the file */
-       while ((rp = avl_last(&dn->dn_ranges[txgoff]))) {
-               dnode_sync_free_range(dn, rp->fr_blkid, rp->fr_nblks, tx);
-               /* grab the mutex so we don't race with dnode_block_freed() */
+       if (dn->dn_free_ranges[txgoff] != NULL) {
+               dnode_sync_free_range_arg_t dsfra;
+               dsfra.dsfra_dnode = dn;
+               dsfra.dsfra_tx = tx;
                mutex_enter(&dn->dn_mtx);
-               avl_remove(&dn->dn_ranges[txgoff], rp);
+               range_tree_vacate(dn->dn_free_ranges[txgoff],
+                   dnode_sync_free_range, &dsfra);
+               range_tree_destroy(dn->dn_free_ranges[txgoff]);
+               dn->dn_free_ranges[txgoff] = NULL;
                mutex_exit(&dn->dn_mtx);
-               kmem_free(rp, sizeof (free_range_t));
        }
 
        if (freeing_dnode) {
index 6d6ff0914da7d4a511924507d0c63fa3a76a16da..06d7f7cd203c4dd3b9443751402410fb685a24cd 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  */
 
@@ -703,7 +703,7 @@ metaslab_ff_alloc(metaslab_t *msp, uint64_t size)
         * may exist in the same region.
         */
        uint64_t align = size & -size;
-       uint64_t *cursor = &msp->ms_lbas[highbit(align) - 1];
+       uint64_t *cursor = &msp->ms_lbas[highbit64(align) - 1];
        avl_tree_t *t = &msp->ms_tree->rt_root;
 
        return (metaslab_block_picker(t, cursor, size, align));
@@ -744,7 +744,7 @@ metaslab_df_alloc(metaslab_t *msp, uint64_t size)
         * may exist in the same region.
         */
        uint64_t align = size & -size;
-       uint64_t *cursor = &msp->ms_lbas[highbit(align) - 1];
+       uint64_t *cursor = &msp->ms_lbas[highbit64(align) - 1];
        range_tree_t *rt = msp->ms_tree;
        avl_tree_t *t = &rt->rt_root;
        uint64_t max_size = metaslab_block_maxsize(msp);
@@ -869,7 +869,7 @@ metaslab_ndf_alloc(metaslab_t *msp, uint64_t size)
        avl_tree_t *t = &msp->ms_tree->rt_root;
        avl_index_t where;
        range_seg_t *rs, rsearch;
-       uint64_t hbit = highbit(size);
+       uint64_t hbit = highbit64(size);
        uint64_t *cursor = &msp->ms_lbas[hbit - 1];
        uint64_t max_size = metaslab_block_maxsize(msp);
 
@@ -1117,7 +1117,7 @@ metaslab_weight_factor(metaslab_t *msp)
        if (msp->ms_sm == NULL) {
                vdev_t *vd = msp->ms_group->mg_vd;
 
-               i = highbit(msp->ms_size) - 1;
+               i = highbit64(msp->ms_size) - 1;
                sectors = msp->ms_size >> vd->vdev_ashift;
                return (sectors * i * vd->vdev_ashift);
        }
index cb4641078ed2a9dbf0ce488a3cbf2c1441d95e36..f35d20c08c0e1f8521d690d0ac1dc44b3512a127 100644 (file)
@@ -23,7 +23,7 @@
  * Use is subject to license terms.
  */
 /*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -60,7 +60,7 @@ range_tree_stat_verify(range_tree_t *rt)
        for (rs = avl_first(&rt->rt_root); rs != NULL;
            rs = AVL_NEXT(&rt->rt_root, rs)) {
                uint64_t size = rs->rs_end - rs->rs_start;
-               int idx = highbit(size) - 1;
+               int idx = highbit64(size) - 1;
 
                hist[idx]++;
                ASSERT3U(hist[idx], !=, 0);
@@ -79,7 +79,7 @@ static void
 range_tree_stat_incr(range_tree_t *rt, range_seg_t *rs)
 {
        uint64_t size = rs->rs_end - rs->rs_start;
-       int idx = highbit(size) - 1;
+       int idx = highbit64(size) - 1;
 
        ASSERT3U(idx, <,
            sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
@@ -93,7 +93,7 @@ static void
 range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
 {
        uint64_t size = rs->rs_end - rs->rs_start;
-       int idx = highbit(size) - 1;
+       int idx = highbit64(size) - 1;
 
        ASSERT3U(idx, <,
            sizeof (rt->rt_histogram) / sizeof (*rt->rt_histogram));
@@ -299,10 +299,10 @@ range_tree_remove(void *arg, uint64_t start, uint64_t size)
 }
 
 static range_seg_t *
-range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size,
-    avl_index_t *wherep)
+range_tree_find_impl(range_tree_t *rt, uint64_t start, uint64_t size)
 {
-       range_seg_t rsearch, *rs;
+       avl_index_t where;
+       range_seg_t rsearch;
        uint64_t end = start + size;
 
        ASSERT(MUTEX_HELD(rt->rt_lock));
@@ -310,9 +310,14 @@ range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size,
 
        rsearch.rs_start = start;
        rsearch.rs_end = end;
-       rs = avl_find(&rt->rt_root, &rsearch, wherep);
+       return (avl_find(&rt->rt_root, &rsearch, &where));
+}
 
-       if (rs != NULL && rs->rs_start <= start && rs->rs_end >= end)
+static range_seg_t *
+range_tree_find(range_tree_t *rt, uint64_t start, uint64_t size)
+{
+       range_seg_t *rs = range_tree_find_impl(rt, start, size);
+       if (rs != NULL && rs->rs_start <= start && rs->rs_end >= start + size)
                return (rs);
        return (NULL);
 }
@@ -321,10 +326,9 @@ void
 range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size)
 {
        range_seg_t *rs;
-       avl_index_t where;
 
        mutex_enter(rt->rt_lock);
-       rs = range_tree_find(rt, off, size, &where);
+       rs = range_tree_find(rt, off, size);
        if (rs != NULL)
                panic("freeing free block; rs=%p", (void *)rs);
        mutex_exit(rt->rt_lock);
@@ -333,9 +337,23 @@ range_tree_verify(range_tree_t *rt, uint64_t off, uint64_t size)
 boolean_t
 range_tree_contains(range_tree_t *rt, uint64_t start, uint64_t size)
 {
-       avl_index_t where;
+       return (range_tree_find(rt, start, size) != NULL);
+}
 
-       return (range_tree_find(rt, start, size, &where) != NULL);
+/*
+ * Ensure that this range is not in the tree, regardless of whether
+ * it is currently in the tree.
+ */
+void
+range_tree_clear(range_tree_t *rt, uint64_t start, uint64_t size)
+{
+       range_seg_t *rs;
+
+       while ((rs = range_tree_find_impl(rt, start, size)) != NULL) {
+               uint64_t free_start = MAX(rs->rs_start, start);
+               uint64_t free_end = MIN(rs->rs_end, start + size);
+               range_tree_remove(rt, free_start, free_end - free_start);
+       }
 }
 
 void
index 645a81387c780e1fdbfb3b5b213f0ec0c707dcb1..c7a2312b4fb67f96594cc27fc4d2629d39bfdb99 100644 (file)
@@ -23,7 +23,7 @@
  * Use is subject to license terms.
  */
 /*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -270,7 +270,7 @@ space_map_set_blocksize(space_map_t *sm, uint64_t size, dmu_tx_t *tx)
                 * adding more blocks. The block size can grow until it
                 * reaches space_map_max_blksz.
                 */
-               newsz = ISP2(size) ? size : 1ULL << highbit(size);
+               newsz = ISP2(size) ? size : 1ULL << highbit64(size);
                if (newsz > space_map_max_blksz)
                        newsz = space_map_max_blksz;
 
index 69a3153178fc979d59103a330870b11fe26f3474..89df3674ece4627a0f6d0768767f1ca243e6babf 100644 (file)
@@ -22,7 +22,7 @@
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -1584,7 +1584,7 @@ vdev_metaslab_set_size(vdev_t *vd)
        /*
         * Aim for roughly 200 metaslabs per vdev.
         */
-       vd->vdev_ms_shift = highbit(vd->vdev_asize / 200);
+       vd->vdev_ms_shift = highbit64(vd->vdev_asize / 200);
        vd->vdev_ms_shift = MAX(vd->vdev_ms_shift, SPA_MAXBLOCKSHIFT);
 }
 
index ee14165f5717f55e7a11ed3278a997557032be0c..9cc6745d16ca6b9a14ef1bd7f92936a6ca88ddf2 100644 (file)
@@ -23,7 +23,7 @@
  * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  * Rewritten for Linux by Brian Behlendorf <behlendorf1@llnl.gov>.
  * LLNL-CODE-403049.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  */
 
 #include <sys/zfs_context.h>
@@ -308,7 +308,7 @@ skip_open:
        *max_psize = *psize;
 
        /* Based on the minimum sector size set the block size */
-       *ashift = highbit(MAX(block_size, SPA_MINBLOCKSIZE)) - 1;
+       *ashift = highbit64(MAX(block_size, SPA_MINBLOCKSIZE)) - 1;
 
        /* Try to set the io scheduler elevator algorithm */
        (void) vdev_elevator_switch(v, zfs_vdev_scheduler);
index cfae26a749995393b182641a4e77516faef97db2..02c92748f6c050210deca4441361e2a04a1324d2 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  */
 
 /*
@@ -84,7 +84,7 @@ fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
            &zap->zap_f.zap_phys, zap_evict);
 
        mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
-       zap->zap_f.zap_block_shift = highbit(zap->zap_dbuf->db_size) - 1;
+       zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1;
 
        zp = zap->zap_f.zap_phys;
        /*
@@ -458,7 +458,7 @@ zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
        rw_init(&l->l_rwlock, NULL, RW_DEFAULT, NULL);
        rw_enter(&l->l_rwlock, RW_WRITER);
        l->l_blkid = blkid;
-       l->l_bs = highbit(db->db_size)-1;
+       l->l_bs = highbit64(db->db_size) - 1;
        l->l_dbuf = db;
        l->l_phys = NULL;
 
index 13bc879d7e82645cd1a1a05423e941a94fa79b22..0e30ef319bd08f9fc95364bc02b5b7f12ae5f055 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
  */
 
 /*
@@ -105,16 +105,16 @@ zap_leaf_byteswap(zap_leaf_phys_t *buf, int size)
 {
        int i;
        zap_leaf_t l;
-       l.l_bs = highbit(size)-1;
+       l.l_bs = highbit64(size) - 1;
        l.l_phys = buf;
 
-       buf->l_hdr.lh_block_type =      BSWAP_64(buf->l_hdr.lh_block_type);
-       buf->l_hdr.lh_prefix =          BSWAP_64(buf->l_hdr.lh_prefix);
-       buf->l_hdr.lh_magic =           BSWAP_32(buf->l_hdr.lh_magic);
-       buf->l_hdr.lh_nfree =           BSWAP_16(buf->l_hdr.lh_nfree);
-       buf->l_hdr.lh_nentries =        BSWAP_16(buf->l_hdr.lh_nentries);
-       buf->l_hdr.lh_prefix_len =      BSWAP_16(buf->l_hdr.lh_prefix_len);
-       buf->l_hdr.lh_freelist =        BSWAP_16(buf->l_hdr.lh_freelist);
+       buf->l_hdr.lh_block_type =      BSWAP_64(buf->l_hdr.lh_block_type);
+       buf->l_hdr.lh_prefix =          BSWAP_64(buf->l_hdr.lh_prefix);
+       buf->l_hdr.lh_magic =           BSWAP_32(buf->l_hdr.lh_magic);
+       buf->l_hdr.lh_nfree =           BSWAP_16(buf->l_hdr.lh_nfree);
+       buf->l_hdr.lh_nentries =        BSWAP_16(buf->l_hdr.lh_nentries);
+       buf->l_hdr.lh_prefix_len =      BSWAP_16(buf->l_hdr.lh_prefix_len);
+       buf->l_hdr.lh_freelist =        BSWAP_16(buf->l_hdr.lh_freelist);
 
        for (i = 0; i < ZAP_LEAF_HASH_NUMENTRIES(&l); i++)
                buf->l_hash[i] = BSWAP_16(buf->l_hash[i]);
@@ -157,7 +157,7 @@ zap_leaf_init(zap_leaf_t *l, boolean_t sort)
 {
        int i;
 
-       l->l_bs = highbit(l->l_dbuf->db_size)-1;
+       l->l_bs = highbit64(l->l_dbuf->db_size) - 1;
        zap_memset(&l->l_phys->l_hdr, 0, sizeof (struct zap_leaf_header));
        zap_memset(l->l_phys->l_hash, CHAIN_END, 2*ZAP_LEAF_HASH_NUMENTRIES(l));
        for (i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) {
index 73d84b55431ff62b5a63756beed9dead74a8b36b..68fb747697d203ab971cad24c5614b0704481667 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  */
 
 #include <sys/zio.h>
@@ -374,7 +374,7 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
 
        if (*(uint64_t *)db->db_data != ZBT_MICRO) {
                mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
-               zap->zap_f.zap_block_shift = highbit(db->db_size) - 1;
+               zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1;
        } else {
                zap->zap_ismicro = TRUE;
        }
index f84e2129bd2b0ce4e8c53b04ca26aaeef845914a..7e440a381c1a6b0216f8ee1862398e238771a801 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  * Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
  */
 
@@ -1367,7 +1367,7 @@ __zio_execute(zio_t *zio)
                }
 
                zio->io_stage = stage;
-               rv = zio_pipeline[highbit(stage) - 1](zio);
+               rv = zio_pipeline[highbit64(stage) - 1](zio);
 
                if (rv == ZIO_PIPELINE_STOP)
                        return;