This patch removes the extra call to dnode_cons() in dnode_create().
author    Brian Behlendorf <behlendorf1@llnl.gov>
          Thu, 19 Mar 2009 22:22:48 +0000 (15:22 -0700)
committer Brian Behlendorf <behlendorf1@llnl.gov>
          Thu, 19 Mar 2009 22:22:48 +0000 (15:22 -0700)
The extra call to the constructor was there to reinitialize the non-trivial
primitives in the dnode (lists, mutexes, condvars, avl tree, etc.).
This was safe, although not exactly clean, on Solaris because none of
the primitives allocate memory.  In the Linux port this is not true.
To keep stack usage to a minimum, several of the primitives dynamically
allocate memory, so initializing them twice results in a memory leak.

This patch resolves the problem for both Solaris and Linux by ensuring all
*_inits are called in the constructor, and all *_destroys are called
in the destructor.  Additionally, we ensure that all dnode objects are
properly destructed before being freed back to the slab, and that when
objects are allocated from the slab all required data members are
explicitly initialized to correct values.
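
For context, here is a minimal sketch of the slab constructor/destructor
pattern the fix relies on.  The dnode_init() registration shown below is an
assumption for illustration (it is not part of this diff), and only a subset
of the dnode members is shown; the point is that kmem_cache_create() registers
the constructor and destructor once per cache, so the non-trivial members are
set up exactly once per slab object rather than again on every dnode_create().

/*
 * Illustrative sketch only -- the dnode_init() wiring is assumed, and only a
 * few members are shown.  Non-trivial members are initialized once in the
 * slab constructor and torn down once in the slab destructor; dnode_create()
 * then assigns only the trivial, per-instance fields on each allocation.
 */
static kmem_cache_t *dnode_cache;

static int
dnode_cons(void *arg, void *unused, int kmflag)
{
	dnode_t *dn = arg;

	rw_init(&dn->dn_struct_rwlock, NULL, RW_DEFAULT, NULL);
	mutex_init(&dn->dn_mtx, NULL, MUTEX_DEFAULT, NULL);
	list_create(&dn->dn_dbufs, sizeof (dmu_buf_impl_t),
	    offsetof(dmu_buf_impl_t, db_link));
	dmu_zfetch_cons(&dn->dn_zfetch);	/* added by this patch */

	return (0);
}

static void
dnode_dest(void *arg, void *unused)
{
	dnode_t *dn = arg;

	rw_destroy(&dn->dn_struct_rwlock);
	mutex_destroy(&dn->dn_mtx);
	list_destroy(&dn->dn_dbufs);
	dmu_zfetch_dest(&dn->dn_zfetch);	/* added by this patch */
}

void
dnode_init(void)
{
	/* The constructor/destructor run once per slab object, not per alloc. */
	dnode_cache = kmem_cache_create("dnode_t", sizeof (dnode_t), 0,
	    dnode_cons, dnode_dest, NULL, NULL, NULL, 0);
}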

.topmsg
module/zfs/dmu_zfetch.c
module/zfs/dnode.c
module/zfs/include/sys/dmu_zfetch.h
module/zfs/include/sys/dnode.h

diff --git a/.topmsg b/.topmsg
index 83b3e71cc9685ed85ee8b98235b768f431585625..e8212e012c43bd76626cc19172ec4a61db17a242 100644 (file)
--- a/.topmsg
+++ b/.topmsg
@@ -1,7 +1,20 @@
 From: Brian Behlendorf <behlendorf1@llnl.gov>
 Subject: [PATCH] fix dnode constructor
 
-Don't call the constructor twice
+The patch removes the extra call to dnode_cons() in dnode_create().  The
+extra call to the constructor was there to reinitialize the non-trivial
+data structures in the dnode (lists, mutexes, condvars, avl tree, etc.).
+This was safe, although not exactly clean, on Solaris because none of
+the primitives allocate memory.  In the Linux port this is not true.
+To keep stack usage to a minimum, several of the primitives dynamically
+allocate memory, so initializing them twice results in a memory leak.
+
+This patch resolves the problem for both Solaris and Linux by ensuring all
+*_inits are called in the constructor, and all *_destroys are called
+in the destructor.  Additionally, we ensure that all dnode objects are
+properly destructed before being freed back to the slab, and that when
+objects are allocated from the slab all required data members are
+explicitly initialized to correct values.
 
 Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
 
diff --git a/module/zfs/dmu_zfetch.c b/module/zfs/dmu_zfetch.c
index 4d79fe98e17ee6125b9b4522e7ac95c3fbca18e2..5ba366c4498056fd407cc7be33d5dc3b32f8af49 100644 (file)
--- a/module/zfs/dmu_zfetch.c
+++ b/module/zfs/dmu_zfetch.c
@@ -203,20 +203,53 @@ dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs)
 void
 dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
 {
-       if (zf == NULL) {
-               return;
-       }
-
        zf->zf_dnode = dno;
        zf->zf_stream_cnt = 0;
        zf->zf_alloc_fail = 0;
+}
 
+/*
+ * Clean-up state associated with a zfetch structure.  This frees allocated
+ * structure members, empties the zf_stream tree, and generally makes things
+ * nice.  This doesn't free the zfetch_t itself, that's left to the caller.
+ */
+void
+dmu_zfetch_rele(zfetch_t *zf)
+{
+       zstream_t *zs;
+       zstream_t *zs_next;
+
+       for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
+               zs_next = list_next(&zf->zf_stream, zs);
+
+               list_remove(&zf->zf_stream, zs);
+               mutex_destroy(&zs->zst_lock);
+               kmem_free(zs, sizeof (zstream_t));
+       }
+}
+
+/*
+ * Construct a zfetch structure.
+ */
+void
+dmu_zfetch_cons(zfetch_t *zf)
+{
        list_create(&zf->zf_stream, sizeof (zstream_t),
            offsetof(zstream_t, zst_node));
 
        rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
 }
 
+/*
+ * Destruct a zfetch structure.
+ */
+void
+dmu_zfetch_dest(zfetch_t *zf)
+{
+       list_destroy(&zf->zf_stream);
+       rw_destroy(&zf->zf_rwlock);
+}
+
 /*
  * This function computes the actual size, in blocks, that can be prefetched,
  * and fetches it.
@@ -441,32 +474,6 @@ out:
        return (rc);
 }
 
-/*
- * Clean-up state associated with a zfetch structure.  This frees allocated
- * structure members, empties the zf_stream tree, and generally makes things
- * nice.  This doesn't free the zfetch_t itself, that's left to the caller.
- */
-void
-dmu_zfetch_rele(zfetch_t *zf)
-{
-       zstream_t       *zs;
-       zstream_t       *zs_next;
-
-       ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
-
-       for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
-               zs_next = list_next(&zf->zf_stream, zs);
-
-               list_remove(&zf->zf_stream, zs);
-               mutex_destroy(&zs->zst_lock);
-               kmem_free(zs, sizeof (zstream_t));
-       }
-       list_destroy(&zf->zf_stream);
-       rw_destroy(&zf->zf_rwlock);
-
-       zf->zf_dnode = NULL;
-}
-
 /*
  * Given a zfetch and zstream structure, insert the zstream structure into the
 * AVL tree contained within the zfetch structure.  Perform the appropriate
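
(The net effect of the dmu_zfetch.c changes above is to split the old
dmu_zfetch_rele() teardown across slab-lifetime and instance-lifetime entry
points.  A hedged sketch of the intended pairing follows; the helper function
below is hypothetical and exists only for illustration.)

/*
 * Hypothetical helper, not part of the patch: shows how the reworked zfetch
 * entry points pair up.  cons/dest run once per slab object (called from
 * dnode_cons()/dnode_dest()); init/rele run on every dnode instantiation
 * and teardown.
 */
static void
zfetch_lifecycle_example(zfetch_t *zf, dnode_t *dn)
{
	dmu_zfetch_cons(zf);		/* create the zf_stream list, init zf_rwlock */
	dmu_zfetch_init(zf, dn);	/* reset counters, bind the owning dnode */
	/* ... prefetch streams may be added to zf_stream while the dnode is live ... */
	dmu_zfetch_rele(zf);		/* free any remaining zstream_t entries */
	dmu_zfetch_dest(zf);		/* destroy the list and the rwlock */
}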
diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
index b00a8c4a7172592ca75f3d1b65cb12f33cb23fb4..384a5bea9f92877d47edd5c8d015316f6d149692 100644 (file)
--- a/module/zfs/dnode.c
+++ b/module/zfs/dnode.c
@@ -72,6 +72,7 @@ dnode_cons(void *arg, void *unused, int kmflag)
 
        list_create(&dn->dn_dbufs, sizeof (dmu_buf_impl_t),
            offsetof(dmu_buf_impl_t, db_link));
+       dmu_zfetch_cons(&dn->dn_zfetch);
 
        return (0);
 }
@@ -96,6 +97,7 @@ dnode_dest(void *arg, void *unused)
        }
 
        list_destroy(&dn->dn_dbufs);
+       dmu_zfetch_dest(&dn->dn_zfetch);
 }
 
 void
@@ -165,6 +167,27 @@ dnode_verify(dnode_t *dn)
        if (drop_struct_lock)
                rw_exit(&dn->dn_struct_rwlock);
 }
+
+void
+dnode_verify_clean(dnode_t *dn)
+{
+       int i;
+
+       ASSERT(!RW_LOCK_HELD(&dn->dn_struct_rwlock));
+       ASSERT(!MUTEX_HELD(&dn->dn_mtx));
+       ASSERT(!MUTEX_HELD(&dn->dn_dbufs_mtx));
+       ASSERT(refcount_is_zero(&dn->dn_holds));
+       ASSERT(refcount_is_zero(&dn->dn_tx_holds));
+
+       for (i = 0; i < TXG_SIZE; i++) {
+               ASSERT(NULL == list_head(&dn->dn_dirty_records[i]));
+                ASSERT(0 == avl_numnodes(&dn->dn_ranges[i]));
+       }
+
+       ASSERT(NULL == list_head(&dn->dn_dbufs));
+       ASSERT(NULL == list_head(&dn->dn_zfetch.zf_stream));
+       ASSERT(!RW_LOCK_HELD(&dn->dn_zfetch.zf_rwlock));
+}
 #endif
 
 void
@@ -276,14 +299,32 @@ dnode_create(objset_impl_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
     uint64_t object)
 {
        dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
+       int i;
 
+       DNODE_VERIFY_CLEAN(dn);
        dn->dn_objset = os;
        dn->dn_object = object;
        dn->dn_dbuf = db;
        dn->dn_phys = dnp;
 
-       if (dnp->dn_datablkszsec)
+       list_link_init(&dn->dn_link);
+       for (i = 0; i < TXG_SIZE; i++) {
+               list_link_init(&dn->dn_dirty_link[i]);
+               dn->dn_next_nblkptr[i] = 0;
+               dn->dn_next_nlevels[i] = 0;
+               dn->dn_next_indblkshift[i] = 0;
+               dn->dn_next_bonuslen[i] = 0;
+               dn->dn_next_blksz[i] = 0;
+       }
+
+       if (dnp->dn_datablkszsec) {
                dnode_setdblksz(dn, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
+       } else {
+               dn->dn_datablksz = 0;
+               dn->dn_datablkszsec = 0;
+               dn->dn_datablkshift = 0;
+       }
+
        dn->dn_indblkshift = dnp->dn_indblkshift;
        dn->dn_nlevels = dnp->dn_nlevels;
        dn->dn_type = dnp->dn_type;
@@ -293,7 +334,13 @@ dnode_create(objset_impl_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
        dn->dn_bonustype = dnp->dn_bonustype;
        dn->dn_bonuslen = dnp->dn_bonuslen;
        dn->dn_maxblkid = dnp->dn_maxblkid;
-
+       dn->dn_allocated_txg = 0;
+       dn->dn_free_txg = 0;
+       dn->dn_assigned_txg = 0;
+       dn->dn_dirtyctx = DN_UNDIRTIED;
+       dn->dn_dirtyctx_firstset = NULL;
+       dn->dn_bonus = NULL;
+       dn->dn_zio = NULL;
        dmu_zfetch_init(&dn->dn_zfetch, dn);
 
        ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
@@ -335,6 +382,7 @@ dnode_destroy(dnode_t *dn)
                dbuf_evict(dn->dn_bonus);
                dn->dn_bonus = NULL;
        }
+       DNODE_VERIFY_CLEAN(dn);
        kmem_cache_free(dnode_cache, dn);
        arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
 }
diff --git a/module/zfs/include/sys/dmu_zfetch.h b/module/zfs/include/sys/dmu_zfetch.h
index c94bced933aff00fd0a59b682b9b6144fc6c3707..97b6738522165ed2840babf674289f0e13f19f64 100644 (file)
--- a/module/zfs/include/sys/dmu_zfetch.h
+++ b/module/zfs/include/sys/dmu_zfetch.h
@@ -65,6 +65,9 @@ typedef struct zfetch {
 
 void           dmu_zfetch_init(zfetch_t *, struct dnode *);
 void           dmu_zfetch_rele(zfetch_t *);
+void           dmu_zfetch_cons(zfetch_t *);
+void           dmu_zfetch_dest(zfetch_t *);
+
 void           dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);
 
 
diff --git a/module/zfs/include/sys/dnode.h b/module/zfs/include/sys/dnode.h
index be9e569083216f8b4ca49e4f15253f16cc4a6a90..f802a480f8b11783a6d97e03a86727474e7fea95 100644 (file)
--- a/module/zfs/include/sys/dnode.h
+++ b/module/zfs/include/sys/dnode.h
@@ -222,6 +222,7 @@ void dnode_free(dnode_t *dn, dmu_tx_t *tx);
 void dnode_byteswap(dnode_phys_t *dnp);
 void dnode_buf_byteswap(void *buf, size_t size);
 void dnode_verify(dnode_t *dn);
+void dnode_verify_clean(dnode_t *dn);
 int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
 uint64_t dnode_current_max_length(dnode_t *dn);
 void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
@@ -259,12 +260,14 @@ void dnode_evict_dbufs(dnode_t *dn);
 _NOTE(CONSTCOND) } while (0)
 
 #define        DNODE_VERIFY(dn)                dnode_verify(dn)
+#define        DNODE_VERIFY_CLEAN(dn)          dnode_verify_clean(dn)
 #define        FREE_VERIFY(db, start, end, tx) free_verify(db, start, end, tx)
 
 #else
 
 #define        dprintf_dnode(db, fmt, ...)
 #define        DNODE_VERIFY(dn)
+#define        DNODE_VERIFY_CLEAN(dn)
 #define        FREE_VERIFY(db, start, end, tx)
 
 #endif
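
As a usage note, the new DNODE_VERIFY_CLEAN() check brackets an object's trip
through the dnode cache and, as the #else branch above shows, compiles away to
nothing in non-debug builds.  A minimal sketch of the intent; the caller below
is illustrative only and is not code from this diff.

/*
 * Illustrative only: DNODE_VERIFY_CLEAN() asserts that an object coming out
 * of, or going back into, the slab carries no leftover state.  In non-debug
 * builds the macro expands to nothing, so there is no production overhead.
 */
static void
dnode_cache_roundtrip_example(kmem_cache_t *cache)
{
	dnode_t *dn = kmem_cache_alloc(cache, KM_SLEEP);

	DNODE_VERIFY_CLEAN(dn);	/* a freshly allocated object must be clean */
	/* ... dnode_create() assigns only the trivial, per-instance fields ... */
	DNODE_VERIFY_CLEAN(dn);	/* and must be equally clean before release */
	kmem_cache_free(cache, dn);
}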