From: Brian Behlendorf <behlendorf1@llnl.gov>
Subject: [PATCH] fix dnode constructor
-Don't call the constructor twice
+The patch removes the extra call to dnode_cons() in dnode_create(). The
+extra call to the constructor was there to reinitialize the non-trivial
+data structures in the dnode (lists, mutexs, condvars, avl tree, etc).
+This was safe, although not exactly clean, on Solaris because none of
+the primitives allocate memory. In the Linux port this is not true.
+To keep stack usage to a minimum several of the primatives dynamically
+allocate memory thus initializing them twice results in a memory leak.
+
+This patch resolves this problem for Solaris and Linux by ensures all
+*_init's are called in the constructor, and all *_destroy's are called
+in the destructor. Additionally we ensure that all dnode objects are
+properly deconstructed before being freed to the slab, and when the
+objects are allocated from the slab all required data members are
+explicity initialized to correct values.
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
void
dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
{
- if (zf == NULL) {
- return;
- }
-
zf->zf_dnode = dno;
zf->zf_stream_cnt = 0;
zf->zf_alloc_fail = 0;
+}
+/*
+ * Clean-up state associated with a zfetch structure. This frees allocated
+ * structure members, empties the zf_stream tree, and generally makes things
+ * nice. This doesn't free the zfetch_t itself, that's left to the caller.
+ */
+void
+dmu_zfetch_rele(zfetch_t *zf)
+{
+ zstream_t *zs;
+ zstream_t *zs_next;
+
+ for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
+ zs_next = list_next(&zf->zf_stream, zs);
+
+ list_remove(&zf->zf_stream, zs);
+ mutex_destroy(&zs->zst_lock);
+ kmem_free(zs, sizeof (zstream_t));
+ }
+}
+
+/*
+ * Construct a zfetch structure.
+ */
+void
+dmu_zfetch_cons(zfetch_t *zf)
+{
list_create(&zf->zf_stream, sizeof (zstream_t),
offsetof(zstream_t, zst_node));
rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
}
+/*
+ * Destruct a zfetch structure.
+ */
+void
+dmu_zfetch_dest(zfetch_t *zf)
+{
+ list_destroy(&zf->zf_stream);
+ rw_destroy(&zf->zf_rwlock);
+}
+
/*
* This function computes the actual size, in blocks, that can be prefetched,
* and fetches it.
return (rc);
}
-/*
- * Clean-up state associated with a zfetch structure. This frees allocated
- * structure members, empties the zf_stream tree, and generally makes things
- * nice. This doesn't free the zfetch_t itself, that's left to the caller.
- */
-void
-dmu_zfetch_rele(zfetch_t *zf)
-{
- zstream_t *zs;
- zstream_t *zs_next;
-
- ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
-
- for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
- zs_next = list_next(&zf->zf_stream, zs);
-
- list_remove(&zf->zf_stream, zs);
- mutex_destroy(&zs->zst_lock);
- kmem_free(zs, sizeof (zstream_t));
- }
- list_destroy(&zf->zf_stream);
- rw_destroy(&zf->zf_rwlock);
-
- zf->zf_dnode = NULL;
-}
-
/*
* Given a zfetch and zstream structure, insert the zstream structure into the
* AVL tree contained within the zfetch structure. Peform the appropriate
list_create(&dn->dn_dbufs, sizeof (dmu_buf_impl_t),
offsetof(dmu_buf_impl_t, db_link));
+ dmu_zfetch_cons(&dn->dn_zfetch);
return (0);
}
}
list_destroy(&dn->dn_dbufs);
+ dmu_zfetch_dest(&dn->dn_zfetch);
}
void
if (drop_struct_lock)
rw_exit(&dn->dn_struct_rwlock);
}
+
+void
+dnode_verify_clean(dnode_t *dn)
+{
+ int i;
+
+ ASSERT(!RW_LOCK_HELD(&dn->dn_struct_rwlock));
+ ASSERT(!MUTEX_HELD(&dn->dn_mtx));
+ ASSERT(!MUTEX_HELD(&dn->dn_dbufs_mtx));
+ ASSERT(refcount_is_zero(&dn->dn_holds));
+ ASSERT(refcount_is_zero(&dn->dn_tx_holds));
+
+ for (i = 0; i < TXG_SIZE; i++) {
+ ASSERT(NULL == list_head(&dn->dn_dirty_records[i]));
+ ASSERT(0 == avl_numnodes(&dn->dn_ranges[i]));
+ }
+
+ ASSERT(NULL == list_head(&dn->dn_dbufs));
+ ASSERT(NULL == list_head(&dn->dn_zfetch.zf_stream));
+ ASSERT(!RW_LOCK_HELD(&dn->dn_zfetch.zf_rwlock));
+}
#endif
void
uint64_t object)
{
dnode_t *dn = kmem_cache_alloc(dnode_cache, KM_SLEEP);
+ int i;
+ DNODE_VERIFY_CLEAN(dn);
dn->dn_objset = os;
dn->dn_object = object;
dn->dn_dbuf = db;
dn->dn_phys = dnp;
- if (dnp->dn_datablkszsec)
+ list_link_init(&dn->dn_link);
+ for (i = 0; i < TXG_SIZE; i++) {
+ list_link_init(&dn->dn_dirty_link[i]);
+ dn->dn_next_nblkptr[i] = 0;
+ dn->dn_next_nlevels[i] = 0;
+ dn->dn_next_indblkshift[i] = 0;
+ dn->dn_next_bonuslen[i] = 0;
+ dn->dn_next_blksz[i] = 0;
+ }
+
+ if (dnp->dn_datablkszsec) {
dnode_setdblksz(dn, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
+ } else {
+ dn->dn_datablksz = 0;
+ dn->dn_datablkszsec = 0;
+ dn->dn_datablkshift = 0;
+ }
+
dn->dn_indblkshift = dnp->dn_indblkshift;
dn->dn_nlevels = dnp->dn_nlevels;
dn->dn_type = dnp->dn_type;
dn->dn_bonustype = dnp->dn_bonustype;
dn->dn_bonuslen = dnp->dn_bonuslen;
dn->dn_maxblkid = dnp->dn_maxblkid;
-
+ dn->dn_allocated_txg = 0;
+ dn->dn_free_txg = 0;
+ dn->dn_assigned_txg = 0;
+ dn->dn_dirtyctx = DN_UNDIRTIED;
+ dn->dn_dirtyctx_firstset = NULL;
+ dn->dn_bonus = NULL;
+ dn->dn_zio = NULL;
dmu_zfetch_init(&dn->dn_zfetch, dn);
ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
dbuf_evict(dn->dn_bonus);
dn->dn_bonus = NULL;
}
+ DNODE_VERIFY_CLEAN(dn);
kmem_cache_free(dnode_cache, dn);
arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
}
void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_rele(zfetch_t *);
+void dmu_zfetch_cons(zfetch_t *);
+void dmu_zfetch_dest(zfetch_t *);
+
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);
void dnode_byteswap(dnode_phys_t *dnp);
void dnode_buf_byteswap(void *buf, size_t size);
void dnode_verify(dnode_t *dn);
+void dnode_verify_clean(dnode_t *dn);
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
uint64_t dnode_current_max_length(dnode_t *dn);
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
_NOTE(CONSTCOND) } while (0)
#define DNODE_VERIFY(dn) dnode_verify(dn)
+#define DNODE_VERIFY_CLEAN(dn) dnode_verify_clean(dn)
#define FREE_VERIFY(db, start, end, tx) free_verify(db, start, end, tx)
#else
#define dprintf_dnode(db, fmt, ...)
#define DNODE_VERIFY(dn)
+#define DNODE_VERIFY_CLEAN(dn)
#define FREE_VERIFY(db, start, end, tx)
#endif