db->db.db_size = size;
if (db->db_level == 0) {
- ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg);
db->db_last_dirty->dt.dl.dr_data = buf;
}
+ ASSERT3U(db->db_last_dirty->dr_txg, ==, tx->tx_txg);
+ ASSERT3U(db->db_last_dirty->dr_accounted, ==, osize);
+ db->db_last_dirty->dr_accounted = size;
mutex_exit(&db->db_mtx);
dmu_objset_willuse_space(dn->dn_objset, size - osize, tx);
sizeof (dbuf_dirty_record_t),
offsetof(dbuf_dirty_record_t, dr_dirty_node));
}
- if (db->db_blkid != DMU_BONUS_BLKID && os->os_dsl_dataset != NULL)
+ if (db->db_blkid != DMU_BONUS_BLKID)
dr->dr_accounted = db->db.db_size;
dr->dr_dbuf = db;
dr->dr_txg = tx->tx_txg;
/*
* The callback will be called io_phys_children times. Retire one
* portion of our dirty space each time we are called. Any rounding
- * error will be cleaned up by dsl_pool_sync()'s call to
- * dsl_pool_undirty_space().
+ * error will be cleaned up by dbuf_write_done().
*/
delta = dr->dr_accounted / zio->io_phys_children;
dsl_pool_undirty_space(dp, delta, zio->io_txg);
mutex_destroy(&dr->dt.di.dr_mtx);
list_destroy(&dr->dt.di.dr_children);
}
- kmem_free(dr, sizeof (dbuf_dirty_record_t));
cv_broadcast(&db->db_changed);
ASSERT(db->db_dirtycnt > 0);
db->db_dirtycnt -= 1;
db->db_data_pending = NULL;
dbuf_rele_and_unlock(db, (void *)(uintptr_t)tx->tx_txg, B_FALSE);
+
+ /*
+ * If we didn't do a physical write in this ZIO and we
+ * still ended up here, it means that the space of the
+ * dbuf that we just released (and undirtied) above hasn't
+ * been marked as undirtied in the pool's accounting.
+ *
+ * Thus, we undirty that space in the pool's view of the
+ * world here. For physical writes this type of update
+ * happens in dbuf_write_physdone().
+ *
+ * If we did a physical write, cleanup any rounding errors
+ * that came up due to writing multiple copies of a block
+ * on disk [see dbuf_write_physdone()].
+ */
+ if (zio->io_phys_children == 0) {
+ dsl_pool_undirty_space(dmu_objset_pool(os),
+ dr->dr_accounted, zio->io_txg);
+ } else {
+ dsl_pool_undirty_space(dmu_objset_pool(os),
+ dr->dr_accounted % zio->io_phys_children, zio->io_txg);
+ }
+
+ kmem_free(dr, sizeof (dbuf_dirty_record_t));
}
static void
dmu_buf_rele_array(dbp, numbufs, FTAG);
}
+/*
+ * Note: Lustre is an external consumer of this interface.
+ */
void
dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size,
const void *buf, dmu_tx_t *tx)
}
/*
- * Call when we think we're going to write/free space in open context to track
- * the amount of dirty data in the open txg, which is also the amount
- * of memory that can not be evicted until this txg syncs.
+ * Call when we think we're going to write/free space in open context
+ * to track the amount of dirty data in the open txg, which is also the
+ * amount of memory that can not be evicted until this txg syncs.
+ *
+ * Note that there are two conditions where this can be called from
+ * syncing context:
+ *
+ * [1] When we just created the dataset, in which case we go on with
+ * updating any accounting of dirty data as usual.
+ * [2] When we are dirtying MOS data, in which case we only update the
+ * pool's accounting of dirty data.
*/
void
dmu_objset_willuse_space(objset_t *os, int64_t space, dmu_tx_t *tx)
if (ds != NULL) {
dsl_dir_willuse_space(ds->ds_dir, aspace, tx);
- dsl_pool_dirty_space(dmu_tx_pool(tx), space, tx);
}
+
+ dsl_pool_dirty_space(dmu_tx_pool(tx), space, tx);
}
#if defined(_KERNEL)
}
VERIFY0(zio_wait(zio));
- /*
- * We have written all of the accounted dirty data, so our
- * dp_space_towrite should now be zero. However, some seldom-used
- * code paths do not adhere to this (e.g. dbuf_undirty(), also
- * rounding error in dbuf_write_physdone).
- * Shore up the accounting of any dirtied space now.
- */
- dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg);
-
/*
* Update the long range free counter after
* we're done syncing user data
dsl_pool_sync_mos(dp, tx);
}
+ /*
+ * We have written all of the accounted dirty data, so our
+ * dp_space_towrite should now be zero. However, some seldom-used
+ * code paths do not adhere to this (e.g. dbuf_undirty()). Shore up
+ * the accounting of any dirtied space now.
+ *
+ * Note that, besides any dirty data from datasets, the amount of
+ * dirty data in the MOS is also accounted by the pool. Therefore,
+ * we want to do this cleanup after dsl_pool_sync_mos() so we don't
+ * attempt to update the accounting for the same dirty data twice.
+ * (i.e. at this point we only update the accounting for the space
+ * that we know that we "leaked").
+ */
+ dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg);
+
/*
* If we modify a dataset in the same txg that we want to destroy it,
* its dsl_dir's dd_dbuf will be dirty, and thus have a hold on it.