ARC_SPACE_NUMTYPES
} arc_space_type_t;
+typedef enum arc_state_type { /* tag recorded in arc_state_t.arcs_state */
+ ARC_STATE_ANON, /* also reported when a header has no state */
+ ARC_STATE_MRU, /* assigned to arc_mru */
+ ARC_STATE_MRU_GHOST, /* assigned to arc_mru_ghost */
+ ARC_STATE_MFU, /* assigned to arc_mfu */
+ ARC_STATE_MFU_GHOST, /* assigned to arc_mfu_ghost */
+ ARC_STATE_L2C_ONLY, /* assigned to arc_l2c_only */
+ ARC_STATE_NUMTYPES /* number of state types listed above */
+} arc_state_type_t;
+
+typedef struct arc_buf_info { /* snapshot filled in by arc_buf_info() */
+ arc_state_type_t abi_state_type; /* state tag, ANON if no state */
+ arc_buf_contents_t abi_state_contents; /* copy of hdr->b_type */
+ uint64_t abi_state_index; /* list position, -1 if not computed */
+ uint32_t abi_flags; /* copy of hdr->b_flags */
+ uint32_t abi_datacnt; /* copy of hdr->b_datacnt */
+ uint64_t abi_size; /* copy of hdr->b_size */
+ uint64_t abi_spa; /* presumably pool id; TODO confirm producer */
+ uint64_t abi_access; /* copy of hdr->b_arc_access */
+ uint32_t abi_mru_hits; /* copy of hdr->b_mru_hits */
+ uint32_t abi_mru_ghost_hits; /* copy of hdr->b_mru_ghost_hits */
+ uint32_t abi_mfu_hits; /* copy of hdr->b_mfu_hits */
+ uint32_t abi_mfu_ghost_hits; /* copy of hdr->b_mfu_ghost_hits */
+ uint32_t abi_l2arc_hits; /* l2hdr b_hits, 0 without an l2hdr */
+ uint32_t abi_holds; /* refcount_count() of hdr->b_refcnt */
+ uint64_t abi_l2arc_dattr; /* l2hdr b_daddr, 0 without an l2hdr */
+ uint64_t abi_l2arc_asize; /* l2hdr b_asize, 0 without an l2hdr */
+ enum zio_compress abi_l2arc_compress; /* l2hdr b_compress */
+} arc_buf_info_t;
+
void arc_space_consume(uint64_t space, arc_space_type_t type);
void arc_space_return(uint64_t space, arc_space_type_t type);
arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
boolean_t arc_buf_remove_ref(arc_buf_t *buf, void *tag);
+void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index);
int arc_buf_size(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_released(arc_buf_t *buf);
void dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx);
+void dbuf_stats_init(dbuf_hash_table_t *hash);
+void dbuf_stats_destroy(void);
+
#define DB_DNODE(_db) ((_db)->db_dnode_handle->dnh_dnode)
#define DB_DNODE_LOCK(_db) ((_db)->db_dnode_handle->dnh_zrlock)
#define DB_DNODE_ENTER(_db) (zrl_add(&DB_DNODE_LOCK(_db)))
* If doi is NULL, just indicates whether the object exists.
*/
int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi);
+void __dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
$(top_srcdir)/module/zfs/bpobj.c \
$(top_srcdir)/module/zfs/bptree.c \
$(top_srcdir)/module/zfs/dbuf.c \
+ $(top_srcdir)/module/zfs/dbuf_stats.c \
$(top_srcdir)/module/zfs/ddt.c \
$(top_srcdir)/module/zfs/ddt_zap.c \
$(top_srcdir)/module/zfs/dmu.c \
$(MODULE)-objs += @top_srcdir@/module/zfs/bplist.o
$(MODULE)-objs += @top_srcdir@/module/zfs/bpobj.o
$(MODULE)-objs += @top_srcdir@/module/zfs/dbuf.o
+$(MODULE)-objs += @top_srcdir@/module/zfs/dbuf_stats.o
$(MODULE)-objs += @top_srcdir@/module/zfs/bptree.o
$(MODULE)-objs += @top_srcdir@/module/zfs/ddt.o
$(MODULE)-objs += @top_srcdir@/module/zfs/ddt_zap.o
uint64_t arcs_lsize[ARC_BUFC_NUMTYPES]; /* amount of evictable data */
uint64_t arcs_size; /* total amount of data in this state */
kmutex_t arcs_mtx;
+ arc_state_type_t arcs_state;
} arc_state_t;
/* The 6 states: */
/* updated atomically */
clock_t b_arc_access;
+ uint32_t b_mru_hits;
+ uint32_t b_mru_ghost_hits;
+ uint32_t b_mfu_hits;
+ uint32_t b_mfu_ghost_hits;
+ uint32_t b_l2_hits;
/* self protecting */
refcount_t b_refcnt;
/* compression applied to buffer data */
enum zio_compress b_compress;
/* real alloc'd buffer size depending on b_compress applied */
- int b_asize;
+ uint32_t b_asize;
+ uint32_t b_hits;
/* temporary buffer holder for in-flight compressed data */
void *b_tmp_cdata;
};
return (cnt);
}
+/*
+ * Returns detailed information about a specific arc buffer.  The arc_buf_info_t
+ * is zeroed and then filled from the buffer's arc header: flags, data count,
+ * state type, contents type, size, pool identifier, last access time, the
+ * per-state hit counters and the hold count.  When an l2arc header is attached
+ * its address, allocated size, compression type and hit count are copied too.
+ *
+ * When the state_index argument is set the function will calculate the arc
+ * header list position for its arc state.  Since this requires a linear
+ * traversal under the state mutex, callers are strongly encouraged not to do
+ * this.  However, it can be helpful for targeted analysis so the functionality
+ * is provided.  abi_state_index is left at -1 when the index was not
+ * requested or the header was not found on the state's list.
+ */
+void
+arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index)
+{
+	arc_buf_hdr_t *hdr = ab->b_hdr;
+	arc_state_t *state = hdr->b_state;
+
+	memset(abi, 0, sizeof (arc_buf_info_t));
+	abi->abi_flags = hdr->b_flags;
+	abi->abi_datacnt = hdr->b_datacnt;
+	abi->abi_state_type = state ? state->arcs_state : ARC_STATE_ANON;
+	abi->abi_state_contents = hdr->b_type;
+	abi->abi_state_index = -1;
+	abi->abi_size = hdr->b_size;
+	abi->abi_spa = hdr->b_spa;
+	abi->abi_access = hdr->b_arc_access;
+	abi->abi_mru_hits = hdr->b_mru_hits;
+	abi->abi_mru_ghost_hits = hdr->b_mru_ghost_hits;
+	abi->abi_mfu_hits = hdr->b_mfu_hits;
+	abi->abi_mfu_ghost_hits = hdr->b_mfu_ghost_hits;
+	abi->abi_holds = refcount_count(&hdr->b_refcnt);
+
+	if (hdr->b_l2hdr) {
+		abi->abi_l2arc_dattr = hdr->b_l2hdr->b_daddr;
+		abi->abi_l2arc_asize = hdr->b_l2hdr->b_asize;
+		abi->abi_l2arc_compress = hdr->b_l2hdr->b_compress;
+		abi->abi_l2arc_hits = hdr->b_l2hdr->b_hits;
+	}
+
+	if (state && state_index) {
+		list_t *list = &state->arcs_list[hdr->b_type];
+		arc_buf_hdr_t *h;
+		uint64_t idx = 0;
+
+		mutex_enter(&state->arcs_mtx);
+		/*
+		 * Test list membership only once the list is locked so the
+		 * answer cannot change while the list is being walked.
+		 */
+		if (list_link_active(&hdr->b_arc_node)) {
+			for (h = list_head(list); h != NULL;
+			    h = list_next(list, h)) {
+				if (h == hdr) {
+					/* publish only a confirmed position */
+					abi->abi_state_index = idx;
+					break;
+				}
+				idx++;
+			}
+		}
+		mutex_exit(&state->arcs_mtx);
+	}
+}
+
+
/*
* Move the supplied buffer to the indicated state. The mutex
* for the buffer must be held by the caller.
hdr->b_spa = spa_load_guid(spa);
hdr->b_state = arc_anon;
hdr->b_arc_access = 0;
+ hdr->b_mru_hits = 0;
+ hdr->b_mru_ghost_hits = 0;
+ hdr->b_mfu_hits = 0;
+ hdr->b_mfu_ghost_hits = 0;
+ hdr->b_l2_hits = 0;
buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
buf->b_hdr = hdr;
buf->b_data = NULL;
ASSERT(list_link_active(&buf->b_arc_node));
} else {
buf->b_flags &= ~ARC_PREFETCH;
+ atomic_inc_32(&buf->b_mru_hits);
ARCSTAT_BUMP(arcstat_mru_hits);
}
buf->b_arc_access = now;
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf);
arc_change_state(arc_mfu, buf, hash_lock);
}
+ atomic_inc_32(&buf->b_mru_hits);
ARCSTAT_BUMP(arcstat_mru_hits);
} else if (buf->b_state == arc_mru_ghost) {
arc_state_t *new_state;
buf->b_arc_access = ddi_get_lbolt();
arc_change_state(new_state, buf, hash_lock);
+ atomic_inc_32(&buf->b_mru_ghost_hits);
ARCSTAT_BUMP(arcstat_mru_ghost_hits);
} else if (buf->b_state == arc_mfu) {
/*
ASSERT(refcount_count(&buf->b_refcnt) == 0);
ASSERT(list_link_active(&buf->b_arc_node));
}
+ atomic_inc_32(&buf->b_mfu_hits);
ARCSTAT_BUMP(arcstat_mfu_hits);
buf->b_arc_access = ddi_get_lbolt();
} else if (buf->b_state == arc_mfu_ghost) {
DTRACE_PROBE1(new_state__mfu, arc_buf_hdr_t *, buf);
arc_change_state(new_state, buf, hash_lock);
+ atomic_inc_32(&buf->b_mfu_ghost_hits);
ARCSTAT_BUMP(arcstat_mfu_ghost_hits);
} else if (buf->b_state == arc_l2c_only) {
/*
DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
ARCSTAT_BUMP(arcstat_l2_hits);
+ atomic_inc_32(&hdr->b_l2hdr->b_hits);
cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
KM_PUSHPAGE);
nhdr->b_buf = buf;
nhdr->b_state = arc_anon;
nhdr->b_arc_access = 0;
+ nhdr->b_mru_hits = 0;
+ nhdr->b_mru_ghost_hits = 0;
+ nhdr->b_mfu_hits = 0;
+ nhdr->b_mfu_ghost_hits = 0;
+ nhdr->b_l2_hits = 0;
nhdr->b_flags = flags & ARC_L2_WRITING;
nhdr->b_l2hdr = NULL;
nhdr->b_datacnt = 1;
if (hdr->b_state != arc_anon)
arc_change_state(arc_anon, hdr, hash_lock);
hdr->b_arc_access = 0;
+ hdr->b_mru_hits = 0;
+ hdr->b_mru_ghost_hits = 0;
+ hdr->b_mfu_hits = 0;
+ hdr->b_mfu_ghost_hits = 0;
+ hdr->b_l2_hits = 0;
if (hash_lock)
mutex_exit(hash_lock);
list_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
sizeof (arc_buf_hdr_t), offsetof(arc_buf_hdr_t, b_arc_node));
+ arc_anon->arcs_state = ARC_STATE_ANON;
+ arc_mru->arcs_state = ARC_STATE_MRU;
+ arc_mru_ghost->arcs_state = ARC_STATE_MRU_GHOST;
+ arc_mfu->arcs_state = ARC_STATE_MFU;
+ arc_mfu_ghost->arcs_state = ARC_STATE_MFU_GHOST;
+ arc_l2c_only->arcs_state = ARC_STATE_L2C_ONLY;
+
buf_init();
arc_thread_exit = 0;
l2hdr->b_compress = ZIO_COMPRESS_OFF;
l2hdr->b_asize = ab->b_size;
l2hdr->b_tmp_cdata = ab->b_buf->b_data;
+ l2hdr->b_hits = 0;
buf_sz = ab->b_size;
ab->b_l2hdr = l2hdr;
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(arc_read);
EXPORT_SYMBOL(arc_buf_remove_ref);
+EXPORT_SYMBOL(arc_buf_info);
EXPORT_SYMBOL(arc_getbuf_func);
EXPORT_SYMBOL(arc_add_prune_callback);
EXPORT_SYMBOL(arc_remove_prune_callback);
for (i = 0; i < DBUF_MUTEXES; i++)
mutex_init(&h->hash_mutexes[i], NULL, MUTEX_DEFAULT, NULL);
+
+ dbuf_stats_init(h);
}
void
dbuf_hash_table_t *h = &dbuf_hash_table;
int i;
+ dbuf_stats_destroy();
+
for (i = 0; i < DBUF_MUTEXES; i++)
mutex_destroy(&h->hash_mutexes[i]);
#if defined(_KERNEL) && defined(HAVE_SPL)
--- /dev/null
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/dbuf.h>
+#include <sys/dmu_objset.h>
+
+/*
+ * Calculate the index of the arc header for the state, disabled by default.
+ */
+int zfs_dbuf_state_index = 0;
+
+/*
+ * ==========================================================================
+ * Dbuf Hash Read Routines
+ * ==========================================================================
+ */
+typedef struct dbuf_stats_t { /* state behind the "dbufs" raw kstat */
+ kmutex_t lock; /* installed as the kstat's ks_lock */
+ kstat_t *kstat; /* result of kstat_create(), may be NULL */
+ dbuf_hash_table_t *hash; /* hash table handed to dbuf_stats_init() */
+ int idx; /* bucket selected by the addr callback */
+} dbuf_stats_t;
+
+static dbuf_stats_t dbuf_stats_hash_table;
+
+/*
+ * Raw kstat header callback: formats the two heading lines (section titles
+ * and per-column names) for the dbuf hash table report into buf.
+ *
+ * buf  - destination buffer
+ * size - capacity of buf in bytes
+ *
+ * Always returns 0.  The output is truncated, and still NUL terminated by
+ * snprintf(), when it does not fit; nothing is written when size is 0.
+ */
+static int
+dbuf_stats_hash_table_headers(char *buf, size_t size)
+{
+	if (size == 0)
+		return (0);
+
+	/*
+	 * snprintf() terminates the string itself within the given size.
+	 * Its return value is the untruncated length, so it must not be
+	 * used as an index into buf.
+	 */
+	(void) snprintf(buf, size,
+	    "%-88s | %-124s | %s\n"
+	    "%-16s %-8s %-8s %-8s %-8s %-8s %-8s %-5s %-5s %5s | "
+	    "%-5s %-5s %-6s %-8s %-6s %-8s %-12s "
+	    "%-6s %-6s %-6s %-6s %-6s %-8s %-8s %-8s %-5s | "
+	    "%-6s %-6s %-8s %-8s %-6s %-6s %-5s %-8s %-8s\n",
+	    "dbuf", "arcbuf", "dnode", "pool", "objset", "object", "level",
+	    "blkid", "offset", "dbsize", "meta", "state", "dbholds", "list",
+	    "atype", "index", "flags", "count", "asize", "access", "mru", "gmru",
+	    "mfu", "gmfu", "l2", "l2_dattr", "l2_asize", "l2_comp", "aholds",
+	    "dtype", "btype", "data_bs", "meta_bs", "bsize",
+	    "lvls", "dholds", "blocks", "dsize");
+
+	return (0);
+}
+
+/*
+ * Format one report line for a single dbuf into buf: the dbuf's own fields,
+ * the arc buffer information (all zero when the dbuf has no arc buffer) and
+ * the object information of its dnode (all zero when no dnode is attached).
+ *
+ * buf  - destination buffer
+ * size - capacity of buf in bytes
+ * db   - dbuf to describe
+ *
+ * Returns the number of characters stored in buf, not counting the
+ * terminating NUL.  The value never exceeds size - 1; a return of exactly
+ * size - 1 means the line may have been truncated.
+ */
+int
+__dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
+{
+	arc_buf_info_t abi = { 0 };
+	dmu_object_info_t doi = { 0 };
+	dnode_t *dn = DB_DNODE(db);
+	int nwritten;
+
+	if (size == 0)
+		return (0);
+
+	if (db->db_buf)
+		arc_buf_info(db->db_buf, &abi, zfs_dbuf_state_index);
+
+	if (dn)
+		__dmu_object_info_from_dnode(dn, &doi);
+
+	nwritten = snprintf(buf, size,
+	    "%-16s %-8llu %-8lld %-8lld %-8lld %-8llu %-8llu %-5d %-5d %-5lu | "
+	    "%-5d %-5d %-6lld 0x%-6x %-6lu %-8llu %-12llu "
+	    "%-6lu %-6lu %-6lu %-6lu %-6lu %-8llu %-8llu %-8d %-5lu | "
+	    "%-6d %-6d %-8lu %-8lu %-6llu %-6lu %-5lu %-8llu %-8llu\n",
+	    /* dmu_buf_impl_t */
+	    /* dn is optional here, so never dereference it unchecked */
+	    dn ? spa_name(dn->dn_objset->os_spa) : "-",
+	    (u_longlong_t)dmu_objset_id(db->db_objset),
+	    (longlong_t)db->db.db_object,
+	    (longlong_t)db->db_level,
+	    (longlong_t)db->db_blkid,
+	    (u_longlong_t)db->db.db_offset,
+	    (u_longlong_t)db->db.db_size,
+	    !!dbuf_is_metadata(db),
+	    db->db_state,
+	    (ulong_t)refcount_count(&db->db_holds),
+	    /* arc_buf_info_t */
+	    abi.abi_state_type,
+	    abi.abi_state_contents,
+	    (longlong_t)abi.abi_state_index,
+	    abi.abi_flags,
+	    (ulong_t)abi.abi_datacnt,
+	    (u_longlong_t)abi.abi_size,
+	    (u_longlong_t)abi.abi_access,
+	    (ulong_t)abi.abi_mru_hits,
+	    (ulong_t)abi.abi_mru_ghost_hits,
+	    (ulong_t)abi.abi_mfu_hits,
+	    (ulong_t)abi.abi_mfu_ghost_hits,
+	    (ulong_t)abi.abi_l2arc_hits,
+	    (u_longlong_t)abi.abi_l2arc_dattr,
+	    (u_longlong_t)abi.abi_l2arc_asize,
+	    abi.abi_l2arc_compress,
+	    (ulong_t)abi.abi_holds,
+	    /* dmu_object_info_t */
+	    doi.doi_type,
+	    doi.doi_bonus_type,
+	    (ulong_t)doi.doi_data_block_size,
+	    (ulong_t)doi.doi_metadata_block_size,
+	    (u_longlong_t)doi.doi_bonus_size,
+	    (ulong_t)doi.doi_indirection,
+	    (ulong_t)(dn ? refcount_count(&dn->dn_holds) : 0),
+	    (u_longlong_t)doi.doi_fill_count,
+	    (u_longlong_t)doi.doi_max_offset);
+
+	if (nwritten < 0) {
+		/* formatting failed: leave an empty string behind */
+		buf[0] = '\0';
+		return (0);
+	}
+
+	/*
+	 * snprintf() reports the untruncated length; clamp it to what was
+	 * really stored so callers can safely advance their cursor by it.
+	 */
+	if ((size_t)nwritten >= size)
+		nwritten = (int)(size - 1);
+
+	return (nwritten);
+}
+
+/*
+ * Raw kstat data callback: emits one line per dbuf found on the hash chain
+ * selected by dsh->idx.
+ *
+ * buf  - scratch buffer to fill, zeroed on entry
+ * size - capacity of buf in bytes
+ * data - the dbuf_stats_t returned by the addr callback
+ *
+ * Returns 0 on success, or ENOMEM when the scratch buffer is too small to
+ * hold the whole chain.
+ */
+static int
+dbuf_stats_hash_table_data(char *buf, size_t size, void *data)
+{
+	dbuf_stats_t *dsh = (dbuf_stats_t *)data;
+	dbuf_hash_table_t *h = dsh->hash;
+	dmu_buf_impl_t *db;
+	int length, error = 0;
+
+	ASSERT3S(dsh->idx, >=, 0);
+	ASSERT3S(dsh->idx, <=, h->hash_table_mask);
+	memset(buf, 0, size);
+
+	/*
+	 * The bucket mutex is held for the whole walk.  Dropping it while a
+	 * dbuf is being formatted would allow the chain to change, leaving
+	 * db->db_hash_next unsafe to follow once db_mtx has been released.
+	 */
+	mutex_enter(DBUF_HASH_MUTEX(h, dsh->idx));
+	for (db = h->hash_table[dsh->idx]; db != NULL; db = db->db_hash_next) {
+		/*
+		 * Returning ENOMEM will cause the data and header functions
+		 * to be called with larger scratch buffers.
+		 */
+		if (size < 512) {
+			error = ENOMEM;
+			break;
+		}
+
+		mutex_enter(&db->db_mtx);
+		length = __dbuf_stats_hash_table_data(buf, size, db);
+		mutex_exit(&db->db_mtx);
+
+		/* nothing was formatted for this dbuf */
+		if (length <= 0)
+			continue;
+
+		/*
+		 * A line that filled (or would overflow) the remaining space
+		 * may be truncated; ask for a larger buffer rather than run
+		 * the cursor past the end or underflow the remaining size.
+		 */
+		if ((size_t)length + 1 >= size) {
+			error = ENOMEM;
+			break;
+		}
+
+		buf += length;
+		size -= length;
+	}
+	mutex_exit(DBUF_HASH_MUTEX(h, dsh->idx));
+
+	return (error);
+}
+
+/*
+ * Raw kstat addr callback: selects hash bucket n as the next one to report.
+ * Stores n in the stats structure and returns that structure, or returns
+ * NULL once n is beyond the hash table mask.  Must be called with the
+ * stats lock held.
+ */
+static void *
+dbuf_stats_hash_table_addr(kstat_t *ksp, loff_t n)
+{
+	dbuf_stats_t *stats = ksp->ks_private;
+
+	ASSERT(MUTEX_HELD(&stats->lock));
+
+	/* past the last bucket: nothing further to report */
+	if (!(n <= stats->hash->hash_table_mask))
+		return (NULL);
+
+	stats->idx = n;
+
+	return (stats);
+}
+
+/*
+ * Set up the stats lock, remember the hash table to report on and create
+ * the raw "dbufs" kstat.  When kstat creation fails only the lock and the
+ * hash pointer are initialized and the kstat pointer is left NULL.
+ */
+static void
+dbuf_stats_hash_table_init(dbuf_hash_table_t *hash)
+{
+	dbuf_stats_t *stats = &dbuf_stats_hash_table;
+
+	mutex_init(&stats->lock, NULL, MUTEX_DEFAULT, NULL);
+	stats->hash = hash;
+	stats->kstat = kstat_create("zfs", 0, "dbufs", "misc",
+	    KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
+
+	if (stats->kstat == NULL)
+		return;
+
+	stats->kstat->ks_lock = &stats->lock;
+	stats->kstat->ks_ndata = UINT32_MAX;
+	stats->kstat->ks_private = stats;
+	kstat_set_raw_ops(stats->kstat, dbuf_stats_hash_table_headers,
+	    dbuf_stats_hash_table_data, dbuf_stats_hash_table_addr);
+	kstat_install(stats->kstat);
+}
+
+/*
+ * Delete the "dbufs" kstat, if one was created, and destroy the stats lock.
+ */
+static void
+dbuf_stats_hash_table_destroy(void)
+{
+	dbuf_stats_t *stats = &dbuf_stats_hash_table;
+
+	if (stats->kstat != NULL)
+		kstat_delete(stats->kstat);
+
+	mutex_destroy(&stats->lock);
+}
+
+void
+dbuf_stats_init(dbuf_hash_table_t *hash)
+{
+ dbuf_stats_hash_table_init(hash); /* only stats source set up here */
+}
+
+void
+dbuf_stats_destroy(void)
+{
+ dbuf_stats_hash_table_destroy(); /* undoes dbuf_stats_init() */
+}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+module_param(zfs_dbuf_state_index, int, 0644);
+MODULE_PARM_DESC(zfs_dbuf_state_index, "Calculate arc header index");
+#endif
}
void
-dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
+__dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
{
- dnode_phys_t *dnp;
+ dnode_phys_t *dnp = dn->dn_phys;
int i;
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- mutex_enter(&dn->dn_mtx);
-
- dnp = dn->dn_phys;
-
doi->doi_data_block_size = dn->dn_datablksz;
doi->doi_metadata_block_size = dn->dn_indblkshift ?
1ULL << dn->dn_indblkshift : 0;
doi->doi_fill_count = 0;
for (i = 0; i < dnp->dn_nblkptr; i++)
doi->doi_fill_count += dnp->dn_blkptr[i].blk_fill;
+}
+
+void
+dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
+{
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ mutex_enter(&dn->dn_mtx);
+
+ __dmu_object_info_from_dnode(dn, doi);
mutex_exit(&dn->dn_mtx);
rw_exit(&dn->dn_struct_rwlock);