unsigned long zfs_arc_min = 0;
unsigned long zfs_arc_meta_limit = 0;
unsigned long zfs_arc_meta_min = 0;
+unsigned long zfs_arc_dnode_limit = 0;
+unsigned long zfs_arc_dnode_reduce_percent = 10;
int zfs_arc_grow_retry = 0;
int zfs_arc_shrink_shift = 0;
int zfs_arc_p_min_shift = 0;
*/
kstat_named_t arcstat_metadata_size;
/*
- * Number of bytes consumed by various buffers and structures
- * not actually backed with ARC buffers. This includes bonus
- * buffers (allocated directly via zio_buf_* functions),
- * dmu_buf_impl_t structures (allocated via dmu_buf_impl_t
- * cache), and dnode_t structures (allocated via dnode_t cache).
+ * Number of bytes consumed by dmu_buf_impl_t objects.
*/
- kstat_named_t arcstat_other_size;
+ kstat_named_t arcstat_dbuf_size;
+ /*
+ * Number of bytes consumed by dnode_t objects.
+ */
+ kstat_named_t arcstat_dnode_size;
+ /*
+ * Number of bytes consumed by bonus buffers.
+ */
+ kstat_named_t arcstat_bonus_size;
/*
* Total number of bytes consumed by ARC buffers residing in the
* arc_anon state. This includes *all* buffers in the arc_anon
kstat_named_t arcstat_prune;
kstat_named_t arcstat_meta_used;
kstat_named_t arcstat_meta_limit;
+ kstat_named_t arcstat_dnode_limit;
kstat_named_t arcstat_meta_max;
kstat_named_t arcstat_meta_min;
kstat_named_t arcstat_sync_wait_for_async;
{ "hdr_size", KSTAT_DATA_UINT64 },
{ "data_size", KSTAT_DATA_UINT64 },
{ "metadata_size", KSTAT_DATA_UINT64 },
- { "other_size", KSTAT_DATA_UINT64 },
+ { "dbuf_size", KSTAT_DATA_UINT64 },
+ { "dnode_size", KSTAT_DATA_UINT64 },
+ { "bonus_size", KSTAT_DATA_UINT64 },
{ "anon_size", KSTAT_DATA_UINT64 },
{ "anon_evictable_data", KSTAT_DATA_UINT64 },
{ "anon_evictable_metadata", KSTAT_DATA_UINT64 },
{ "arc_prune", KSTAT_DATA_UINT64 },
{ "arc_meta_used", KSTAT_DATA_UINT64 },
{ "arc_meta_limit", KSTAT_DATA_UINT64 },
+ { "arc_dnode_limit", KSTAT_DATA_UINT64 },
{ "arc_meta_max", KSTAT_DATA_UINT64 },
{ "arc_meta_min", KSTAT_DATA_UINT64 },
{ "sync_wait_for_async", KSTAT_DATA_UINT64 },
#define arc_tempreserve ARCSTAT(arcstat_tempreserve)
#define arc_loaned_bytes ARCSTAT(arcstat_loaned_bytes)
#define arc_meta_limit ARCSTAT(arcstat_meta_limit) /* max size for metadata */
+#define arc_dnode_limit ARCSTAT(arcstat_dnode_limit) /* max size for dnodes */
#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
#define arc_meta_used ARCSTAT(arcstat_meta_used) /* size of metadata */
#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
+#define arc_dbuf_size ARCSTAT(arcstat_dbuf_size) /* dbuf metadata */
+#define arc_dnode_size ARCSTAT(arcstat_dnode_size) /* dnode metadata */
+#define arc_bonus_size ARCSTAT(arcstat_bonus_size) /* bonus buffer metadata */
#define arc_need_free ARCSTAT(arcstat_need_free) /* bytes to be freed */
#define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */
static boolean_t arc_is_overflowing(void);
static void arc_buf_watch(arc_buf_t *);
static void arc_tuning_update(void);
+static void arc_prune_async(int64_t);
static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *);
static uint32_t arc_bufc_to_flags(arc_buf_contents_t);
case ARC_SPACE_META:
ARCSTAT_INCR(arcstat_metadata_size, space);
break;
- case ARC_SPACE_OTHER:
- ARCSTAT_INCR(arcstat_other_size, space);
+ case ARC_SPACE_BONUS:
+ ARCSTAT_INCR(arcstat_bonus_size, space);
+ break;
+ case ARC_SPACE_DNODE:
+ ARCSTAT_INCR(arcstat_dnode_size, space);
+ break;
+ case ARC_SPACE_DBUF:
+ ARCSTAT_INCR(arcstat_dbuf_size, space);
break;
case ARC_SPACE_HDRS:
ARCSTAT_INCR(arcstat_hdr_size, space);
case ARC_SPACE_META:
ARCSTAT_INCR(arcstat_metadata_size, -space);
break;
- case ARC_SPACE_OTHER:
- ARCSTAT_INCR(arcstat_other_size, -space);
+ case ARC_SPACE_BONUS:
+ ARCSTAT_INCR(arcstat_bonus_size, -space);
+ break;
+ case ARC_SPACE_DNODE:
+ ARCSTAT_INCR(arcstat_dnode_size, -space);
+ break;
+ case ARC_SPACE_DBUF:
+ ARCSTAT_INCR(arcstat_dbuf_size, -space);
break;
case ARC_SPACE_HDRS:
ARCSTAT_INCR(arcstat_hdr_size, -space);
* we're evicting all available buffers.
*/
while (total_evicted < bytes || bytes == ARC_EVICT_ALL) {
+ int sublist_idx = multilist_get_random_index(ml);
+ uint64_t scan_evicted = 0;
+
+ /*
+ * Try to reduce pinned dnodes with a floor of arc_dnode_limit.
+ * Request that zfs_arc_dnode_reduce_percent (default 10%) of
+ * the LRUs be scanned by the superblock shrinker.
+ */
+ if (type == ARC_BUFC_DATA && arc_dnode_size > arc_dnode_limit)
+ arc_prune_async((arc_dnode_size - arc_dnode_limit) /
+ sizeof (dnode_t) / zfs_arc_dnode_reduce_percent);
+
/*
* Start eviction using a randomly selected sublist,
* this is to try and evenly balance eviction across all
* (e.g. index 0) would cause evictions to favor certain
* sublists over others.
*/
- int sublist_idx = multilist_get_random_index(ml);
- uint64_t scan_evicted = 0;
-
for (i = 0; i < num_sublists; i++) {
uint64_t bytes_remaining;
uint64_t bytes_evicted;
arc_c = arc_c_max;
arc_p = (arc_c >> 1);
arc_meta_limit = MIN(arc_meta_limit, (3 * arc_c_max) / 4);
+ arc_dnode_limit = arc_meta_limit / 10;
}
/* Valid range: 32M - <arc_c_max> */
(zfs_arc_meta_min <= arc_c_max)) {
arc_meta_min = zfs_arc_meta_min;
arc_meta_limit = MAX(arc_meta_limit, arc_meta_min);
+ arc_dnode_limit = arc_meta_limit / 10;
}
/* Valid range: <arc_meta_min> - <arc_c_max> */
(zfs_arc_meta_limit <= arc_c_max))
arc_meta_limit = zfs_arc_meta_limit;
+ /* Valid range: <arc_meta_min> - <arc_c_max> */
+ if ((zfs_arc_dnode_limit) && (zfs_arc_dnode_limit != arc_dnode_limit) &&
+ (zfs_arc_dnode_limit >= zfs_arc_meta_min) &&
+ (zfs_arc_dnode_limit <= arc_c_max))
+ arc_dnode_limit = zfs_arc_dnode_limit;
+
/* Valid range: 1 - N */
if (zfs_arc_grow_retry)
arc_grow_retry = zfs_arc_grow_retry;
arc_meta_max = 0;
/* Set limit to 3/4 of arc_c_max with a floor of arc_meta_min */
arc_meta_limit = MAX((3 * arc_c_max) / 4, arc_meta_min);
+ /* Default dnode limit is 10% of overall meta limit */
+ arc_dnode_limit = arc_meta_limit / 10;
/* Apply user specified tunings */
arc_tuning_update();
module_param(zfs_arc_sys_free, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_sys_free, "System free memory target size in bytes");
+module_param(zfs_arc_dnode_limit, ulong, 0644);
+MODULE_PARM_DESC(zfs_arc_dnode_limit, "Minimum bytes of dnodes in arc");
+
+module_param(zfs_arc_dnode_reduce_percent, ulong, 0644);
+MODULE_PARM_DESC(zfs_arc_dnode_reduce_percent,
+ "Percentage of excess dnodes to try to unpin");
+
#endif
ASSERT3U(bonuslen, <=, db->db.db_size);
db->db.db_data = zio_buf_alloc(max_bonuslen);
- arc_space_consume(max_bonuslen, ARC_SPACE_OTHER);
+ arc_space_consume(max_bonuslen, ARC_SPACE_BONUS);
if (bonuslen < max_bonuslen)
bzero(db->db.db_data, max_bonuslen);
if (bonuslen)
dnode_t *dn = DB_DNODE(db);
int bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
dr->dt.dl.dr_data = zio_buf_alloc(bonuslen);
- arc_space_consume(bonuslen, ARC_SPACE_OTHER);
+ arc_space_consume(bonuslen, ARC_SPACE_BONUS);
bcopy(db->db.db_data, dr->dt.dl.dr_data, bonuslen);
} else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
int size = db->db.db_size;
int slots = DB_DNODE(db)->dn_num_slots;
int bonuslen = DN_SLOTS_TO_BONUSLEN(slots);
zio_buf_free(db->db.db_data, bonuslen);
- arc_space_return(bonuslen, ARC_SPACE_OTHER);
+ arc_space_return(bonuslen, ARC_SPACE_BONUS);
}
db->db.db_data = NULL;
db->db_state = DB_UNCACHED;
db->db.db_offset = DMU_BONUS_BLKID;
db->db_state = DB_UNCACHED;
/* the bonus dbuf is not placed in the hash table */
- arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
+ arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
return (db);
} else if (blkid == DMU_SPILL_BLKID) {
db->db.db_size = (blkptr != NULL) ?
dn->dn_unlisted_l0_blkid = db->db_blkid + 1;
db->db_state = DB_UNCACHED;
mutex_exit(&dn->dn_dbufs_mtx);
- arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
+ arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
if (parent && parent != dn->dn_dbuf)
dbuf_add_ref(parent, db);
ASSERT(db->db_data_pending == NULL);
kmem_cache_free(dbuf_cache, db);
- arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
+ arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
}
typedef struct dbuf_prefetch_arg {
int slots = DB_DNODE(db)->dn_num_slots;
int bonuslen = DN_SLOTS_TO_BONUSLEN(slots);
zio_buf_free(*datap, bonuslen);
- arc_space_return(bonuslen, ARC_SPACE_OTHER);
+ arc_space_return(bonuslen, ARC_SPACE_BONUS);
}
db->db_data_pending = NULL;
drp = &db->db_last_dirty;