From: Prakash Surya
Date: Fri, 3 Jan 2014 19:40:52 +0000 (-0800)
Subject: Prune metadata from ghost lists in arc_adjust_meta
X-Git-Tag: zfs-0.6.3~95^2~4
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=94520ca;p=zfs

Prune metadata from ghost lists in arc_adjust_meta

To maintain a strict limit on the metadata contained in the arc, while
preventing the arc buffer headers from completely consuming the
"arc_meta_used" space, we need to evict metadata buffers from the arc's
ghost lists along with the regular lists.

This change modifies arc_adjust_meta such that it more closely models
the adjustments made in arc_adjust. "arc_meta_used" is used similarly to
"arc_size", and "arc_meta_limit" is used similarly to "arc_c".

Testing metadata intensive workloads (e.g. creating, copying, and
removing millions of small files and/or directories) has shown this
change to make a dramatic improvement to the hit rate maintained in the
arc. While I think there is still room for improvement, this is a big
step in the right direction.

In addition, zpl_free_cached_objects was made into a no-op as I'm not
yet sure how to properly implement that function.

Signed-off-by: Prakash Surya Signed-off-by: Brian Behlendorf Issue #2110 --- diff --git a/include/sys/arc.h b/include/sys/arc.h index 9d68d3b43..5c8c1c1a3 100644 --- a/include/sys/arc.h +++ b/include/sys/arc.h @@ -160,7 +160,6 @@ void arc_freed(spa_t *spa, const blkptr_t *bp); void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private); int arc_buf_evict(arc_buf_t *buf); -void arc_adjust_meta(int64_t adjustment, boolean_t may_prune); void arc_flush(spa_t *spa); void arc_tempreserve_clear(uint64_t reserve); int arc_tempreserve_space(uint64_t reserve, uint64_t txg); diff --git a/module/zfs/arc.c b/module/zfs/arc.c index ad2e8a92d..9c2d0eaab 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -2268,24 +2268,61 @@ arc_do_user_evicts(void) * This is only used to enforce the tunable arc_meta_limit, if we are * unable to evict enough buffers notify the user via the prune callback. */ -void -arc_adjust_meta(int64_t adjustment, boolean_t may_prune) +static void +arc_adjust_meta(void) { - int64_t delta; + int64_t adjustmnt, delta; - if (adjustment > 0 && arc_mru->arcs_lsize[ARC_BUFC_METADATA] > 0) { - delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_METADATA], adjustment); + /* + * This slightly differs than the way we evict from the mru in + * arc_adjust because we don't have a "target" value (i.e. no + * "meta" arc_p). As a result, I think we can completely + * cannibalize the metadata in the MRU before we evict the + * metadata from the MFU. I think we probably need to implement a + * "metadata arc_p" value to do this properly. 
+ */ + adjustmnt = arc_meta_used - arc_meta_limit; + + if (adjustmnt > 0 && arc_mru->arcs_lsize[ARC_BUFC_METADATA] > 0) { + delta = MIN(arc_mru->arcs_lsize[ARC_BUFC_METADATA], adjustmnt); arc_evict(arc_mru, 0, delta, FALSE, ARC_BUFC_METADATA); - adjustment -= delta; + adjustmnt -= delta; } - if (adjustment > 0 && arc_mfu->arcs_lsize[ARC_BUFC_METADATA] > 0) { - delta = MIN(arc_mfu->arcs_lsize[ARC_BUFC_METADATA], adjustment); + /* + * We can't afford to recalculate adjustmnt here. If we do, + * new metadata buffers can sneak into the MRU or ANON lists, + * thus penalize the MFU metadata. Although the fudge factor is + * small, it has been empirically shown to be significant for + * certain workloads (e.g. creating many empty directories). As + * such, we use the original calculation for adjustmnt, and + * simply decrement the amount of data evicted from the MRU. + */ + + if (adjustmnt > 0 && arc_mfu->arcs_lsize[ARC_BUFC_METADATA] > 0) { + delta = MIN(arc_mfu->arcs_lsize[ARC_BUFC_METADATA], adjustmnt); arc_evict(arc_mfu, 0, delta, FALSE, ARC_BUFC_METADATA); - adjustment -= delta; } - if (may_prune && (adjustment > 0) && (arc_meta_used > arc_meta_limit)) + adjustmnt = arc_mru->arcs_lsize[ARC_BUFC_METADATA] + + arc_mru_ghost->arcs_lsize[ARC_BUFC_METADATA] - arc_meta_limit; + + if (adjustmnt > 0 && arc_mru_ghost->arcs_lsize[ARC_BUFC_METADATA] > 0) { + delta = MIN(adjustmnt, + arc_mru_ghost->arcs_lsize[ARC_BUFC_METADATA]); + arc_evict_ghost(arc_mru_ghost, 0, delta, ARC_BUFC_METADATA); + } + + adjustmnt = arc_mru_ghost->arcs_lsize[ARC_BUFC_METADATA] + + arc_mfu_ghost->arcs_lsize[ARC_BUFC_METADATA] - arc_meta_limit; + + if (adjustmnt > 0 && arc_mfu_ghost->arcs_lsize[ARC_BUFC_METADATA] > 0) { + delta = MIN(adjustmnt, + arc_mfu_ghost->arcs_lsize[ARC_BUFC_METADATA]); + arc_evict_ghost(arc_mfu_ghost, 0, delta, ARC_BUFC_METADATA); + } + + if (arc_meta_used > arc_meta_limit) arc_do_user_prune(zfs_arc_meta_prune); } @@ -2405,7 +2442,6 @@ static void arc_adapt_thread(void) { 
callb_cpr_t cpr; - int64_t prune; CALLB_CPR_INIT(&cpr, &arc_reclaim_thr_lock, callb_generic_cpr, FTAG); @@ -2441,14 +2477,7 @@ arc_adapt_thread(void) if (arc_no_grow && ddi_get_lbolt() >= arc_grow_time) arc_no_grow = FALSE; - /* - * Keep meta data usage within limits, arc_shrink() is not - * used to avoid collapsing the arc_c value when only the - * arc_meta_limit is being exceeded. - */ - prune = (int64_t)arc_meta_used - (int64_t)arc_meta_limit; - if (prune > 0) - arc_adjust_meta(prune, B_TRUE); + arc_adjust_meta(); arc_adjust(); diff --git a/module/zfs/zpl_super.c b/module/zfs/zpl_super.c index b4e7b6ed0..45639a6dd 100644 --- a/module/zfs/zpl_super.c +++ b/module/zfs/zpl_super.c @@ -342,7 +342,7 @@ zpl_nr_cached_objects(struct super_block *sb) static void zpl_free_cached_objects(struct super_block *sb, int nr_to_scan) { - arc_adjust_meta(nr_to_scan * sizeof (znode_t), B_FALSE); + /* noop */ } #endif /* HAVE_FREE_CACHED_OBJECTS */