Don't run the reaper if we didn't shrink the cache
author    Debabrata Banerjee <dbanerje@akamai.com>
          Wed, 15 Mar 2017 23:31:56 +0000 (19:31 -0400)
committer Brian Behlendorf <behlendorf1@llnl.gov>
          Tue, 2 May 2017 19:50:13 +0000 (15:50 -0400)
Calling it when nothing is evictable causes extra kswapd CPU usage. Also,
if we didn't shrink, there is unlikely to be memory to reap because we
probably just called it microseconds ago. The exception is when we are in
direct reclaim.
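
In short, after this change arc_kmem_reap_now() runs only when it is likely
to find something to reap: from kswapd right after the ARC was shrunk, or
from direct reclaim. A minimal standalone sketch of that policy follows; the
enum and helper names are illustrative only, the real change is in
__arc_shrinker_func in the diff below.

  /*
   * Condensed sketch of the reclaim policy (not the actual arc.c code;
   * the enum and helper names here are made up for illustration).
   * Reap the ARC kmem caches only when kswapd actually shrank the ARC,
   * or when we are in direct reclaim; otherwise skip the reap, since a
   * reap moments after the last one is unlikely to free anything.
   */
  #include <stdbool.h>

  enum reap_action { REAP_NOW, SKIP_REAP };

  static enum reap_action
  arc_reap_policy(bool shrank_arc, bool is_kswapd)
  {
          if (!is_kswapd)
                  return (REAP_NOW);      /* direct reclaim: always reap */
          if (shrank_arc)
                  return (REAP_NOW);      /* kswapd freed ARC buffers */
          return (SKIP_REAP);             /* nothing evictable: skip reap */
  }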

You can see how hard this is being hit in kswapd with a light test
workload:

  34.95%  [zfs]             [k] arc_kmem_reap_now
   5.40%  [spl]             [k] spl_kmem_cache_reap_now
   3.79%  [kernel]          [k] _raw_spin_lock
   2.86%  [spl]             [k] __spl_kmem_cache_generic_shrinker.isra.7
   2.70%  [kernel]          [k] shrink_slab.part.37
   1.93%  [kernel]          [k] isolate_lru_pages.isra.43
   1.55%  [kernel]          [k] __wake_up_bit
   1.20%  [kernel]          [k] super_cache_count
   1.20%  [kernel]          [k] __radix_tree_lookup

With ZFS merely mounted and only ext4/pagecache memory pressure,
arc_kmem_reap_now still consumes excessive CPU:

  12.69%  [kernel]  [k] isolate_lru_pages.isra.43
  10.76%  [kernel]  [k] free_pcppages_bulk
   7.98%  [kernel]  [k] drop_buffers
   7.31%  [kernel]  [k] shrink_page_list
   6.44%  [zfs]     [k] arc_kmem_reap_now
   4.19%  [kernel]  [k] free_hot_cold_page
   4.00%  [kernel]  [k] __slab_free
   3.95%  [kernel]  [k] __isolate_lru_page
   3.09%  [kernel]  [k] __radix_tree_lookup

Same pagecache-only workload as above with this patch series:

  11.58%  [kernel]  [k] isolate_lru_pages.isra.43
  11.20%  [kernel]  [k] drop_buffers
   9.67%  [kernel]  [k] free_pcppages_bulk
   8.44%  [kernel]  [k] shrink_page_list
   4.86%  [kernel]  [k] __isolate_lru_page
   4.43%  [kernel]  [k] free_hot_cold_page
   4.00%  [kernel]  [k] __slab_free
   3.44%  [kernel]  [k] __radix_tree_lookup

   (arc_kmem_reap_now has 0 samples in perf)

AKAMAI: zfs: CR 3695042
Reviewed-by: Tim Chase <tim@chase2k.com>
Reviewed-by: Richard Yao <ryao@gentoo.org>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Debabrata Banerjee <dbanerje@akamai.com>
Issue #6035

module/zfs/arc.c

index 79b04f45564f6111acb2d2818b881ca71b38e621..efac973bae6822131b332f69aaa4964e4834104d 100644
@@ -4423,12 +4423,12 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
 
        /*
         * Evict the requested number of pages by shrinking arc_c the
-        * requested amount.  If there is nothing left to evict just
-        * reap whatever we can from the various arc slabs.
+        * requested amount.
         */
        if (pages > 0) {
                arc_shrink(ptob(sc->nr_to_scan));
-               arc_kmem_reap_now();
+               if (current_is_kswapd())
+                       arc_kmem_reap_now();
 #ifdef HAVE_SPLIT_SHRINKER_CALLBACK
                pages = MAX(pages - btop(arc_evictable_memory()), 0);
 #else
@@ -4439,10 +4439,8 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
                 */
                cv_broadcast(&arc_reclaim_waiters_cv);
 
-       } else {
-               arc_kmem_reap_now();
+       } else
                pages = SHRINK_STOP;
-       }
 
        /*
         * When direct reclaim is observed it usually indicates a rapid
@@ -4455,6 +4453,7 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
                ARCSTAT_BUMP(arcstat_memory_indirect_count);
        } else {
                arc_no_grow = B_TRUE;
+               arc_kmem_reap_now();
                ARCSTAT_BUMP(arcstat_memory_direct_count);
        }