granicus.if.org Git - spl/commitdiff
kmem slab magazine ageing deadlock
author    Brian Behlendorf <behlendorf1@llnl.gov>
          Tue, 17 Feb 2009 23:52:18 +0000 (15:52 -0800)
committer Brian Behlendorf <behlendorf1@llnl.gov>
          Tue, 17 Feb 2009 23:52:18 +0000 (15:52 -0800)
- The previous magazine ageing scheme relied on the on_each_cpu()
  function to call spl_magazine_age() on each cpu.  It turns out
  this could deadlock with do_flush_tlb_all(), which also relies
  on the IPI based on_each_cpu().  To avoid this problem a per-
  magazine delayed work item is created and independently
  scheduled on the correct cpu, removing the need for on_each_cpu()
  (a minimal sketch of this pattern follows below).
- Additionally, two unused fields were removed from the type
  spl_kmem_cache_t; they were holdovers from a previous cleanup.
    - struct work_struct work
    - struct timer_list timer
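
For illustration only (not part of this commit): a minimal, self-contained
sketch of the per-cpu delayed work pattern the change adopts.  It assumes
the post-2.6.20 workqueue API (struct work_struct * callbacks) rather than
the SPL compatibility wrappers spl_init_delayed_work()/spl_get_work_data(),
and every my_* name, MY_AGE_DELAY, and MY_BIT_DESTROY is hypothetical.

/*
 * Hypothetical sketch: one delayed work item per cpu, each queued with
 * schedule_delayed_work_on() so it always runs on its own cpu and re-arms
 * itself, instead of being driven by an IPI-based on_each_cpu() broadcast.
 */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/cpumask.h>
#include <linux/jiffies.h>
#include <linux/bitops.h>

#define MY_AGE_DELAY    (15 * HZ)       /* hypothetical ageing interval */
#define MY_BIT_DESTROY  0               /* cache-is-being-destroyed flag */

struct my_mag {
        int                     cpu;    /* cpu this magazine is bound to */
        struct delayed_work     work;   /* per-magazine ageing work */
};

static struct my_mag my_mags[NR_CPUS];
static unsigned long my_flags;

/* Runs in process context on mag->cpu; no IPIs are involved. */
static void my_mag_age(struct work_struct *work)
{
        struct my_mag *mag = container_of(work, struct my_mag, work.work);

        /* ... age/flush this cpu's magazine here ... */

        /* Re-arm on the same cpu unless the cache is being destroyed. */
        if (!test_bit(MY_BIT_DESTROY, &my_flags))
                schedule_delayed_work_on(mag->cpu, &mag->work, MY_AGE_DELAY);
}

static int __init my_init(void)
{
        int cpu;

        /* Only after everything is set up, schedule the per-cpu work. */
        for_each_online_cpu(cpu) {
                my_mags[cpu].cpu = cpu;
                INIT_DELAYED_WORK(&my_mags[cpu].work, my_mag_age);
                schedule_delayed_work_on(cpu, &my_mags[cpu].work, MY_AGE_DELAY);
        }
        return 0;
}

static void __exit my_exit(void)
{
        int cpu;

        /* Mirror the commit's destroy path: flag, cancel each cpu, flush. */
        set_bit(MY_BIT_DESTROY, &my_flags);
        for_each_online_cpu(cpu)
                cancel_delayed_work(&my_mags[cpu].work);
        flush_scheduled_work();
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");

Because each work item is queued with schedule_delayed_work_on() and re-arms
itself on its own cpu, the ageing always runs in process context on the right
cpu and never requires an IPI broadcast, which is what made the on_each_cpu()
approach deadlock-prone against do_flush_tlb_all().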

include/sys/kmem.h
module/spl/spl-kmem.c

index dc66a91536a413a52186e8287ddb3bb8ea698e3b..cad652c918bbc0672710088e5ca244f6dc478f37 100644 (file)
@@ -250,10 +250,12 @@ typedef void (*spl_kmem_dtor_t)(void *, void *);
 typedef void (*spl_kmem_reclaim_t)(void *);
 
 typedef struct spl_kmem_magazine {
-        uint32_t               skm_magic;      /* Sanity magic */
+       uint32_t                skm_magic;      /* Sanity magic */
        uint32_t                skm_avail;      /* Available objects */
        uint32_t                skm_size;       /* Magazine size */
        uint32_t                skm_refill;     /* Batch refill size */
+       struct spl_kmem_cache   *skm_cache;     /* Owned by cache */
+       struct delayed_work     skm_work;       /* Magazine reclaim work */
        unsigned long           skm_age;        /* Last cache access */
        void                    *skm_objs[0];   /* Object pointers */
 } spl_kmem_magazine_t;
@@ -296,8 +298,6 @@ typedef struct spl_kmem_cache {
        uint32_t                skc_reap;       /* Slab reclaim count */
        atomic_t                skc_ref;        /* Ref count callers */
        struct delayed_work     skc_work;       /* Slab reclaim work */
-        struct work_struct work;
-        struct timer_list timer;
        struct list_head        skc_list;       /* List of caches linkage */
        struct list_head        skc_complete_list;/* Completely alloc'ed */
        struct list_head        skc_partial_list; /* Partially alloc'ed */
index ba7e19b4eb42102b498b0a9e96bcc5b9e1d2546d..18613e799c2df30eda3968af14e156702a1ac599 100644 (file)
@@ -932,12 +932,22 @@ spl_slab_reclaim(spl_kmem_cache_t *skc, int count, int flag)
 static void
 spl_magazine_age(void *data)
 {
-       spl_kmem_cache_t *skc = data;
-       spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()];
+       spl_kmem_magazine_t *skm =
+               spl_get_work_data(data, spl_kmem_magazine_t, skm_work.work);
+       spl_kmem_cache_t *skc = skm->skm_cache;
+       int i = smp_processor_id();
+
+       ASSERT(skm->skm_magic == SKM_MAGIC);
+       ASSERT(skc->skc_magic == SKC_MAGIC);
+       ASSERT(skc->skc_mag[i] == skm);
 
        if (skm->skm_avail > 0 &&
            time_after(jiffies, skm->skm_age + skc->skc_delay * HZ))
                (void)spl_cache_flush(skc, skm, skm->skm_refill);
+
+       if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
+               schedule_delayed_work_on(i, &skm->skm_work,
+                                        skc->skc_delay / 3 * HZ);
 }
 
 /*
@@ -949,12 +959,11 @@ spl_magazine_age(void *data)
 static void
 spl_cache_age(void *data)
 {
-        spl_kmem_cache_t *skc =
+       spl_kmem_cache_t *skc =
                spl_get_work_data(data, spl_kmem_cache_t, skc_work.work);
 
        ASSERT(skc->skc_magic == SKC_MAGIC);
        spl_slab_reclaim(skc, skc->skc_reap, 0);
-       spl_on_each_cpu(spl_magazine_age, skc, 0);
 
        if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags))
                schedule_delayed_work(&skc->skc_work, skc->skc_delay / 3 * HZ);
@@ -1050,6 +1059,8 @@ spl_magazine_alloc(spl_kmem_cache_t *skc, int node)
                skm->skm_avail = 0;
                skm->skm_size = skc->skc_mag_size;
                skm->skm_refill = skc->skc_mag_refill;
+               skm->skm_cache = skc;
+               spl_init_delayed_work(&skm->skm_work, spl_magazine_age, skm);
                skm->skm_age = jiffies;
        }
 
@@ -1095,6 +1106,11 @@ spl_magazine_create(spl_kmem_cache_t *skc)
                }
        }
 
+       /* Only after everything is allocated schedule magazine work */
+       for_each_online_cpu(i)
+               schedule_delayed_work_on(i, &skc->skc_mag[i]->skm_work,
+                                        skc->skc_delay / 3 * HZ);
+
        RETURN(0);
 }
 
@@ -1245,6 +1261,7 @@ void
 spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
 {
        DECLARE_WAIT_QUEUE_HEAD(wq);
+       int i;
        ENTRY;
 
        ASSERT(skc->skc_magic == SKC_MAGIC);
@@ -1256,6 +1273,9 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
        /* Cancel any and wait for any pending delayed work */
        ASSERT(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags));
        cancel_delayed_work(&skc->skc_work);
+       for_each_online_cpu(i)
+               cancel_delayed_work(&skc->skc_mag[i]->skm_work);
+
        flush_scheduled_work();
 
        /* Wait until all current callers complete, this is mainly