Implement per-cpu local caches. This seems to have bough me another

author behlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>

Wed, 25 Jun 2008 20:57:45 +0000 (20:57 +0000)

committer behlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>

Wed, 25 Jun 2008 20:57:45 +0000 (20:57 +0000)
author behlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>
Wed, 25 Jun 2008 20:57:45 +0000 (20:57 +0000)
committer behlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>
Wed, 25 Jun 2008 20:57:45 +0000 (20:57 +0000)
diff --git a/include/sys/kmem.h b/include/sys/kmem.h

index fb0c22e3aa5a563a990fc1db300b0b27f85d165c..25269335d41283d4a506933b53bac772601cc1bd 100644 (file)
--- a/include/sys/kmem.h
+++ b/include/sys/kmem.h
@@ -360,6 +360,7 @@ kmem_debugging(void)
  extern int kmem_set_warning(int flag);
  
  
+#define SKM_MAGIC                      0x2e2e2e2e
  #define SKO_MAGIC                      0x20202020
  #define SKS_MAGIC                      0x22222222
  #define SKC_MAGIC                      0x2c2c2c2c
@@ -376,6 +377,15 @@ typedef int (*spl_kmem_ctor_t)(void *, void *, int);
  typedef void (*spl_kmem_dtor_t)(void *, void *);
  typedef void (*spl_kmem_reclaim_t)(void *);
  
+typedef struct spl_kmem_magazine {
+        uint32_t               skm_magic;      /* Sanity magic */
+       uint32_t                skm_avail;      /* Available objects */
+       uint32_t                skm_size;       /* Magazine size */
+       uint32_t                skm_refill;     /* Batch refill size */
+       unsigned long           skm_age;        /* Last cache access */
+       void                    *skm_objs[0];   /* Object pointers */
+} spl_kmem_magazine_t;
+
  typedef struct spl_kmem_obj {
          uint32_t               sko_magic;      /* Sanity magic */
         uint32_t                sko_flags;      /* Per object flags */
@@ -392,13 +402,16 @@ typedef struct spl_kmem_slab {
         struct list_head        sks_list;       /* Slab list linkage */
         struct list_head        sks_free_list;  /* Free object list */
         unsigned long           sks_age;        /* Last modify jiffie */
-       atomic_t                sks_ref;        /* Ref count used objects */
+       uint32_t                sks_ref;        /* Ref count used objects */
  } spl_kmem_slab_t;
  
  typedef struct spl_kmem_cache {
          uint32_t               skc_magic;      /* Sanity magic */
          uint32_t               skc_name_size;  /* Name length */
          char                   *skc_name;      /* Name string */
+       spl_kmem_magazine_t     *skc_mag[NR_CPUS]; /* Per-CPU warm cache */
+       uint32_t                skc_mag_size;   /* Magazine size */
+       uint32_t                skc_mag_refill; /* Magazine refill count */
          spl_kmem_ctor_t                skc_ctor;       /* Constructor */
          spl_kmem_dtor_t                skc_dtor;       /* Destructor */
          spl_kmem_reclaim_t      skc_reclaim;   /* Reclaimator */
@@ -427,8 +440,8 @@ typedef struct spl_kmem_cache {
         uint64_t                skc_obj_total;  /* Obj total current */
         uint64_t                skc_obj_alloc;  /* Obj alloc current */
         uint64_t                skc_obj_max;    /* Obj max historic */
-       uint64_t                skc_hash_depth; /* Hash depth */
-       uint64_t                skc_hash_max;   /* Hash depth max */
+       uint64_t                skc_hash_depth; /* Lazy hash depth */
+       uint64_t                skc_hash_count; /* Hash entries current */
  } spl_kmem_cache_t;
  
  extern spl_kmem_cache_t *
diff --git a/modules/spl/spl-kmem.c b/modules/spl/spl-kmem.c

index ec12aca217efbd4da6a31b30d5c35a4b6c952741..708b01cc53ae4c0a1faf4f7a4ab48c9303908494 100644 (file)
--- a/modules/spl/spl-kmem.c
+++ b/modules/spl/spl-kmem.c
@@ -109,13 +109,10 @@ EXPORT_SYMBOL(kmem_set_warning);
   * small virtual address space on 32bit arches.  This will seriously
   * constrain the size of the slab caches and their performance.
   *
- * XXX: Refactor the below code in to smaller functions.  This works
- *      for a first pass but each function is doing to much.
- *
   * XXX: Implement SPL proc interface to export full per cache stats.
   *
   * XXX: Implement work requests to keep an eye on each cache and
- *      shrink them via slab_reclaim() when they are wasting lots
+ *      shrink them via spl_slab_reclaim() when they are wasting lots
   *      of space.  Currently this process is driven by the reapers.
   *
   * XXX: Implement proper small cache object support by embedding
@@ -138,6 +135,8 @@ EXPORT_SYMBOL(kmem_set_warning);
   *
   * XXX: Slab coloring may also yield performance improvements and would
   *      be desirable to implement.
+ *
+ * XXX: Proper hardware cache alignment would be good too.
   */
  
  /* Ensure the __kmem_cache_create/__kmem_cache_destroy macros are
@@ -155,18 +154,22 @@ static struct rw_semaphore spl_kmem_cache_sem;    /* Cache list lock */
  static kmem_cache_t *spl_slab_cache;           /* Cache for slab structs */
  static kmem_cache_t *spl_obj_cache;            /* Cache for obj structs */
  
+static int spl_cache_flush(spl_kmem_cache_t *skc,
+                          spl_kmem_magazine_t *skm, int flush);
+
  #ifdef HAVE_SET_SHRINKER
  static struct shrinker *spl_kmem_cache_shrinker;
  #else
-static int kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask);
+static int spl_kmem_cache_generic_shrinker(int nr_to_scan,
+                                          unsigned int gfp_mask);
  static struct shrinker spl_kmem_cache_shrinker = {
-       .shrink = kmem_cache_generic_shrinker,
+       .shrink = spl_kmem_cache_generic_shrinker,
         .seeks = KMC_DEFAULT_SEEKS,
  };
  #endif
  
  static spl_kmem_slab_t *
-slab_alloc(spl_kmem_cache_t *skc, int flags) {
+spl_slab_alloc(spl_kmem_cache_t *skc, int flags) {
         spl_kmem_slab_t *sks;
         spl_kmem_obj_t *sko, *n;
         int i;
@@ -182,7 +185,7 @@ slab_alloc(spl_kmem_cache_t *skc, int flags) {
         sks->sks_cache = skc;
         INIT_LIST_HEAD(&sks->sks_list);
         INIT_LIST_HEAD(&sks->sks_free_list);
-       atomic_set(&sks->sks_ref, 0);
+       sks->sks_ref = 0;
  
         for (i = 0; i < sks->sks_objs; i++) {
                 sko = kmem_cache_alloc(spl_obj_cache, flags);
@@ -224,21 +227,19 @@ out:
   * be called with the 'skc->skc_lock' held.
   *                         */
  static void
-slab_free(spl_kmem_slab_t *sks) {
+spl_slab_free(spl_kmem_slab_t *sks) {
         spl_kmem_cache_t *skc;
         spl_kmem_obj_t *sko, *n;
         int i = 0;
         ENTRY;
  
         ASSERT(sks->sks_magic == SKS_MAGIC);
-       ASSERT(atomic_read(&sks->sks_ref) == 0);
+       ASSERT(sks->sks_ref == 0);
         skc = sks->sks_cache;
         skc->skc_obj_total -= sks->sks_objs;
         skc->skc_slab_total--;
  
-//#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
         ASSERT(spin_is_locked(&skc->skc_lock));
-//#endif
  
         list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
                 ASSERT(sko->sko_magic == SKO_MAGIC);
@@ -261,15 +262,13 @@ slab_free(spl_kmem_slab_t *sks) {
  }
  
  static int
-__slab_reclaim(spl_kmem_cache_t *skc)
+__spl_slab_reclaim(spl_kmem_cache_t *skc)
  {
         spl_kmem_slab_t *sks, *m;
         int rc = 0;
         ENTRY;
  
-//#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
         ASSERT(spin_is_locked(&skc->skc_lock));
-//#endif
         /*
          * Free empty slabs which have not been touched in skc_delay
          * seconds.  This delay time is important to avoid thrashing.
@@ -277,11 +276,11 @@ __slab_reclaim(spl_kmem_cache_t *skc)
          */
          list_for_each_entry_safe_reverse(sks, m, &skc->skc_partial_list,
                                          sks_list) {
-               if (atomic_read(&sks->sks_ref) > 0)
+               if (sks->sks_ref > 0)
                        break;
  
                 if (time_after(jiffies, sks->sks_age + skc->skc_delay * HZ)) {
-                       slab_free(sks);
+                       spl_slab_free(sks);
                         rc++;
                 }
         }
@@ -291,18 +290,110 @@ __slab_reclaim(spl_kmem_cache_t *skc)
  }
  
  static int
-slab_reclaim(spl_kmem_cache_t *skc)
+spl_slab_reclaim(spl_kmem_cache_t *skc)
  {
         int rc;
         ENTRY;
  
         spin_lock(&skc->skc_lock);
-       rc = __slab_reclaim(skc);
+       rc = __spl_slab_reclaim(skc);
         spin_unlock(&skc->skc_lock);
  
         RETURN(rc);
  }
  
+static int
+spl_magazine_size(spl_kmem_cache_t *skc)
+{
+       int size;
+       ENTRY;
+
+       /* Guesses for reasonable magazine sizes, they
+        * should really adapt based on observed usage. */
+       if (skc->skc_obj_size > (PAGE_SIZE * 256))
+               size = 1;
+       else if (skc->skc_obj_size > (PAGE_SIZE * 32))
+               size = 4;
+       else if (skc->skc_obj_size > (PAGE_SIZE))
+               size = 16;
+       else if (skc->skc_obj_size > (PAGE_SIZE / 4))
+               size = 32;
+       else if (skc->skc_obj_size > (PAGE_SIZE / 16))
+               size = 64;
+       else
+               size = 128;
+
+       RETURN(size);
+}
+
+static spl_kmem_magazine_t *
+spl_magazine_alloc(spl_kmem_cache_t *skc, int node)
+{
+       spl_kmem_magazine_t *skm;
+       int size = sizeof(spl_kmem_magazine_t) +
+                  sizeof(void *) * skc->skc_mag_size;
+       ENTRY;
+
+       skm = kmalloc_node(size, GFP_KERNEL, node);
+       if (skm) {
+               skm->skm_magic = SKM_MAGIC;
+               skm->skm_avail = 0;
+               skm->skm_size = skc->skc_mag_size;
+               skm->skm_refill = skc->skc_mag_refill;
+               skm->skm_age = jiffies;
+       }
+
+       RETURN(skm);
+}
+
+static void
+spl_magazine_free(spl_kmem_magazine_t *skm)
+{
+       ENTRY;
+       ASSERT(skm->skm_magic == SKM_MAGIC);
+       ASSERT(skm->skm_avail == 0);
+       kfree(skm);
+       EXIT;
+}
+
+static int
+spl_magazine_create(spl_kmem_cache_t *skc)
+{
+       int i;
+       ENTRY;
+
+       skc->skc_mag_size = spl_magazine_size(skc);
+       skc->skc_mag_refill = (skc->skc_mag_size + 1)  / 2;
+
+       for_each_online_cpu(i) {
+               skc->skc_mag[i] = spl_magazine_alloc(skc, cpu_to_node(i));
+               if (!skc->skc_mag[i]) {
+                       for (i--; i >= 0; i--)
+                               spl_magazine_free(skc->skc_mag[i]);
+
+                       RETURN(-ENOMEM);
+               }
+       }
+
+       RETURN(0);
+}
+
+static void
+spl_magazine_destroy(spl_kmem_cache_t *skc)
+{
+        spl_kmem_magazine_t *skm;
+       int i;
+       ENTRY;
+
+       for_each_online_cpu(i) {
+               skm = skc->skc_mag[i];
+               (void)spl_cache_flush(skc, skm, skm->skm_avail);
+               spl_magazine_free(skm);
+       }
+
+       EXIT;
+}
+
  spl_kmem_cache_t *
  spl_kmem_cache_create(char *name, size_t size, size_t align,
                        spl_kmem_ctor_t ctor,
@@ -311,7 +402,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
                        void *priv, void *vmp, int flags)
  {
          spl_kmem_cache_t *skc;
-       int i, kmem_flags = KM_SLEEP;
+       int i, rc, kmem_flags = KM_SLEEP;
         ENTRY;
  
          /* We may be called when there is a non-zero preempt_count or
@@ -326,7 +417,6 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
                 RETURN(NULL);
  
         skc->skc_magic = SKC_MAGIC;
-
         skc->skc_name_size = strlen(name) + 1;
         skc->skc_name = (char *)kmem_alloc(skc->skc_name_size, kmem_flags);
         if (skc->skc_name == NULL) {
@@ -355,6 +445,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
         if (skc->skc_hash == NULL) {
                 kmem_free(skc->skc_name, skc->skc_name_size);
                 kmem_free(skc, sizeof(*skc));
+               RETURN(NULL);
         }
  
         for (i = 0; i < skc->skc_hash_elts; i++)
@@ -374,7 +465,15 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
         skc->skc_obj_alloc = 0;
         skc->skc_obj_max = 0;
         skc->skc_hash_depth = 0;
-       skc->skc_hash_max = 0;
+       skc->skc_hash_count = 0;
+
+       rc = spl_magazine_create(skc);
+       if (rc) {
+               kmem_free(skc->skc_hash, skc->skc_hash_size);
+               kmem_free(skc->skc_name, skc->skc_name_size);
+               kmem_free(skc, sizeof(*skc));
+               RETURN(NULL);
+       }
  
         down_write(&spl_kmem_cache_sem);
          list_add_tail(&skc->skc_list, &spl_kmem_cache_list);
@@ -385,8 +484,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
  EXPORT_SYMBOL(spl_kmem_cache_create);
  
  /* The caller must ensure there are no racing calls to
- * spl_kmem_cache_alloc() for this spl_kmem_cache_t when
- * it is being destroyed.
+ * spl_kmem_cache_alloc() for this spl_kmem_cache_t.
   */
  void
  spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
@@ -398,20 +496,22 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
          list_del_init(&skc->skc_list);
          up_write(&spl_kmem_cache_sem);
  
+       spl_magazine_destroy(skc);
         spin_lock(&skc->skc_lock);
  
         /* Validate there are no objects in use and free all the
-        * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers.
-        */
+        * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */
         ASSERT(list_empty(&skc->skc_complete_list));
+       ASSERTF(skc->skc_hash_count == 0, "skc->skc_hash_count=%d\n",
+               skc->skc_hash_count);
  
          list_for_each_entry_safe(sks, m, &skc->skc_partial_list, sks_list)
-               slab_free(sks);
+               spl_slab_free(sks);
  
         kmem_free(skc->skc_hash, skc->skc_hash_size);
         kmem_free(skc->skc_name, skc->skc_name_size);
-       kmem_free(skc, sizeof(*skc));
         spin_unlock(&skc->skc_lock);
+       kmem_free(skc, sizeof(*skc));
  
         EXIT;
  }
@@ -427,88 +527,92 @@ spl_hash_ptr(void *ptr, unsigned int bits)
         return hash_long((unsigned long)ptr >> PAGE_SHIFT, bits);
  }
  
-#ifndef list_first_entry
-#define list_first_entry(ptr, type, member) \
-               list_entry((ptr)->next, type, member)
-#endif
+static spl_kmem_obj_t *
+spl_hash_obj(spl_kmem_cache_t *skc, void *obj)
+{
+        struct hlist_node *node;
+       spl_kmem_obj_t *sko = NULL;
+       unsigned long key = spl_hash_ptr(obj, skc->skc_hash_bits);
+       int i = 0;
  
-void *
-spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
+       ASSERT(spin_is_locked(&skc->skc_lock));
+
+        hlist_for_each_entry(sko, node, &skc->skc_hash[key], sko_hlist) {
+
+               if (unlikely((++i) > skc->skc_hash_depth))
+                       skc->skc_hash_depth = i;
+
+                if (sko->sko_addr == obj) {
+                       ASSERT(sko->sko_magic == SKO_MAGIC);
+                       RETURN(sko);
+               }
+       }
+
+       RETURN(NULL);
+}
+
+static void *
+spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
  {
-        spl_kmem_slab_t *sks;
         spl_kmem_obj_t *sko;
-       void *obj;
         unsigned long key;
-       ENTRY;
  
-       spin_lock(&skc->skc_lock);
-restart:
-       /* Check for available objects from the partial slabs */
-       if (!list_empty(&skc->skc_partial_list)) {
-               sks = list_first_entry(&skc->skc_partial_list,
-                                      spl_kmem_slab_t, sks_list);
-               ASSERT(sks->sks_magic == SKS_MAGIC);
-               ASSERT(atomic_read(&sks->sks_ref) < sks->sks_objs);
-               ASSERT(!list_empty(&sks->sks_free_list));
-
-               sko = list_first_entry(&sks->sks_free_list,
-                                      spl_kmem_obj_t, sko_list);
-               ASSERT(sko->sko_magic == SKO_MAGIC);
-               ASSERT(sko->sko_addr != NULL);
+       ASSERT(spin_is_locked(&skc->skc_lock));
  
-               /* Remove from sks_free_list, add to used hash */
-               list_del_init(&sko->sko_list);
-               key = spl_hash_ptr(sko->sko_addr, skc->skc_hash_bits);
-               hlist_add_head(&sko->sko_hlist, &skc->skc_hash[key]);
+       sko = list_entry((&sks->sks_free_list)->next,spl_kmem_obj_t,sko_list);
+       ASSERT(sko->sko_magic == SKO_MAGIC);
+       ASSERT(sko->sko_addr != NULL);
  
-               sks->sks_age = jiffies;
-               atomic_inc(&sks->sks_ref);
-               skc->skc_obj_alloc++;
+       /* Remove from sks_free_list and add to used hash */
+       list_del_init(&sko->sko_list);
+       key = spl_hash_ptr(sko->sko_addr, skc->skc_hash_bits);
+       hlist_add_head(&sko->sko_hlist, &skc->skc_hash[key]);
  
-               if (skc->skc_obj_alloc > skc->skc_obj_max)
-                       skc->skc_obj_max = skc->skc_obj_alloc;
+       sks->sks_age = jiffies;
+       sks->sks_ref++;
+       skc->skc_obj_alloc++;
+       skc->skc_hash_count++;
  
-               if (atomic_read(&sks->sks_ref) == 1) {
-                       skc->skc_slab_alloc++;
+       /* Track max obj usage statistics */
+       if (skc->skc_obj_alloc > skc->skc_obj_max)
+               skc->skc_obj_max = skc->skc_obj_alloc;
  
-                       if (skc->skc_slab_alloc > skc->skc_slab_max)
-                               skc->skc_slab_max = skc->skc_slab_alloc;
-               }
+       /* Track max slab usage statistics */
+       if (sks->sks_ref == 1) {
+               skc->skc_slab_alloc++;
  
-               /* Move slab to skc_complete_list when full */
-               if (atomic_read(&sks->sks_ref) == sks->sks_objs) {
-                       list_del(&sks->sks_list);
-                       list_add(&sks->sks_list, &skc->skc_complete_list);
-               }
-
-               GOTO(out_lock, obj = sko->sko_addr);
+               if (skc->skc_slab_alloc > skc->skc_slab_max)
+                       skc->skc_slab_max = skc->skc_slab_alloc;
         }
  
-       spin_unlock(&skc->skc_lock);
-
-       /* No available objects create a new slab.  Since this is an
-        * expensive operation we do it without holding the semaphore
-        * and only briefly aquire it when we link in the fully
-        * allocated and constructed slab.
-        */
+       return sko->sko_addr;
+}
  
-       /* Under Solaris if the KM_SLEEP flag is passed we may never
-        * fail, so sleep as long as needed.  Additionally, since we are
-        * using vmem_alloc() KM_NOSLEEP is not an option and we must
-        * fail.  Shifting to allocating our own pages and mapping the
-        * virtual address space may allow us to bypass this issue.
-        */
-       if (!flags)
-               flags |= KM_SLEEP;
+/* No available objects create a new slab.  Since this is an
+ * expensive operation we do it without holding the spinlock
+ * and only briefly aquire it when we link in the fully
+ * allocated and constructed slab.
+ */
+static spl_kmem_slab_t *
+spl_cache_grow(spl_kmem_cache_t *skc, int flags)
+{
+        spl_kmem_slab_t *sks;
+       spl_kmem_obj_t *sko;
+       ENTRY;
  
-       if (flags & KM_SLEEP)
+        if (flags & __GFP_WAIT) {
                 flags |= __GFP_NOFAIL;
-       else
-               GOTO(out, obj = NULL);
+               might_sleep();
+               local_irq_enable();
+       }
  
-       sks = slab_alloc(skc, flags);
-       if (sks == NULL)
-               GOTO(out, obj = NULL);
+       sks = spl_slab_alloc(skc, flags);
+       if (sks == NULL) {
+               if (flags & __GFP_WAIT)
+                       local_irq_disable();
+
+               RETURN(NULL);
+       }
  
         /* Run all the constructors now that the slab is fully allocated */
         list_for_each_entry(sko, &sks->sks_free_list, sko_list) {
@@ -518,81 +622,205 @@ restart:
                         skc->skc_ctor(sko->sko_addr, skc->skc_private, flags);
         }
  
-       /* Link the newly created slab in to the skc_partial_list,
-        * and retry the allocation which will now succeed.
-        */
+        if (flags & __GFP_WAIT)
+               local_irq_disable();
+
+       /* Link the new empty slab in to the end of skc_partial_list */
         spin_lock(&skc->skc_lock);
         skc->skc_slab_total++;
         skc->skc_obj_total += sks->sks_objs;
         list_add_tail(&sks->sks_list, &skc->skc_partial_list);
-       GOTO(restart, obj = NULL);
-
-out_lock:
         spin_unlock(&skc->skc_lock);
-out:
-       RETURN(obj);
+
+       RETURN(sks);
  }
-EXPORT_SYMBOL(spl_kmem_cache_alloc);
  
-void
-spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
+static int
+spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags)
  {
-        struct hlist_node *node;
-        spl_kmem_slab_t *sks = NULL;
-       spl_kmem_obj_t *sko = NULL;
-       unsigned long key = spl_hash_ptr(obj, skc->skc_hash_bits);
-       int i = 0;
+        spl_kmem_slab_t *sks;
+       int refill = skm->skm_refill;
         ENTRY;
  
+       /* XXX: Check for refill bouncing by age perhaps */
+
         spin_lock(&skc->skc_lock);
+       while (refill > 0) {
+               /* No slabs available we must grow the cache */
+               if (list_empty(&skc->skc_partial_list)) {
+                       spin_unlock(&skc->skc_lock);
+                       sks = spl_cache_grow(skc, flags);
+                       if (!sks)
+                               GOTO(out, refill);
+
+                       /* Rescheduled to different CPU skm is not local */
+                       if (skm != skc->skc_mag[smp_processor_id()])
+                               GOTO(out, refill);
+
+                       spin_lock(&skc->skc_lock);
+                       continue;
+               }
  
-        hlist_for_each_entry(sko, node, &skc->skc_hash[key], sko_hlist) {
+               /* Grab the next available slab */
+               sks = list_entry((&skc->skc_partial_list)->next,
+                                spl_kmem_slab_t, sks_list);
+               ASSERT(sks->sks_magic == SKS_MAGIC);
+               ASSERT(sks->sks_ref < sks->sks_objs);
+               ASSERT(!list_empty(&sks->sks_free_list));
  
-               if (unlikely((++i) > skc->skc_hash_depth))
-                       skc->skc_hash_depth = i;
+               /* Consume as many objects as needed to refill the requested
+                * cache.  We must be careful to lock here because our local
+                * magazine may not be local anymore due to spl_cache_grow. */
+               while ((sks->sks_ref < sks->sks_objs) && (refill-- > 0))
+                       skm->skm_objs[skm->skm_avail++]=spl_cache_obj(skc,sks);
  
-                if (sko->sko_addr == obj) {
-                       ASSERT(sko->sko_magic == SKO_MAGIC);
-                       sks = sko->sko_slab;
-                       break;
+               /* Move slab to skc_complete_list when full */
+               if (sks->sks_ref == sks->sks_objs) {
+                       list_del(&sks->sks_list);
+                       list_add(&sks->sks_list, &skc->skc_complete_list);
                 }
         }
  
-       ASSERT(sko != NULL); /* Obj must be in hash */
-       ASSERT(sks != NULL); /* Obj must reference slab */
+       spin_unlock(&skc->skc_lock);
+out:
+       /* Returns the number of entries added to cache */
+       RETURN(skm->skm_refill - refill);
+}
+
+static void
+spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
+{
+        spl_kmem_slab_t *sks = NULL;
+       spl_kmem_obj_t *sko = NULL;
+       ENTRY;
+
+       ASSERT(spin_is_locked(&skc->skc_lock));
+
+       sko = spl_hash_obj(skc, obj);
+       ASSERTF(sko, "Obj %p missing from in-use hash (%d) for cache %s\n",
+               obj, skc->skc_hash_count, skc->skc_name);
+
+       sks = sko->sko_slab;
+       ASSERTF(sks, "Obj %p/%p linked to invalid slab for cache %s\n",
+               obj, sko, skc->skc_name);
+
         ASSERT(sks->sks_cache == skc);
         hlist_del_init(&sko->sko_hlist);
         list_add(&sko->sko_list, &sks->sks_free_list);
  
         sks->sks_age = jiffies;
-       atomic_dec(&sks->sks_ref);
+       sks->sks_ref--;
         skc->skc_obj_alloc--;
+       skc->skc_hash_count--;
  
         /* Move slab to skc_partial_list when no longer full.  Slabs
-        * are added to the kead to keep the partial list is quasi
-        * full sorted order.  Fuller at the head, emptier at the tail.
-        */
-       if (atomic_read(&sks->sks_ref) == (sks->sks_objs - 1)) {
+        * are added to the head to keep the partial list is quasi-full
+        * sorted order.  Fuller at the head, emptier at the tail. */
+       if (sks->sks_ref == (sks->sks_objs - 1)) {
                 list_del(&sks->sks_list);
                 list_add(&sks->sks_list, &skc->skc_partial_list);
         }
  
         /* Move emply slabs to the end of the partial list so
-        * they can be easily found and freed during reclamation.
-        */
-       if (atomic_read(&sks->sks_ref) == 0) {
+        * they can be easily found and freed during reclamation. */
+       if (sks->sks_ref == 0) {
                 list_del(&sks->sks_list);
                 list_add_tail(&sks->sks_list, &skc->skc_partial_list);
                 skc->skc_slab_alloc--;
         }
  
-       __slab_reclaim(skc);
+       EXIT;
+}
+
+static int
+spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush)
+{
+       int i, count = MIN(flush, skm->skm_avail);
+       ENTRY;
+
+
+       spin_lock(&skc->skc_lock);
+       for (i = 0; i < count; i++)
+               spl_cache_shrink(skc, skm->skm_objs[i]);
+
+       __spl_slab_reclaim(skc);
+        skm->skm_avail -= count;
+        memmove(skm->skm_objs, &(skm->skm_objs[count]),
+               sizeof(void *) * skm->skm_avail);
+
         spin_unlock(&skc->skc_lock);
+
+       RETURN(count);
+}
+
+void *
+spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags)
+{
+       spl_kmem_magazine_t *skm;
+       unsigned long irq_flags;
+       void *obj = NULL;
+       ENTRY;
+
+       ASSERT(flags & KM_SLEEP);
+       local_irq_save(irq_flags);
+
+restart:
+       /* Safe to update per-cpu structure without lock, but
+        * in the restart case we must be careful to reaquire
+        * the local magazine since this may have changed
+        * when we need to grow the cache. */
+       skm = skc->skc_mag[smp_processor_id()];
+
+       if (likely(skm->skm_avail)) {
+               /* Object available in CPU cache, use it */
+               obj = skm->skm_objs[--skm->skm_avail];
+               skm->skm_age = jiffies;
+       } else {
+               /* Per-CPU cache empty, directly allocate from
+                * the slab and refill the per-CPU cache. */
+               (void)spl_cache_refill(skc, skm, flags);
+               GOTO(restart, obj = NULL);
+       }
+
+       local_irq_restore(irq_flags);
+
+       /* Pre-emptively migrate object to CPU L1 cache */
+       prefetchw(obj);
+
+       RETURN(obj);
+}
+EXPORT_SYMBOL(spl_kmem_cache_alloc);
+
+void
+spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj)
+{
+       spl_kmem_magazine_t *skm;
+       unsigned long flags;
+       ENTRY;
+
+       local_irq_save(flags);
+
+       /* Safe to update per-cpu structure without lock, but
+        * no remote memory allocation tracking is being performed
+        * it is entirely possible to allocate an object from one
+        * CPU cache and return it to another. */
+       skm = skc->skc_mag[smp_processor_id()];
+
+       /* Per-CPU cache full, flush it to make space */
+       if (unlikely(skm->skm_avail >= skm->skm_size))
+               (void)spl_cache_flush(skc, skm, skm->skm_refill);
+
+       /* Available space in cache, use it */
+       skm->skm_objs[skm->skm_avail++] = obj;
+
+       local_irq_restore(flags);
+
+       EXIT;
  }
  EXPORT_SYMBOL(spl_kmem_cache_free);
  
  static int
-kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
+spl_kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
  {
          spl_kmem_cache_t *skc;
  
@@ -619,13 +847,24 @@ kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask)
  void
  spl_kmem_cache_reap_now(spl_kmem_cache_t *skc)
  {
+       spl_kmem_magazine_t *skm;
+       int i;
         ENTRY;
          ASSERT(skc && skc->skc_magic == SKC_MAGIC);
  
         if (skc->skc_reclaim)
                 skc->skc_reclaim(skc->skc_private);
  
-       slab_reclaim(skc);
+       /* Ensure per-CPU caches which are idle gradually flush */
+       for_each_online_cpu(i) {
+               skm = skc->skc_mag[i];
+
+               if (time_after(jiffies, skm->skm_age + skc->skc_delay * HZ))
+                       (void)spl_cache_flush(skc, skm, skm->skm_refill);
+       }
+
+       spl_slab_reclaim(skc);
+
         EXIT;
  }
  EXPORT_SYMBOL(spl_kmem_cache_reap_now);
@@ -633,7 +872,7 @@ EXPORT_SYMBOL(spl_kmem_cache_reap_now);
  void
  spl_kmem_reap(void)
  {
-       kmem_cache_generic_shrinker(KMC_REAP_CHUNK, GFP_KERNEL);
+       spl_kmem_cache_generic_shrinker(KMC_REAP_CHUNK, GFP_KERNEL);
  }
  EXPORT_SYMBOL(spl_kmem_reap);
  
@@ -663,7 +902,7 @@ spl_kmem_init(void)
  
  #ifdef HAVE_SET_SHRINKER
         spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
-                                              kmem_cache_generic_shrinker);
+                                              spl_kmem_cache_generic_shrinker);
         if (spl_kmem_cache_shrinker == NULL)
                 GOTO(out_cache, rc = -ENOMEM);
  #else
@@ -703,7 +942,7 @@ out_cache:
  
  #ifdef DEBUG_KMEM
  static char *
-sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
+spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
  {
          int size = ((len - 1) < kd->kd_size) ? (len - 1) : kd->kd_size;
         int i, flag = 1;
@@ -769,7 +1008,7 @@ spl_kmem_fini(void)
         list_for_each_entry(kd, &kmem_list, kd_list)
                 CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
                        kd->kd_addr, kd->kd_size,
-                      sprintf_addr(kd, str, 17, 8),
+                      spl_sprintf_addr(kd, str, 17, 8),
                        kd->kd_func, kd->kd_line);
  
         spin_unlock_irqrestore(&kmem_lock, flags);
@@ -786,7 +1025,7 @@ spl_kmem_fini(void)
         list_for_each_entry(kd, &vmem_list, kd_list)
                 CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n",
                        kd->kd_addr, kd->kd_size,
-                      sprintf_addr(kd, str, 17, 8),
+                      spl_sprintf_addr(kd, str, 17, 8),
                        kd->kd_func, kd->kd_line);
  
         spin_unlock_irqrestore(&vmem_lock, flags);
diff --git a/modules/spl/spl-proc.c b/modules/spl/spl-proc.c

index 927fe86d3e170073d6f8c2bb6e78c9a4173900c9..8ef1698dc7223bc739a9a918c4c4125675642c40 100644 (file)
--- a/modules/spl/spl-proc.c
+++ b/modules/spl/spl-proc.c
@@ -913,7 +913,9 @@ out:
         if (rc) {
                 remove_proc_entry("kstat", proc_spl);
                 remove_proc_entry("kmem", proc_spl);
+#ifdef DEBUG_MUTEX
                 remove_proc_entry("stats_per", proc_spl_mutex);
+#endif
                 remove_proc_entry("mutex", proc_spl);
                 remove_proc_entry("spl", NULL);
  #ifdef CONFIG_SYSCTL
@@ -933,7 +935,9 @@ proc_fini(void)
  #if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT)
         remove_proc_entry("kstat", proc_spl);
         remove_proc_entry("kmem", proc_spl);
+#ifdef DEBUG_MUTEX
          remove_proc_entry("stats_per", proc_spl_mutex);
+#endif
         remove_proc_entry("mutex", proc_spl);
         remove_proc_entry("spl", NULL);
  #endif /* DEBUG_MUTEX || DEBUG_KMEM || DEBUG_KSTAT */
diff --git a/modules/spl/spl-vnode.c b/modules/spl/spl-vnode.c

index f6dbc00c3323c31c63d59989ed4b6411a6354afc..678d94682ecd996ae1923979e90da1da5eab6297 100644 (file)
--- a/modules/spl/spl-vnode.c
+++ b/modules/spl/spl-vnode.c
@@ -486,7 +486,7 @@ vn_getf(int fd)
         spin_unlock(&vn_file_lock);
  
         /* File was not yet opened create the object and setup */
-       fp = kmem_cache_alloc(vn_file_cache, 0);
+       fp = kmem_cache_alloc(vn_file_cache, KM_SLEEP);
         if (fp == NULL)
                 GOTO(out, rc);
  
diff --git a/modules/splat/splat-kmem.c b/modules/splat/splat-kmem.c

index 51fa6f0eff02a7fa4c53b2b1de6837620a23e390..3f059aa2232cca866989559d736a5f6cc4f127f0 100644 (file)
--- a/modules/splat/splat-kmem.c
+++ b/modules/splat/splat-kmem.c
@@ -525,6 +525,9 @@ splat_kmem_test8_thread(void *arg)
  
         objs = vmem_zalloc(count * sizeof(void *), KM_SLEEP);
         if (!objs) {
+               splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST8_NAME,
+                            "Unable to alloc objp array for cache '%s'\n",
+                            kcp->kcp_cache->skc_name);
                 rc = -ENOMEM;
                 goto out;
         }
@@ -533,14 +536,13 @@ splat_kmem_test8_thread(void *arg)
                 objs[i] = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP);
                 if (!objs[i]) {
                         splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST8_NAME,
-                                    "Unable to allocate from '%s'\n",
-                                    SPLAT_KMEM_CACHE_NAME);
+                                    "Unable to allocate from cache '%s'\n",
+                                    kcp->kcp_cache->skc_name);
                         rc = -ENOMEM;
-                       goto out_free;
+                       break;
                 }
         }
  
-out_free:
         for (i = 0; i < count; i++)
                 if (objs[i])
                         kmem_cache_free(kcp->kcp_cache, objs[i]);
@@ -578,6 +580,7 @@ splat_kmem_test8(struct file *file, void *arg)
         kmem_cache_priv_t kcp;
         kthread_t *thr;
         struct timespec start, stop, delta;
+       char cache_name[16];
         int alloc, i;
  
         kcp.kcp_magic = SPLAT_KMEM_TEST_MAGIC;
@@ -588,7 +591,7 @@ splat_kmem_test8(struct file *file, void *arg)
          splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%s",
                      "          \ttot/max/calc\ttot/max/calc\tsize/depth\n");
  
-       for (alloc = 64; alloc <= 4096; alloc *= 2) {
+       for (alloc = 1; alloc <= 4096; alloc *= 2) {
                 kcp.kcp_size = 256;
                 kcp.kcp_count = 0;
                 kcp.kcp_threads = 0;
@@ -597,9 +600,8 @@ splat_kmem_test8(struct file *file, void *arg)
                 spin_lock_init(&kcp.kcp_lock);
                 init_waitqueue_head(&kcp.kcp_waitq);
  
-
-               kcp.kcp_cache = kmem_cache_create(SPLAT_KMEM_CACHE_NAME,
-                                                 kcp.kcp_size, 0,
+               sprintf(cache_name, "%s-%d", SPLAT_KMEM_CACHE_NAME, alloc);
+               kcp.kcp_cache = kmem_cache_create(cache_name, kcp.kcp_size, 0,
                                                   splat_kmem_cache_test_constructor,
                                                   splat_kmem_cache_test_destructor,
                                                   NULL, &kcp, NULL, 0);
author	behlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>
	Wed, 25 Jun 2008 20:57:45 +0000 (20:57 +0000)
committer	behlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>
	Wed, 25 Jun 2008 20:57:45 +0000 (20:57 +0000)
include/sys/kmem.h		patch \| blob \| history
modules/spl/spl-kmem.c		patch \| blob \| history
modules/spl/spl-proc.c		patch \| blob \| history
modules/spl/spl-vnode.c		patch \| blob \| history
modules/splat/splat-kmem.c		patch \| blob \| history