- Remove hash functionality from slab in favor of direct lookups
author    behlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>
Tue, 1 Jul 2008 03:28:54 +0000 (03:28 +0000)
committer behlendo <behlendo@7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c>
Tue, 1 Jul 2008 03:28:54 +0000 (03:28 +0000)
  based on the spl_kmem_obj_t tacked on the end of each object.
  This actually isn't so bad because we are now allocating large
  chunks for the slab and partitioning them ourselves, so there's
  not a ton of wasted space.  We may suffer a performance hit,
  however, due to alignment issues.

- Remove remaining dependencies on the Linux slab implementation.
  We're standing on our own now, for better or worse.

- Rework slabs to be either kmem or vmem based.  If neither
  KMC_VMEM nor KMC_KMEM is specified, we make a decent guess
  about what will work best for them based on the object
  size.  Additionally, we provide a kmem_virt() function callers
  can use to see whether they have a virtual or physical address.

- Minor fixups in the test suite.

git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@141 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c
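
The "direct lookups" mentioned above work because each object's tracking
structure now sits immediately after the object data, so the free path can
find it with pointer arithmetic instead of probing a hash table (see
spl_cache_shrink() below, where sko = obj + skc->skc_obj_size).  The
fragment that follows is a minimal user-space sketch of that layout; the
type, field, and function names are illustrative stand-ins, not the SPL's
actual spl_kmem_obj_t handling.

#include <assert.h>
#include <stddef.h>

#define EX_MAGIC 0x20202020u            /* illustrative sanity value */

typedef struct obj_meta {               /* stand-in for spl_kmem_obj_t */
        unsigned int     om_magic;      /* sanity check */
        void            *om_addr;       /* start of the object data */
        void            *om_slab;       /* owning slab (opaque here) */
} obj_meta_t;

/* Recover the metadata tacked on to the end of an object in O(1),
 * given only the object address and the cache's object size. */
static obj_meta_t *
obj_to_meta(void *obj, size_t obj_size)
{
        obj_meta_t *om = (obj_meta_t *)((char *)obj + obj_size);

        assert(om->om_magic == EX_MAGIC);
        return om;
}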

include/sys/kmem.h
modules/spl/spl-kmem.c
modules/spl/spl-proc.c
modules/splat/splat-kmem.c

index b90e3fdaf77dbaa61e7a5b6446b91355cb8649ef..9397caa0a56b1a940b4f6e68de936046444a25f2 100644 (file)
@@ -403,11 +403,14 @@ kmem_alloc_tryhard(size_t size, size_t *alloc_size, int kmflags)
 /*
  * Slab allocation interfaces
  */
-#undef  KMC_NOTOUCH                     /* XXX: Unsupported */
-#define KMC_NODEBUG                     0x00000000 /* Default behavior */
-#define KMC_NOMAGAZINE                  /* XXX: Unsupported */
-#define KMC_NOHASH                      /* XXX: Unsupported */
-#define KMC_QCACHE                      /* XXX: Unsupported */
+#define KMC_NOTOUCH                     0x00000001
+#define KMC_NODEBUG                     0x00000002 /* Default behavior */
+#define KMC_NOMAGAZINE                  0x00000004 /* XXX: No disable support available */
+#define KMC_NOHASH                      0x00000008 /* XXX: No hash available */
+#define KMC_QCACHE                      0x00000010 /* XXX: Unsupported */
+#define KMC_KMEM                       0x00000100 /* Use kmem cache */
+#define KMC_VMEM                       0x00000200 /* Use vmem cache */
+#define KMC_OFFSLAB                    0x00000400 /* Objects not on slab */
 
 #define KMC_REAP_CHUNK                  256
 #define KMC_DEFAULT_SEEKS               DEFAULT_SEEKS
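
A hedged usage sketch (not part of this commit): a caller that knows its
objects are large can pass KMC_VMEM to force vmem-backed slabs, while
leaving both KMC_KMEM and KMC_VMEM unset lets the cache pick a backend
from the object size, as implemented in spl-kmem.c below.  The cache name
and object size here are made up, and the NULL constructor/destructor/
reclaim arguments assume the existing spl_kmem_cache_create() prototype.

#include <sys/kmem.h>

static spl_kmem_cache_t *
example_create_vmem_cache(void)
{
        return spl_kmem_cache_create("example_large_cache", /* hypothetical */
                                     64 * 1024,        /* object size */
                                     0,                /* default alignment */
                                     NULL, NULL, NULL, /* ctor, dtor, reclaim */
                                     NULL,             /* private data */
                                     NULL,             /* vmem arg (unused) */
                                     KMC_VMEM);        /* force vmem backing */
}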
@@ -462,11 +465,6 @@ extern struct rw_semaphore spl_kmem_cache_sem;
 #define SKS_MAGIC                      0x22222222
 #define SKC_MAGIC                      0x2c2c2c2c
 
-#define SPL_KMEM_CACHE_HASH_BITS       12
-#define SPL_KMEM_CACHE_HASH_ELTS       (1 << SPL_KMEM_CACHE_HASH_BITS)
-#define SPL_KMEM_CACHE_HASH_SIZE       (sizeof(struct hlist_head) * \
-                                        SPL_KMEM_CACHE_HASH_ELTS)
-
 #define SPL_KMEM_CACHE_DELAY           5
 #define SPL_KMEM_CACHE_OBJ_PER_SLAB    32
 
@@ -488,7 +486,6 @@ typedef struct spl_kmem_obj {
        void                    *sko_addr;      /* Buffer address */
        struct spl_kmem_slab    *sko_slab;      /* Owned by slab */
        struct list_head        sko_list;       /* Free object list linkage */
-       struct hlist_node       sko_hlist;      /* Used object hash linkage */
 } spl_kmem_obj_t;
 
 typedef struct spl_kmem_slab {
@@ -515,14 +512,9 @@ typedef struct spl_kmem_cache {
         void                   *skc_vmp;       /* Unused */
        uint32_t                skc_flags;      /* Flags */
        uint32_t                skc_obj_size;   /* Object size */
-       uint32_t                skc_chunk_size; /* sizeof(*obj) + alignment */
-       uint32_t                skc_slab_size;  /* slab size */
-       uint32_t                skc_max_chunks; /* max chunks per slab */
+       uint32_t                skc_slab_objs;  /* Objects per slab */
+       uint32_t                skc_slab_size;  /* Slab size */
        uint32_t                skc_delay;      /* slab reclaim interval */
-       uint32_t                skc_hash_bits;  /* Hash table bits */
-       uint32_t                skc_hash_size;  /* Hash table size */
-       uint32_t                skc_hash_elts;  /* Hash table elements */
-       struct hlist_head       *skc_hash;      /* Hash table address */
         struct list_head       skc_list;       /* List of caches linkage */
        struct list_head        skc_complete_list;/* Completely alloc'ed */
        struct list_head        skc_partial_list; /* Partially alloc'ed */
@@ -536,8 +528,6 @@ typedef struct spl_kmem_cache {
        uint64_t                skc_obj_total;  /* Obj total current */
        uint64_t                skc_obj_alloc;  /* Obj alloc current */
        uint64_t                skc_obj_max;    /* Obj max historic */
-       uint64_t                skc_hash_depth; /* Lazy hash depth */
-       uint64_t                skc_hash_count; /* Hash entries current */
 } spl_kmem_cache_t;
 
 extern spl_kmem_cache_t *
@@ -561,6 +551,8 @@ void spl_kmem_fini(void);
 #define kmem_cache_free(skc, obj)      spl_kmem_cache_free(skc, obj)
 #define kmem_cache_reap_now(skc)       spl_kmem_cache_reap_now(skc)
 #define kmem_reap()                    spl_kmem_reap()
+#define kmem_virt(ptr)                 (((ptr) >= (void *)VMALLOC_START) && \
+                                        ((ptr) <  (void *)VMALLOC_END))
 
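A brief, hypothetical use of the new kmem_virt() macro (not taken from this
commit): it only range-checks a pointer against VMALLOC_START/VMALLOC_END,
so a nonzero result means the buffer is vmem-backed and may not be
physically contiguous.

#include <sys/kmem.h>

/* Hypothetical helper: nonzero if a slab-cache buffer is vmem-backed
 * (virtual address from vmem_alloc()), zero if it is kmem-backed
 * (physical address from kmem_alloc()/__get_free_pages()). */
static int
example_buf_is_virtual(void *buf)
{
        return kmem_virt(buf);
}
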
 #ifdef HAVE_KMEM_CACHE_CREATE_DTOR
 #define __kmem_cache_create(name, size, align, flags, ctor, dtor) \
index 36987610855f39d1f74e6c243c9aa79a34764443..3be038d5e851c8a872d77ad2bc822fc7d3ced527 100644 (file)
@@ -114,10 +114,6 @@ EXPORT_SYMBOL(kmem_set_warning);
  *      shrink them via spl_slab_reclaim() when they are wasting lots
  *      of space.  Currently this process is driven by the reapers.
  *
- * XXX: Implement a resizable used object hash.  Currently the hash
- *      is statically sized for thousands of objects but it should
- *      grow based on observed worst case slab depth.
- *
  * XXX: Improve the partial slab list by carefully maintaining a
  *      strict ordering of fullest to emptiest slabs based on
  *      the slab reference count.  This gaurentees the when freeing
@@ -134,20 +130,8 @@ EXPORT_SYMBOL(kmem_set_warning);
  * XXX: Proper hardware cache alignment would be good too.
  */
 
-/* Ensure the __kmem_cache_create/__kmem_cache_destroy macros are
- * removed here to prevent a recursive substitution, we want to call
- * the native linux version.
- */
-#undef kmem_cache_t
-#undef kmem_cache_create
-#undef kmem_cache_destroy
-#undef kmem_cache_alloc
-#undef kmem_cache_free
-
 struct list_head spl_kmem_cache_list;  /* List of caches */
 struct rw_semaphore spl_kmem_cache_sem;        /* Cache list lock */
-static kmem_cache_t *spl_slab_cache;   /* Cache for slab structs */
-static kmem_cache_t *spl_obj_cache;    /* Cache for obj structs */
 
 static int spl_cache_flush(spl_kmem_cache_t *skc,
                           spl_kmem_magazine_t *skm, int flush);
@@ -163,180 +147,119 @@ static struct shrinker spl_kmem_cache_shrinker = {
 };
 #endif
 
-static void
-spl_slab_init(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
-{
-       sks->sks_magic = SKS_MAGIC;
-       sks->sks_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB;
-       sks->sks_age = jiffies;
-       sks->sks_cache = skc;
-       INIT_LIST_HEAD(&sks->sks_list);
-       INIT_LIST_HEAD(&sks->sks_free_list);
-       sks->sks_ref = 0;
-}
-
-static int
-spl_slab_alloc_kmem(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks, int flags)
+static void *
+kv_alloc(spl_kmem_cache_t *skc, int size, int flags)
 {
-       spl_kmem_obj_t *sko, *n;
-       int i, rc = 0;
-
-       /* This is based on the linux slab cache for now simply because
-        * it means I get slab coloring, hardware cache alignment, etc
-        * for free.  There's no reason we can't do this ourselves.  And
-        * we probably should at in the future.  For now I'll just
-        * leverage the existing linux slab here. */
-       for (i = 0; i < sks->sks_objs; i++) {
-               sko = kmem_cache_alloc(spl_obj_cache, flags);
-               if (sko == NULL) {
-                       rc = -ENOMEM;
-                       break;
-               }
+       void *ptr;
 
-               sko->sko_addr = kmem_alloc(skc->skc_obj_size, flags);
-               if (sko->sko_addr == NULL) {
-                       kmem_cache_free(spl_obj_cache, sko);
-                       rc = -ENOMEM;
-                       break;
-               }
-
-               sko->sko_magic = SKO_MAGIC;
-               sko->sko_slab = sks;
-               INIT_LIST_HEAD(&sko->sko_list);
-               INIT_HLIST_NODE(&sko->sko_hlist);
-               list_add(&sko->sko_list, &sks->sks_free_list);
+       if (skc->skc_flags & KMC_KMEM) {
+               if (size > (2 * PAGE_SIZE)) {
+                       ptr = (void *)__get_free_pages(flags, get_order(size));
+               } else
+                       ptr = kmem_alloc(size, flags);
+       } else {
+               ptr = vmem_alloc(size, flags);
        }
 
-       /* Unable to fully construct slab, unwind everything */
-       if (rc) {
-               list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
-                       ASSERT(sko->sko_magic == SKO_MAGIC);
-                       kmem_free(sko->sko_addr, skc->skc_obj_size);
-                       list_del(&sko->sko_list);
-                       kmem_cache_free(spl_obj_cache, sko);
-               }
-       }
+       return ptr;
+}
 
-       RETURN(rc);
+static void
+kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
+{
+       if (skc->skc_flags & KMC_KMEM) {
+               if (size > (2 * PAGE_SIZE))
+                       free_pages((unsigned long)ptr, get_order(size));
+               else
+                       kmem_free(ptr, size);
+       } else {
+               vmem_free(ptr, size);
+       }
 }
 
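For kmem-backed caches, kv_alloc() above falls back to __get_free_pages()
once a request exceeds two pages, and such page allocations are rounded up
to a power-of-two order of physically contiguous pages.  The stand-alone
sketch below only illustrates that rounding; example_get_order() is a
user-space stand-in for the kernel's get_order(), assuming 4 KiB pages.

#include <stdio.h>

#define EX_PAGE_SHIFT 12                /* assumes 4 KiB pages */

static int
example_get_order(unsigned long size)
{
        int order = 0;

        size = (size - 1) >> EX_PAGE_SHIFT;
        while (size) {
                order++;
                size >>= 1;
        }
        return order;
}

int main(void)
{
        /* A 3 page (12 KiB) request rounds up to an order-2, 4 page
         * allocation of physically contiguous memory. */
        printf("order = %d\n", example_get_order(3 * 4096));
        return 0;
}
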
 static spl_kmem_slab_t *
-spl_slab_alloc_vmem(spl_kmem_cache_t *skc, int flags)
+spl_slab_alloc(spl_kmem_cache_t *skc, int flags)
 {
        spl_kmem_slab_t *sks;
-       spl_kmem_obj_t *sko, *sko_base;
-       void *slab, *obj, *obj_base;
-       int i, size;
-
-       /* For large vmem_alloc'ed buffers it's important that we pack the
-        * spl_kmem_obj_t structure and the actual objects in to one large
-        * virtual address zone to minimize the number of calls to
-        * vmalloc().  Mapping the virtual address in done under a single
-        * global lock which walks a list of all virtual zones.  So doing
-        * lots of allocations simply results in lock contention and a
-        * longer list of mapped addresses.  It is far better to do a
-        * few large allocations and then subdivide it ourselves.  The
-        * large vmem_alloc'ed space is divied as follows:
+       spl_kmem_obj_t *sko, *n;
+       void *base, *obj;
+       int i, size, rc = 0;
+
+       /* It's important that we pack the spl_kmem_obj_t structure
+        * and the actual objects into one large address space
+        * to minimize the number of calls to the allocator.  It
+        * is far better to do a few large allocations and then
+        * subdivide them ourselves.  Which allocator we use
+        * requires balancing a few trade-offs.
+        *
+        * For small objects we use kmem_alloc() because as long
+        * as you are only requesting a small number of pages
+        * (ideally just one) it's cheap.  However, when you start
+        * requesting multiple pages kmem_alloc() gets increasingly
+        * expensive since it requires contiguous pages.  For this
+        * reason we shift to vmem_alloc() for slabs of large
+        * objects, which removes the need for contiguous pages.
+        * We do not use vmem_alloc() in all cases because there
+        * is significant locking overhead in __get_vm_area_node().
+        * This function takes a single global lock when acquiring
+        * an available virtual address range, which serializes all
+        * vmem_alloc() calls for all slab caches.  Using slightly
+        * different allocation functions for small and large
+        * objects should give us the best of both worlds.
         *
-        * 1 slab struct: sizeof(spl_kmem_slab_t)
-        * N obj structs: sizeof(spl_kmem_obj_t) * skc->skc_objs
-        * N objects:     skc->skc_obj_size * skc->skc_objs
+        * sks struct:  sizeof(spl_kmem_slab_t)
+        * obj data:    skc->skc_obj_size
+        * obj struct:  sizeof(spl_kmem_obj_t)
+        * <N obj data + obj structs>
         *
         * XXX: It would probably be a good idea to more carefully
-        *      align the starts of these objects in memory.
+        *      align these data structures in memory.
         */
-       size = sizeof(spl_kmem_slab_t) + SPL_KMEM_CACHE_OBJ_PER_SLAB *
-              (skc->skc_obj_size + sizeof(spl_kmem_obj_t));
-
-       slab = vmem_alloc(size, flags);
-       if (slab == NULL)
+       base = kv_alloc(skc, skc->skc_slab_size, flags);
+       if (base == NULL)
                RETURN(NULL);
 
-       sks = (spl_kmem_slab_t *)slab;
-       spl_slab_init(skc, sks);
-
-       sko_base = (spl_kmem_obj_t *)(slab + sizeof(spl_kmem_slab_t));
-       obj_base = (void *)sko_base + sizeof(spl_kmem_obj_t) * sks->sks_objs;
+       sks = (spl_kmem_slab_t *)base;
+       sks->sks_magic = SKS_MAGIC;
+       sks->sks_objs = skc->skc_slab_objs;
+       sks->sks_age = jiffies;
+       sks->sks_cache = skc;
+       INIT_LIST_HEAD(&sks->sks_list);
+       INIT_LIST_HEAD(&sks->sks_free_list);
+       sks->sks_ref = 0;
+       size = sizeof(spl_kmem_obj_t) + skc->skc_obj_size;
 
        for (i = 0; i < sks->sks_objs; i++) {
-               sko = &sko_base[i];
-               obj = obj_base + skc->skc_obj_size * i;
+               if (skc->skc_flags & KMC_OFFSLAB) {
+                       obj = kv_alloc(skc, size, flags);
+                       if (!obj)
+                               GOTO(out, rc = -ENOMEM);
+               } else {
+                       obj = base + sizeof(spl_kmem_slab_t) + i * size;
+               }
+
+               sko = obj + skc->skc_obj_size;
                sko->sko_addr = obj;
                sko->sko_magic = SKO_MAGIC;
                sko->sko_slab = sks;
                INIT_LIST_HEAD(&sko->sko_list);
-               INIT_HLIST_NODE(&sko->sko_hlist);
                list_add_tail(&sko->sko_list, &sks->sks_free_list);
        }
 
-       RETURN(sks);
-}
-
-static spl_kmem_slab_t *
-spl_slab_alloc(spl_kmem_cache_t *skc, int flags) {
-       spl_kmem_slab_t *sks;
-       spl_kmem_obj_t *sko;
-       int rc;
-       ENTRY;
-
-       /* Objects less than a page can use kmem_alloc() and avoid
-        * the locking overhead in __get_vm_area_node() when locking
-        * for a free address.  For objects over a page we use
-        * vmem_alloc() because it is usually worth paying this
-        * overhead to avoid the need to find contigeous pages.
-        * This should give us the best of both worlds. */
-       if (skc->skc_obj_size <= PAGE_SIZE) {
-               sks = kmem_cache_alloc(spl_slab_cache, flags);
-               if (sks == NULL)
-                       GOTO(out, sks = NULL);
-
-               spl_slab_init(skc, sks);
-
-               rc = spl_slab_alloc_kmem(skc, sks, flags);
-               if (rc) {
-                       kmem_cache_free(spl_slab_cache, sks);
-                       GOTO(out, sks = NULL);
-               }
-       } else {
-               sks = spl_slab_alloc_vmem(skc, flags);
-               if (sks == NULL)
-                       GOTO(out, sks = NULL);
-       }
-
-       ASSERT(sks);
        list_for_each_entry(sko, &sks->sks_free_list, sko_list)
                if (skc->skc_ctor)
                        skc->skc_ctor(sko->sko_addr, skc->skc_private, flags);
 out:
-       RETURN(sks);
-}
-
-static void
-spl_slab_free_kmem(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
-{
-       spl_kmem_obj_t *sko, *n;
-
-       ASSERT(skc->skc_magic == SKC_MAGIC);
-       ASSERT(sks->sks_magic == SKS_MAGIC);
+       if (rc) {
+               if (skc->skc_flags & KMC_OFFSLAB)
+                       list_for_each_entry_safe(sko,n,&sks->sks_free_list,sko_list)
+                               kv_free(skc, sko->sko_addr, size);
 
-       list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
-               ASSERT(sko->sko_magic == SKO_MAGIC);
-               kmem_free(sko->sko_addr, skc->skc_obj_size);
-               list_del(&sko->sko_list);
-               kmem_cache_free(spl_obj_cache, sko);
+               kv_free(skc, base, skc->skc_slab_size);
+               sks = NULL;
        }
 
-       kmem_cache_free(spl_slab_cache, sks);
-}
-
-static void
-spl_slab_free_vmem(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
-{
-       ASSERT(skc->skc_magic == SKC_MAGIC);
-       ASSERT(sks->sks_magic == SKS_MAGIC);
-
-       vmem_free(sks, SPL_KMEM_CACHE_OBJ_PER_SLAB *
-                 (skc->skc_obj_size + sizeof(spl_kmem_obj_t)));
+       RETURN(sks);
 }
 
 /* Removes slab from complete or partial list, so it must
@@ -346,6 +269,7 @@ static void
 spl_slab_free(spl_kmem_slab_t *sks) {
        spl_kmem_cache_t *skc;
        spl_kmem_obj_t *sko, *n;
+       int size;
        ENTRY;
 
        ASSERT(sks->sks_magic == SKS_MAGIC);
@@ -358,17 +282,20 @@ spl_slab_free(spl_kmem_slab_t *sks) {
        skc->skc_obj_total -= sks->sks_objs;
        skc->skc_slab_total--;
        list_del(&sks->sks_list);
+       size = sizeof(spl_kmem_obj_t) + skc->skc_obj_size;
 
        /* Run destructors slab is being released */
-       list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list)
+       list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) {
+               ASSERT(sko->sko_magic == SKO_MAGIC);
+
                if (skc->skc_dtor)
                        skc->skc_dtor(sko->sko_addr, skc->skc_private);
 
-       if (skc->skc_obj_size <= PAGE_SIZE)
-               spl_slab_free_kmem(skc, sks);
-       else
-               spl_slab_free_vmem(skc, sks);
+               if (skc->skc_flags & KMC_OFFSLAB)
+                       kv_free(skc, sko->sko_addr, size);
+       }
 
+       kv_free(skc, sks, skc->skc_slab_size);
        EXIT;
 }
 
@@ -449,7 +376,8 @@ spl_magazine_alloc(spl_kmem_cache_t *skc, int node)
                skm->skm_avail = 0;
                skm->skm_size = skc->skc_mag_size;
                skm->skm_refill = skc->skc_mag_refill;
-               skm->skm_age = jiffies;
+               if (!(skc->skc_flags & KMC_NOTOUCH))
+                       skm->skm_age = jiffies;
        }
 
        RETURN(skm);
@@ -511,9 +439,14 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
                       void *priv, void *vmp, int flags)
 {
         spl_kmem_cache_t *skc;
-       int i, rc, kmem_flags = KM_SLEEP;
+       uint32_t slab_max, slab_size, slab_objs;
+       int rc, kmem_flags = KM_SLEEP;
        ENTRY;
 
+       ASSERTF(!(flags & KMC_NOMAGAZINE), "Bad KMC_NOMAGAZINE (%x)\n", flags);
+       ASSERTF(!(flags & KMC_NOHASH), "Bad KMC_NOHASH (%x)\n", flags);
+       ASSERTF(!(flags & KMC_QCACHE), "Bad KMC_QCACHE (%x)\n", flags);
+
         /* We may be called when there is a non-zero preempt_count or
          * interrupts are disabled is which case we must not sleep.
         */
@@ -541,25 +474,8 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
        skc->skc_vmp = vmp;
        skc->skc_flags = flags;
        skc->skc_obj_size = size;
-       skc->skc_chunk_size = 0; /* XXX: Needed only when implementing   */
-       skc->skc_slab_size = 0;  /*      small slab object optimizations */
-       skc->skc_max_chunks = 0; /*      which are yet supported. */
        skc->skc_delay = SPL_KMEM_CACHE_DELAY;
 
-       skc->skc_hash_bits = SPL_KMEM_CACHE_HASH_BITS;
-       skc->skc_hash_size = SPL_KMEM_CACHE_HASH_SIZE;
-       skc->skc_hash_elts = SPL_KMEM_CACHE_HASH_ELTS;
-       skc->skc_hash = (struct hlist_head *)
-                       vmem_alloc(skc->skc_hash_size, kmem_flags);
-       if (skc->skc_hash == NULL) {
-               kmem_free(skc->skc_name, skc->skc_name_size);
-               kmem_free(skc, sizeof(*skc));
-               RETURN(NULL);
-       }
-
-       for (i = 0; i < skc->skc_hash_elts; i++)
-               INIT_HLIST_HEAD(&skc->skc_hash[i]);
-
        INIT_LIST_HEAD(&skc->skc_list);
        INIT_LIST_HEAD(&skc->skc_complete_list);
        INIT_LIST_HEAD(&skc->skc_partial_list);
@@ -573,12 +489,37 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
        skc->skc_obj_total = 0;
        skc->skc_obj_alloc = 0;
        skc->skc_obj_max = 0;
-       skc->skc_hash_depth = 0;
-       skc->skc_hash_count = 0;
+
+       /* If none passed select a cache type based on object size */
+       if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM))) {
+               if (skc->skc_obj_size < (PAGE_SIZE / 8)) {
+                       skc->skc_flags |= KMC_KMEM;
+               } else {
+                       skc->skc_flags |= KMC_VMEM;
+               }
+       }
+
+       /* Size slabs properly to ensure they are not too large */
+       slab_max = ((uint64_t)1 << (MAX_ORDER - 1)) * PAGE_SIZE;
+       if (skc->skc_flags & KMC_OFFSLAB) {
+               skc->skc_slab_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB;
+               skc->skc_slab_size = sizeof(spl_kmem_slab_t);
+               ASSERT(skc->skc_obj_size < slab_max);
+       } else {
+               slab_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB + 1;
+
+               do {
+                       slab_objs--;
+                       slab_size = sizeof(spl_kmem_slab_t) + slab_objs *
+                                   (skc->skc_obj_size+sizeof(spl_kmem_obj_t));
+               } while (slab_size > slab_max);
+
+               skc->skc_slab_objs = slab_objs;
+               skc->skc_slab_size = slab_size;
+       }
 
        rc = spl_magazine_create(skc);
        if (rc) {
-               vmem_free(skc->skc_hash, skc->skc_hash_size);
                kmem_free(skc->skc_name, skc->skc_name_size);
                kmem_free(skc, sizeof(*skc));
                RETURN(NULL);
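
To make the slab sizing loop above concrete, here is a stand-alone
user-space sketch of the same arithmetic under assumed values: 4 KiB
pages, MAX_ORDER of 11 (so slab_max is 4 MiB), and made-up header sizes
standing in for sizeof(spl_kmem_slab_t) and sizeof(spl_kmem_obj_t).  It
is not the kernel code itself, just the same calculation.

#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SIZE    4096ULL         /* assumed page size */
#define EX_MAX_ORDER    11              /* assumed MAX_ORDER */
#define EX_OBJ_PER_SLAB 32              /* SPL_KMEM_CACHE_OBJ_PER_SLAB */
#define EX_SLAB_HDR     64ULL           /* stand-in for sizeof(spl_kmem_slab_t) */
#define EX_OBJ_HDR      32ULL           /* stand-in for sizeof(spl_kmem_obj_t) */

int main(void)
{
        uint64_t slab_max = (1ULL << (EX_MAX_ORDER - 1)) * EX_PAGE_SIZE;
        uint64_t obj_size = 128 * 1024; /* e.g. a 128 KiB object */
        uint32_t slab_objs = EX_OBJ_PER_SLAB + 1;
        uint64_t slab_size;

        do {
                slab_objs--;
                slab_size = EX_SLAB_HDR + slab_objs * (obj_size + EX_OBJ_HDR);
        } while (slab_size > slab_max);

        /* With these assumptions 32 objects would exceed the 4 MiB cap,
         * so the loop settles on 31 objects per slab. */
        printf("objs/slab = %u, slab size = %llu bytes\n",
               slab_objs, (unsigned long long)slab_size);
        return 0;
}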
@@ -592,9 +533,6 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
 }
 EXPORT_SYMBOL(spl_kmem_cache_create);
 
-/* The caller must ensure there are no racing calls to
- * spl_kmem_cache_alloc() for this spl_kmem_cache_t.
- */
 void
 spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
 {
@@ -613,13 +551,15 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
        /* Validate there are no objects in use and free all the
         * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */
        ASSERT(list_empty(&skc->skc_complete_list));
-       ASSERTF(skc->skc_hash_count == 0, "skc->skc_hash_count=%d\n",
-               skc->skc_hash_count);
+       ASSERT(skc->skc_slab_alloc == 0);
+       ASSERT(skc->skc_obj_alloc == 0);
 
        list_for_each_entry_safe(sks, m, &skc->skc_partial_list, sks_list)
                spl_slab_free(sks);
 
-       vmem_free(skc->skc_hash, skc->skc_hash_size);
+       ASSERT(skc->skc_slab_total == 0);
+       ASSERT(skc->skc_obj_total == 0);
+
        kmem_free(skc->skc_name, skc->skc_name_size);
        spin_unlock(&skc->skc_lock);
 
@@ -629,64 +569,25 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc)
 }
 EXPORT_SYMBOL(spl_kmem_cache_destroy);
 
-/* The kernel provided hash_ptr() function behaves exceptionally badly
- * when all the addresses are page aligned which is likely the case
- * here.  To avoid this issue shift off the low order non-random bits.
- */
-static unsigned long
-spl_hash_ptr(void *ptr, unsigned int bits)
-{
-       return hash_long((unsigned long)ptr >> PAGE_SHIFT, bits);
-}
-
-static spl_kmem_obj_t *
-spl_hash_obj(spl_kmem_cache_t *skc, void *obj)
-{
-       struct hlist_node *node;
-       spl_kmem_obj_t *sko = NULL;
-       unsigned long key = spl_hash_ptr(obj, skc->skc_hash_bits);
-       int i = 0;
-
-       ASSERT(skc->skc_magic == SKC_MAGIC);
-       ASSERT(spin_is_locked(&skc->skc_lock));
-
-       hlist_for_each_entry(sko, node, &skc->skc_hash[key], sko_hlist) {
-
-               if (unlikely((++i) > skc->skc_hash_depth))
-                       skc->skc_hash_depth = i;
-
-               if (sko->sko_addr == obj) {
-                       ASSERT(sko->sko_magic == SKO_MAGIC);
-                       RETURN(sko);
-               }
-       }
-
-       RETURN(NULL);
-}
-
 static void *
 spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
 {
        spl_kmem_obj_t *sko;
-       unsigned long key;
 
        ASSERT(skc->skc_magic == SKC_MAGIC);
        ASSERT(sks->sks_magic == SKS_MAGIC);
        ASSERT(spin_is_locked(&skc->skc_lock));
 
-       sko = list_entry((&sks->sks_free_list)->next,spl_kmem_obj_t,sko_list);
+       sko = list_entry(sks->sks_free_list.next, spl_kmem_obj_t, sko_list);
        ASSERT(sko->sko_magic == SKO_MAGIC);
        ASSERT(sko->sko_addr != NULL);
 
-       /* Remove from sks_free_list and add to used hash */
+       /* Remove from sks_free_list */
        list_del_init(&sko->sko_list);
-       key = spl_hash_ptr(sko->sko_addr, skc->skc_hash_bits);
-       hlist_add_head(&sko->sko_hlist, &skc->skc_hash[key]);
 
        sks->sks_age = jiffies;
        sks->sks_ref++;
        skc->skc_obj_alloc++;
-       skc->skc_hash_count++;
 
        /* Track max obj usage statistics */
        if (skc->skc_obj_alloc > skc->skc_obj_max)
@@ -818,22 +719,17 @@ spl_cache_shrink(spl_kmem_cache_t *skc, void *obj)
        ASSERT(skc->skc_magic == SKC_MAGIC);
        ASSERT(spin_is_locked(&skc->skc_lock));
 
-       sko = spl_hash_obj(skc, obj);
-       ASSERTF(sko, "Obj %p missing from in-use hash (%d/%d) for cache %s\n",
-               obj, skc->skc_hash_depth, skc->skc_hash_count, skc->skc_name);
+       sko = obj + skc->skc_obj_size;
+       ASSERT(sko->sko_magic == SKO_MAGIC);
 
        sks = sko->sko_slab;
-       ASSERTF(sks, "Obj %p/%p linked to invalid slab for cache %s\n",
-               obj, sko, skc->skc_name);
-
+       ASSERT(sks->sks_magic == SKS_MAGIC);
        ASSERT(sks->sks_cache == skc);
-       hlist_del_init(&sko->sko_hlist);
        list_add(&sko->sko_list, &sks->sks_free_list);
 
        sks->sks_age = jiffies;
        sks->sks_ref--;
        skc->skc_obj_alloc--;
-       skc->skc_hash_count--;
 
        /* Move slab to skc_partial_list when no longer full.  Slabs
         * are added to the head to keep the partial list is quasi-full
@@ -906,7 +802,8 @@ restart:
        if (likely(skm->skm_avail)) {
                /* Object available in CPU cache, use it */
                obj = skm->skm_objs[--skm->skm_avail];
-               skm->skm_age = jiffies;
+               if (!(skc->skc_flags & KMC_NOTOUCH))
+                       skm->skm_age = jiffies;
        } else {
                /* Per-CPU cache empty, directly allocate from
                 * the slab and refill the per-CPU cache. */
@@ -1012,71 +909,6 @@ spl_kmem_reap(void)
 }
 EXPORT_SYMBOL(spl_kmem_reap);
 
-int
-spl_kmem_init(void)
-{
-       int rc = 0;
-       ENTRY;
-
-       init_rwsem(&spl_kmem_cache_sem);
-       INIT_LIST_HEAD(&spl_kmem_cache_list);
-
-       spl_slab_cache = NULL;
-       spl_obj_cache = NULL;
-
-       spl_slab_cache = __kmem_cache_create("spl_slab_cache",
-                                            sizeof(spl_kmem_slab_t),
-                                            0, 0, NULL, NULL);
-       if (spl_slab_cache == NULL)
-               GOTO(out_cache, rc = -ENOMEM);
-
-       spl_obj_cache = __kmem_cache_create("spl_obj_cache",
-                                           sizeof(spl_kmem_obj_t),
-                                           0, 0, NULL, NULL);
-       if (spl_obj_cache == NULL)
-               GOTO(out_cache, rc = -ENOMEM);
-
-#ifdef HAVE_SET_SHRINKER
-       spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
-                                              spl_kmem_cache_generic_shrinker);
-       if (spl_kmem_cache_shrinker == NULL)
-               GOTO(out_cache, rc = -ENOMEM);
-#else
-       register_shrinker(&spl_kmem_cache_shrinker);
-#endif
-
-#ifdef DEBUG_KMEM
-       atomic64_set(&kmem_alloc_used, 0);
-       atomic64_set(&vmem_alloc_used, 0);
-
-#ifdef DEBUG_KMEM_TRACKING
-       { int i;
-       spin_lock_init(&kmem_lock);
-       INIT_LIST_HEAD(&kmem_list);
-
-       for (i = 0; i < KMEM_TABLE_SIZE; i++)
-               INIT_HLIST_HEAD(&kmem_table[i]);
-
-       spin_lock_init(&vmem_lock);
-       INIT_LIST_HEAD(&vmem_list);
-
-       for (i = 0; i < VMEM_TABLE_SIZE; i++)
-               INIT_HLIST_HEAD(&vmem_table[i]);
-       }
-#endif
-#endif
-       RETURN(rc);
-
-out_cache:
-       if (spl_obj_cache)
-               (void)kmem_cache_destroy(spl_obj_cache);
-
-       if (spl_slab_cache)
-               (void)kmem_cache_destroy(spl_slab_cache);
-
-       RETURN(rc);
-}
-
 #if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING)
 static char *
 spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
@@ -1119,12 +951,28 @@ spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min)
        return str;
 }
 
+static int
+spl_kmem_init_tracking(struct list_head *list, spinlock_t *lock, int size)
+{
+       int i;
+       ENTRY;
+
+       spin_lock_init(lock);
+       INIT_LIST_HEAD(list);
+
+       for (i = 0; i < size; i++)
+               INIT_HLIST_HEAD(&kmem_table[i]);
+
+       RETURN(0);
+}
+
 static void
 spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock)
 {
        unsigned long flags;
        kmem_debug_t *kd;
        char str[17];
+       ENTRY;
 
        spin_lock_irqsave(lock, flags);
        if (!list_empty(list))
@@ -1138,11 +986,42 @@ spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock)
                       kd->kd_func, kd->kd_line);
 
        spin_unlock_irqrestore(lock, flags);
+       EXIT;
 }
 #else /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
+#define spl_kmem_init_tracking(list, lock, size)
 #define spl_kmem_fini_tracking(list, lock)
 #endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */
 
+int
+spl_kmem_init(void)
+{
+       int rc = 0;
+       ENTRY;
+
+       init_rwsem(&spl_kmem_cache_sem);
+       INIT_LIST_HEAD(&spl_kmem_cache_list);
+
+#ifdef HAVE_SET_SHRINKER
+       spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS,
+                                              spl_kmem_cache_generic_shrinker);
+       if (spl_kmem_cache_shrinker == NULL)
+               GOTO(out, rc = -ENOMEM);
+#else
+       register_shrinker(&spl_kmem_cache_shrinker);
+#endif
+
+#ifdef DEBUG_KMEM
+       atomic64_set(&kmem_alloc_used, 0);
+       atomic64_set(&vmem_alloc_used, 0);
+
+       spl_kmem_init_tracking(&kmem_list, &kmem_lock, KMEM_TABLE_SIZE);
+       spl_kmem_init_tracking(&vmem_list, &vmem_lock, VMEM_TABLE_SIZE);
+#endif
+out:
+       RETURN(rc);
+}
+
 void
 spl_kmem_fini(void)
 {
@@ -1171,8 +1050,5 @@ spl_kmem_fini(void)
        unregister_shrinker(&spl_kmem_cache_shrinker);
 #endif
 
-       (void)kmem_cache_destroy(spl_obj_cache);
-       (void)kmem_cache_destroy(spl_slab_cache);
-
        EXIT;
 }
index f2685f39bdf80f361cbb30d758ce8120b9a7fcc0..01983433d84a1137fa0cc7c80e74badb8d9c9a18 100644 (file)
@@ -577,14 +577,10 @@ slab_seq_show(struct seq_file *f, void *p)
 
        spin_lock(&skc->skc_lock);
         seq_printf(f, "%-36s      ", skc->skc_name);
-        seq_printf(f, "%u %u %u - %u %u %u - "
-                  "%lu %lu %lu - %lu %lu %lu - %lu %lu %lu - %lu %lu\n",
+        seq_printf(f, "%u %u %u - %lu %lu %lu - %lu %lu %lu - %lu %lu %lu\n",
                   (unsigned)skc->skc_obj_size,
-                  (unsigned)skc->skc_chunk_size,
+                  (unsigned)skc->skc_slab_objs,
                   (unsigned)skc->skc_slab_size,
-                  (unsigned)skc->skc_hash_bits,
-                  (unsigned)skc->skc_hash_size,
-                  (unsigned)skc->skc_hash_elts,
                   (long unsigned)skc->skc_slab_fail,
                   (long unsigned)skc->skc_slab_create,
                   (long unsigned)skc->skc_slab_destroy,
@@ -593,9 +589,7 @@ slab_seq_show(struct seq_file *f, void *p)
                   (long unsigned)skc->skc_slab_max,
                   (long unsigned)skc->skc_obj_total,
                   (long unsigned)skc->skc_obj_alloc,
-                  (long unsigned)skc->skc_obj_max,
-                  (long unsigned)skc->skc_hash_depth,
-                  (long unsigned)skc->skc_hash_count);
+                  (long unsigned)skc->skc_obj_max);
 
        spin_unlock(&skc->skc_lock);
 
index 49715152d8a4f11d93338ab8c1770f21bf01376e..af28c717c220c6ea529f94a3ffe026641576bd88 100644 (file)
@@ -371,18 +371,40 @@ out_free:
        return rc;
 }
 
+/* Validate small object cache behavior for dynamic/kmem/vmem caches */
 static int
 splat_kmem_test5(struct file *file, void *arg)
 {
-       return splat_kmem_cache_size_test(file, arg, SPLAT_KMEM_TEST5_NAME,
-                                         sizeof(kmem_cache_data_t) * 1, 0);
+       char *name = SPLAT_KMEM_TEST5_NAME;
+       int rc;
+
+       rc = splat_kmem_cache_size_test(file, arg, name, 128, 0);
+       if (rc)
+               return rc;
+
+       rc = splat_kmem_cache_size_test(file, arg, name, 128, KMC_KMEM);
+       if (rc)
+               return rc;
+
+       return splat_kmem_cache_size_test(file, arg, name, 128, KMC_VMEM);
 }
 
+/* Validate large object cache behavior for dynamic/kmem/vmem caches */
 static int
 splat_kmem_test6(struct file *file, void *arg)
 {
-       return splat_kmem_cache_size_test(file, arg, SPLAT_KMEM_TEST6_NAME,
-                                         sizeof(kmem_cache_data_t) * 1024, 0);
+       char *name = SPLAT_KMEM_TEST6_NAME;
+       int rc;
+
+       rc = splat_kmem_cache_size_test(file, arg, name, 128 * 1024, 0);
+       if (rc)
+               return rc;
+
+       rc = splat_kmem_cache_size_test(file, arg, name, 128 * 1024, KMC_KMEM);
+       if (rc)
+               return rc;
+
+       return splat_kmem_cache_size_test(file, arg, name, 128 * 1024, KMC_VMEM);
 }
 
 static void
@@ -533,11 +555,12 @@ splat_kmem_test8_thread(void *arg)
        vmem_free(objs, count * sizeof(void *));
 out:
        spin_lock(&kcp->kcp_lock);
-       kcp->kcp_threads--;
        if (!kcp->kcp_rc)
                kcp->kcp_rc = rc;
 
-        wake_up(&kcp->kcp_waitq);
+       if (--kcp->kcp_threads == 0)
+               wake_up(&kcp->kcp_waitq);
+
        spin_unlock(&kcp->kcp_lock);
 
         thread_exit();
@@ -573,7 +596,7 @@ splat_kmem_test8_sc(struct file *file, void *arg, int size, int count)
         splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s  %s", "name",
                     "time (sec)\tslabs       \tobjs        \thash\n");
         splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s  %s", "",
-                    "          \ttot/max/calc\ttot/max/calc\tsize/depth\n");
+                    "          \ttot/max/calc\ttot/max/calc\n");
 
        for (i = 1; i <= count; i *= 2) {
                kcp.kcp_size = size;
@@ -611,7 +634,7 @@ splat_kmem_test8_sc(struct file *file, void *arg, int size, int count)
                delta = timespec_sub(stop, start);
 
                splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %2ld.%09ld\t"
-                            "%lu/%lu/%lu\t%lu/%lu/%lu\t%lu/%lu\n",
+                            "%lu/%lu/%lu\t%lu/%lu/%lu\n",
                             kcp.kcp_cache->skc_name,
                             delta.tv_sec, delta.tv_nsec,
                             (unsigned long)kcp.kcp_cache->skc_slab_total,
@@ -620,9 +643,7 @@ splat_kmem_test8_sc(struct file *file, void *arg, int size, int count)
                                            SPL_KMEM_CACHE_OBJ_PER_SLAB),
                             (unsigned long)kcp.kcp_cache->skc_obj_total,
                             (unsigned long)kcp.kcp_cache->skc_obj_max,
-                            (unsigned long)(kcp.kcp_alloc * threads),
-                            (unsigned long)kcp.kcp_cache->skc_hash_size,
-                            (unsigned long)kcp.kcp_cache->skc_hash_depth);
+                            (unsigned long)(kcp.kcp_alloc * threads));
 
                kmem_cache_destroy(kcp.kcp_cache);