From: behlendo Date: Tue, 1 Jul 2008 03:28:54 +0000 (+0000) Subject: - Remove hash functionality from slab in favor of direct lookups X-Git-Tag: zfs-0.8.0-rc1~152^2~846 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a1502d76aef90d32ac779c1ae3cf89a80d9d591d;p=zfs - Remove hash functionality from slab in favor of direct lookups based on the spl_kmem_obj_t tacked on the end of each object. This actually isn't so bad because we are now allocating large chunks for the slab and partitioning it ourselves. So there's not a ton of wasted space. We may suffer a performance hit however due to alignment issues. - Remove remaining dependencies on the Linux slab implementation. We're standing on our own now for better or worse. - Rework slabs to be either kmem or vmem based. If neither KMC_VMEM nor KMC_KMEM is specified we make a decent guess about what will work best for them based on the object size. Additionally we provide a kmem_virt() function callers can use to see if they have a virtual or physical address. - Minor fixups in the test suite. 
git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@141 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c --- diff --git a/include/sys/kmem.h b/include/sys/kmem.h index b90e3fdaf..9397caa0a 100644 --- a/include/sys/kmem.h +++ b/include/sys/kmem.h @@ -403,11 +403,14 @@ kmem_alloc_tryhard(size_t size, size_t *alloc_size, int kmflags) /* * Slab allocation interfaces */ -#undef KMC_NOTOUCH /* XXX: Unsupported */ -#define KMC_NODEBUG 0x00000000 /* Default behavior */ -#define KMC_NOMAGAZINE /* XXX: Unsupported */ -#define KMC_NOHASH /* XXX: Unsupported */ -#define KMC_QCACHE /* XXX: Unsupported */ +#define KMC_NOTOUCH 0x00000001 +#define KMC_NODEBUG 0x00000002 /* Default behavior */ +#define KMC_NOMAGAZINE 0x00000004 /* XXX: No disable support available */ +#define KMC_NOHASH 0x00000008 /* XXX: No hash available */ +#define KMC_QCACHE 0x00000010 /* XXX: Unsupported */ +#define KMC_KMEM 0x00000100 /* Use kmem cache */ +#define KMC_VMEM 0x00000200 /* Use vmem cache */ +#define KMC_OFFSLAB 0x00000400 /* Objects not on slab */ #define KMC_REAP_CHUNK 256 #define KMC_DEFAULT_SEEKS DEFAULT_SEEKS @@ -462,11 +465,6 @@ extern struct rw_semaphore spl_kmem_cache_sem; #define SKS_MAGIC 0x22222222 #define SKC_MAGIC 0x2c2c2c2c -#define SPL_KMEM_CACHE_HASH_BITS 12 -#define SPL_KMEM_CACHE_HASH_ELTS (1 << SPL_KMEM_CACHE_HASH_BITS) -#define SPL_KMEM_CACHE_HASH_SIZE (sizeof(struct hlist_head) * \ - SPL_KMEM_CACHE_HASH_ELTS) - #define SPL_KMEM_CACHE_DELAY 5 #define SPL_KMEM_CACHE_OBJ_PER_SLAB 32 @@ -488,7 +486,6 @@ typedef struct spl_kmem_obj { void *sko_addr; /* Buffer address */ struct spl_kmem_slab *sko_slab; /* Owned by slab */ struct list_head sko_list; /* Free object list linkage */ - struct hlist_node sko_hlist; /* Used object hash linkage */ } spl_kmem_obj_t; typedef struct spl_kmem_slab { @@ -515,14 +512,9 @@ typedef struct spl_kmem_cache { void *skc_vmp; /* Unused */ uint32_t skc_flags; /* Flags */ uint32_t skc_obj_size; /* Object size */ - uint32_t skc_chunk_size; /* sizeof(*obj) + 
alignment */ - uint32_t skc_slab_size; /* slab size */ - uint32_t skc_max_chunks; /* max chunks per slab */ + uint32_t skc_slab_objs; /* Objects per slab */ + uint32_t skc_slab_size; /* Slab size */ uint32_t skc_delay; /* slab reclaim interval */ - uint32_t skc_hash_bits; /* Hash table bits */ - uint32_t skc_hash_size; /* Hash table size */ - uint32_t skc_hash_elts; /* Hash table elements */ - struct hlist_head *skc_hash; /* Hash table address */ struct list_head skc_list; /* List of caches linkage */ struct list_head skc_complete_list;/* Completely alloc'ed */ struct list_head skc_partial_list; /* Partially alloc'ed */ @@ -536,8 +528,6 @@ typedef struct spl_kmem_cache { uint64_t skc_obj_total; /* Obj total current */ uint64_t skc_obj_alloc; /* Obj alloc current */ uint64_t skc_obj_max; /* Obj max historic */ - uint64_t skc_hash_depth; /* Lazy hash depth */ - uint64_t skc_hash_count; /* Hash entries current */ } spl_kmem_cache_t; extern spl_kmem_cache_t * @@ -561,6 +551,8 @@ void spl_kmem_fini(void); #define kmem_cache_free(skc, obj) spl_kmem_cache_free(skc, obj) #define kmem_cache_reap_now(skc) spl_kmem_cache_reap_now(skc) #define kmem_reap() spl_kmem_reap() +#define kmem_virt(ptr) (((ptr) >= (void *)VMALLOC_START) && \ + ((ptr) < (void *)VMALLOC_END)) #ifdef HAVE_KMEM_CACHE_CREATE_DTOR #define __kmem_cache_create(name, size, align, flags, ctor, dtor) \ diff --git a/modules/spl/spl-kmem.c b/modules/spl/spl-kmem.c index 369876108..3be038d5e 100644 --- a/modules/spl/spl-kmem.c +++ b/modules/spl/spl-kmem.c @@ -114,10 +114,6 @@ EXPORT_SYMBOL(kmem_set_warning); * shrink them via spl_slab_reclaim() when they are wasting lots * of space. Currently this process is driven by the reapers. * - * XXX: Implement a resizable used object hash. Currently the hash - * is statically sized for thousands of objects but it should - * grow based on observed worst case slab depth. 
- * * XXX: Improve the partial slab list by carefully maintaining a * strict ordering of fullest to emptiest slabs based on * the slab reference count. This gaurentees the when freeing @@ -134,20 +130,8 @@ EXPORT_SYMBOL(kmem_set_warning); * XXX: Proper hardware cache alignment would be good too. */ -/* Ensure the __kmem_cache_create/__kmem_cache_destroy macros are - * removed here to prevent a recursive substitution, we want to call - * the native linux version. - */ -#undef kmem_cache_t -#undef kmem_cache_create -#undef kmem_cache_destroy -#undef kmem_cache_alloc -#undef kmem_cache_free - struct list_head spl_kmem_cache_list; /* List of caches */ struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ -static kmem_cache_t *spl_slab_cache; /* Cache for slab structs */ -static kmem_cache_t *spl_obj_cache; /* Cache for obj structs */ static int spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush); @@ -163,180 +147,119 @@ static struct shrinker spl_kmem_cache_shrinker = { }; #endif -static void -spl_slab_init(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks) -{ - sks->sks_magic = SKS_MAGIC; - sks->sks_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB; - sks->sks_age = jiffies; - sks->sks_cache = skc; - INIT_LIST_HEAD(&sks->sks_list); - INIT_LIST_HEAD(&sks->sks_free_list); - sks->sks_ref = 0; -} - -static int -spl_slab_alloc_kmem(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks, int flags) +static void * +kv_alloc(spl_kmem_cache_t *skc, int size, int flags) { - spl_kmem_obj_t *sko, *n; - int i, rc = 0; - - /* This is based on the linux slab cache for now simply because - * it means I get slab coloring, hardware cache alignment, etc - * for free. There's no reason we can't do this ourselves. And - * we probably should at in the future. For now I'll just - * leverage the existing linux slab here. 
*/ - for (i = 0; i < sks->sks_objs; i++) { - sko = kmem_cache_alloc(spl_obj_cache, flags); - if (sko == NULL) { - rc = -ENOMEM; - break; - } + void *ptr; - sko->sko_addr = kmem_alloc(skc->skc_obj_size, flags); - if (sko->sko_addr == NULL) { - kmem_cache_free(spl_obj_cache, sko); - rc = -ENOMEM; - break; - } - - sko->sko_magic = SKO_MAGIC; - sko->sko_slab = sks; - INIT_LIST_HEAD(&sko->sko_list); - INIT_HLIST_NODE(&sko->sko_hlist); - list_add(&sko->sko_list, &sks->sks_free_list); + if (skc->skc_flags & KMC_KMEM) { + if (size > (2 * PAGE_SIZE)) { + ptr = (void *)__get_free_pages(flags, get_order(size)); + } else + ptr = kmem_alloc(size, flags); + } else { + ptr = vmem_alloc(size, flags); } - /* Unable to fully construct slab, unwind everything */ - if (rc) { - list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) { - ASSERT(sko->sko_magic == SKO_MAGIC); - kmem_free(sko->sko_addr, skc->skc_obj_size); - list_del(&sko->sko_list); - kmem_cache_free(spl_obj_cache, sko); - } - } + return ptr; +} - RETURN(rc); +static void +kv_free(spl_kmem_cache_t *skc, void *ptr, int size) +{ + if (skc->skc_flags & KMC_KMEM) { + if (size > (2 * PAGE_SIZE)) + free_pages((unsigned long)ptr, get_order(size)); + else + kmem_free(ptr, size); + } else { + vmem_free(ptr, size); + } } static spl_kmem_slab_t * -spl_slab_alloc_vmem(spl_kmem_cache_t *skc, int flags) +spl_slab_alloc(spl_kmem_cache_t *skc, int flags) { spl_kmem_slab_t *sks; - spl_kmem_obj_t *sko, *sko_base; - void *slab, *obj, *obj_base; - int i, size; - - /* For large vmem_alloc'ed buffers it's important that we pack the - * spl_kmem_obj_t structure and the actual objects in to one large - * virtual address zone to minimize the number of calls to - * vmalloc(). Mapping the virtual address in done under a single - * global lock which walks a list of all virtual zones. So doing - * lots of allocations simply results in lock contention and a - * longer list of mapped addresses. 
It is far better to do a - * few large allocations and then subdivide it ourselves. The - * large vmem_alloc'ed space is divied as follows: + spl_kmem_obj_t *sko, *n; + void *base, *obj; + int i, size, rc = 0; + + /* It's important that we pack the spl_kmem_obj_t structure + * and the actual objects in to one large address space + * to minimize the number of calls to the allocator. It + * is far better to do a few large allocations and then + * subdivide it ourselves. Now which allocator we use + * requires balancling a few trade offs. + * + * For small objects we use kmem_alloc() because as long + * as you are only requesting a small number of pages + * (ideally just one) its cheap. However, when you start + * requesting multiple pages kmem_alloc() get increasingly + * expensive since it requires contigeous pages. For this + * reason we shift to vmem_alloc() for slabs of large + * objects which removes the need for contigeous pages. + * We do not use vmem_alloc() in all cases because there + * is significant locking overhead in __get_vm_area_node(). + * This function takes a single global lock when aquiring + * an available virtual address range which serialize all + * vmem_alloc()'s for all slab caches. Using slightly + * different allocation functions for small and large + * objects should give us the best of both worlds. * - * 1 slab struct: sizeof(spl_kmem_slab_t) - * N obj structs: sizeof(spl_kmem_obj_t) * skc->skc_objs - * N objects: skc->skc_obj_size * skc->skc_objs + * sks struct: sizeof(spl_kmem_slab_t) + * obj data: skc->skc_obj_size + * obj struct: sizeof(spl_kmem_obj_t) + * * * XXX: It would probably be a good idea to more carefully - * align the starts of these objects in memory. + * align these data structures in memory. 
*/ - size = sizeof(spl_kmem_slab_t) + SPL_KMEM_CACHE_OBJ_PER_SLAB * - (skc->skc_obj_size + sizeof(spl_kmem_obj_t)); - - slab = vmem_alloc(size, flags); - if (slab == NULL) + base = kv_alloc(skc, skc->skc_slab_size, flags); + if (base == NULL) RETURN(NULL); - sks = (spl_kmem_slab_t *)slab; - spl_slab_init(skc, sks); - - sko_base = (spl_kmem_obj_t *)(slab + sizeof(spl_kmem_slab_t)); - obj_base = (void *)sko_base + sizeof(spl_kmem_obj_t) * sks->sks_objs; + sks = (spl_kmem_slab_t *)base; + sks->sks_magic = SKS_MAGIC; + sks->sks_objs = skc->skc_slab_objs; + sks->sks_age = jiffies; + sks->sks_cache = skc; + INIT_LIST_HEAD(&sks->sks_list); + INIT_LIST_HEAD(&sks->sks_free_list); + sks->sks_ref = 0; + size = sizeof(spl_kmem_obj_t) + skc->skc_obj_size; for (i = 0; i < sks->sks_objs; i++) { - sko = &sko_base[i]; - obj = obj_base + skc->skc_obj_size * i; + if (skc->skc_flags & KMC_OFFSLAB) { + obj = kv_alloc(skc, size, flags); + if (!obj) + GOTO(out, rc = -ENOMEM); + } else { + obj = base + sizeof(spl_kmem_slab_t) + i * size; + } + + sko = obj + skc->skc_obj_size; sko->sko_addr = obj; sko->sko_magic = SKO_MAGIC; sko->sko_slab = sks; INIT_LIST_HEAD(&sko->sko_list); - INIT_HLIST_NODE(&sko->sko_hlist); list_add_tail(&sko->sko_list, &sks->sks_free_list); } - RETURN(sks); -} - -static spl_kmem_slab_t * -spl_slab_alloc(spl_kmem_cache_t *skc, int flags) { - spl_kmem_slab_t *sks; - spl_kmem_obj_t *sko; - int rc; - ENTRY; - - /* Objects less than a page can use kmem_alloc() and avoid - * the locking overhead in __get_vm_area_node() when locking - * for a free address. For objects over a page we use - * vmem_alloc() because it is usually worth paying this - * overhead to avoid the need to find contigeous pages. - * This should give us the best of both worlds. 
*/ - if (skc->skc_obj_size <= PAGE_SIZE) { - sks = kmem_cache_alloc(spl_slab_cache, flags); - if (sks == NULL) - GOTO(out, sks = NULL); - - spl_slab_init(skc, sks); - - rc = spl_slab_alloc_kmem(skc, sks, flags); - if (rc) { - kmem_cache_free(spl_slab_cache, sks); - GOTO(out, sks = NULL); - } - } else { - sks = spl_slab_alloc_vmem(skc, flags); - if (sks == NULL) - GOTO(out, sks = NULL); - } - - ASSERT(sks); list_for_each_entry(sko, &sks->sks_free_list, sko_list) if (skc->skc_ctor) skc->skc_ctor(sko->sko_addr, skc->skc_private, flags); out: - RETURN(sks); -} - -static void -spl_slab_free_kmem(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks) -{ - spl_kmem_obj_t *sko, *n; - - ASSERT(skc->skc_magic == SKC_MAGIC); - ASSERT(sks->sks_magic == SKS_MAGIC); + if (rc) { + if (skc->skc_flags & KMC_OFFSLAB) + list_for_each_entry_safe(sko,n,&sks->sks_free_list,sko_list) + kv_free(skc, sko->sko_addr, size); - list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) { - ASSERT(sko->sko_magic == SKO_MAGIC); - kmem_free(sko->sko_addr, skc->skc_obj_size); - list_del(&sko->sko_list); - kmem_cache_free(spl_obj_cache, sko); + kv_free(skc, base, skc->skc_slab_size); + sks = NULL; } - kmem_cache_free(spl_slab_cache, sks); -} - -static void -spl_slab_free_vmem(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks) -{ - ASSERT(skc->skc_magic == SKC_MAGIC); - ASSERT(sks->sks_magic == SKS_MAGIC); - - vmem_free(sks, SPL_KMEM_CACHE_OBJ_PER_SLAB * - (skc->skc_obj_size + sizeof(spl_kmem_obj_t))); + RETURN(sks); } /* Removes slab from complete or partial list, so it must @@ -346,6 +269,7 @@ static void spl_slab_free(spl_kmem_slab_t *sks) { spl_kmem_cache_t *skc; spl_kmem_obj_t *sko, *n; + int size; ENTRY; ASSERT(sks->sks_magic == SKS_MAGIC); @@ -358,17 +282,20 @@ spl_slab_free(spl_kmem_slab_t *sks) { skc->skc_obj_total -= sks->sks_objs; skc->skc_slab_total--; list_del(&sks->sks_list); + size = sizeof(spl_kmem_obj_t) + skc->skc_obj_size; /* Run destructors slab is being released */ - 
list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) + list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) { + ASSERT(sko->sko_magic == SKO_MAGIC); + if (skc->skc_dtor) skc->skc_dtor(sko->sko_addr, skc->skc_private); - if (skc->skc_obj_size <= PAGE_SIZE) - spl_slab_free_kmem(skc, sks); - else - spl_slab_free_vmem(skc, sks); + if (skc->skc_flags & KMC_OFFSLAB) + kv_free(skc, sko->sko_addr, size); + } + kv_free(skc, sks, skc->skc_slab_size); EXIT; } @@ -449,7 +376,8 @@ spl_magazine_alloc(spl_kmem_cache_t *skc, int node) skm->skm_avail = 0; skm->skm_size = skc->skc_mag_size; skm->skm_refill = skc->skc_mag_refill; - skm->skm_age = jiffies; + if (!(skc->skc_flags & KMC_NOTOUCH)) + skm->skm_age = jiffies; } RETURN(skm); @@ -511,9 +439,14 @@ spl_kmem_cache_create(char *name, size_t size, size_t align, void *priv, void *vmp, int flags) { spl_kmem_cache_t *skc; - int i, rc, kmem_flags = KM_SLEEP; + uint32_t slab_max, slab_size, slab_objs; + int rc, kmem_flags = KM_SLEEP; ENTRY; + ASSERTF(!(flags & KMC_NOMAGAZINE), "Bad KMC_NOMAGAZINE (%x)\n", flags); + ASSERTF(!(flags & KMC_NOHASH), "Bad KMC_NOHASH (%x)\n", flags); + ASSERTF(!(flags & KMC_QCACHE), "Bad KMC_QCACHE (%x)\n", flags); + /* We may be called when there is a non-zero preempt_count or * interrupts are disabled is which case we must not sleep. */ @@ -541,25 +474,8 @@ spl_kmem_cache_create(char *name, size_t size, size_t align, skc->skc_vmp = vmp; skc->skc_flags = flags; skc->skc_obj_size = size; - skc->skc_chunk_size = 0; /* XXX: Needed only when implementing */ - skc->skc_slab_size = 0; /* small slab object optimizations */ - skc->skc_max_chunks = 0; /* which are yet supported. 
*/ skc->skc_delay = SPL_KMEM_CACHE_DELAY; - skc->skc_hash_bits = SPL_KMEM_CACHE_HASH_BITS; - skc->skc_hash_size = SPL_KMEM_CACHE_HASH_SIZE; - skc->skc_hash_elts = SPL_KMEM_CACHE_HASH_ELTS; - skc->skc_hash = (struct hlist_head *) - vmem_alloc(skc->skc_hash_size, kmem_flags); - if (skc->skc_hash == NULL) { - kmem_free(skc->skc_name, skc->skc_name_size); - kmem_free(skc, sizeof(*skc)); - RETURN(NULL); - } - - for (i = 0; i < skc->skc_hash_elts; i++) - INIT_HLIST_HEAD(&skc->skc_hash[i]); - INIT_LIST_HEAD(&skc->skc_list); INIT_LIST_HEAD(&skc->skc_complete_list); INIT_LIST_HEAD(&skc->skc_partial_list); @@ -573,12 +489,37 @@ spl_kmem_cache_create(char *name, size_t size, size_t align, skc->skc_obj_total = 0; skc->skc_obj_alloc = 0; skc->skc_obj_max = 0; - skc->skc_hash_depth = 0; - skc->skc_hash_count = 0; + + /* If none passed select a cache type based on object size */ + if (!(skc->skc_flags & (KMC_KMEM | KMC_VMEM))) { + if (skc->skc_obj_size < (PAGE_SIZE / 8)) { + skc->skc_flags |= KMC_KMEM; + } else { + skc->skc_flags |= KMC_VMEM; + } + } + + /* Size slabs properly so ensure they are not too large */ + slab_max = ((uint64_t)1 << (MAX_ORDER - 1)) * PAGE_SIZE; + if (skc->skc_flags & KMC_OFFSLAB) { + skc->skc_slab_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB; + skc->skc_slab_size = sizeof(spl_kmem_slab_t); + ASSERT(skc->skc_obj_size < slab_max); + } else { + slab_objs = SPL_KMEM_CACHE_OBJ_PER_SLAB + 1; + + do { + slab_objs--; + slab_size = sizeof(spl_kmem_slab_t) + slab_objs * + (skc->skc_obj_size+sizeof(spl_kmem_obj_t)); + } while (slab_size > slab_max); + + skc->skc_slab_objs = slab_objs; + skc->skc_slab_size = slab_size; + } rc = spl_magazine_create(skc); if (rc) { - vmem_free(skc->skc_hash, skc->skc_hash_size); kmem_free(skc->skc_name, skc->skc_name_size); kmem_free(skc, sizeof(*skc)); RETURN(NULL); @@ -592,9 +533,6 @@ spl_kmem_cache_create(char *name, size_t size, size_t align, } EXPORT_SYMBOL(spl_kmem_cache_create); -/* The caller must ensure there are no racing calls to - * 
spl_kmem_cache_alloc() for this spl_kmem_cache_t. - */ void spl_kmem_cache_destroy(spl_kmem_cache_t *skc) { @@ -613,13 +551,15 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc) /* Validate there are no objects in use and free all the * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */ ASSERT(list_empty(&skc->skc_complete_list)); - ASSERTF(skc->skc_hash_count == 0, "skc->skc_hash_count=%d\n", - skc->skc_hash_count); + ASSERT(skc->skc_slab_alloc == 0); + ASSERT(skc->skc_obj_alloc == 0); list_for_each_entry_safe(sks, m, &skc->skc_partial_list, sks_list) spl_slab_free(sks); - vmem_free(skc->skc_hash, skc->skc_hash_size); + ASSERT(skc->skc_slab_total == 0); + ASSERT(skc->skc_obj_total == 0); + kmem_free(skc->skc_name, skc->skc_name_size); spin_unlock(&skc->skc_lock); @@ -629,64 +569,25 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc) } EXPORT_SYMBOL(spl_kmem_cache_destroy); -/* The kernel provided hash_ptr() function behaves exceptionally badly - * when all the addresses are page aligned which is likely the case - * here. To avoid this issue shift off the low order non-random bits. 
- */ -static unsigned long -spl_hash_ptr(void *ptr, unsigned int bits) -{ - return hash_long((unsigned long)ptr >> PAGE_SHIFT, bits); -} - -static spl_kmem_obj_t * -spl_hash_obj(spl_kmem_cache_t *skc, void *obj) -{ - struct hlist_node *node; - spl_kmem_obj_t *sko = NULL; - unsigned long key = spl_hash_ptr(obj, skc->skc_hash_bits); - int i = 0; - - ASSERT(skc->skc_magic == SKC_MAGIC); - ASSERT(spin_is_locked(&skc->skc_lock)); - - hlist_for_each_entry(sko, node, &skc->skc_hash[key], sko_hlist) { - - if (unlikely((++i) > skc->skc_hash_depth)) - skc->skc_hash_depth = i; - - if (sko->sko_addr == obj) { - ASSERT(sko->sko_magic == SKO_MAGIC); - RETURN(sko); - } - } - - RETURN(NULL); -} - static void * spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks) { spl_kmem_obj_t *sko; - unsigned long key; ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(sks->sks_magic == SKS_MAGIC); ASSERT(spin_is_locked(&skc->skc_lock)); - sko = list_entry((&sks->sks_free_list)->next,spl_kmem_obj_t,sko_list); + sko = list_entry(sks->sks_free_list.next, spl_kmem_obj_t, sko_list); ASSERT(sko->sko_magic == SKO_MAGIC); ASSERT(sko->sko_addr != NULL); - /* Remove from sks_free_list and add to used hash */ + /* Remove from sks_free_list */ list_del_init(&sko->sko_list); - key = spl_hash_ptr(sko->sko_addr, skc->skc_hash_bits); - hlist_add_head(&sko->sko_hlist, &skc->skc_hash[key]); sks->sks_age = jiffies; sks->sks_ref++; skc->skc_obj_alloc++; - skc->skc_hash_count++; /* Track max obj usage statistics */ if (skc->skc_obj_alloc > skc->skc_obj_max) @@ -818,22 +719,17 @@ spl_cache_shrink(spl_kmem_cache_t *skc, void *obj) ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(spin_is_locked(&skc->skc_lock)); - sko = spl_hash_obj(skc, obj); - ASSERTF(sko, "Obj %p missing from in-use hash (%d/%d) for cache %s\n", - obj, skc->skc_hash_depth, skc->skc_hash_count, skc->skc_name); + sko = obj + skc->skc_obj_size; + ASSERT(sko->sko_magic == SKO_MAGIC); sks = sko->sko_slab; - ASSERTF(sks, "Obj %p/%p linked to invalid slab for 
cache %s\n", - obj, sko, skc->skc_name); - + ASSERT(sks->sks_magic == SKS_MAGIC); ASSERT(sks->sks_cache == skc); - hlist_del_init(&sko->sko_hlist); list_add(&sko->sko_list, &sks->sks_free_list); sks->sks_age = jiffies; sks->sks_ref--; skc->skc_obj_alloc--; - skc->skc_hash_count--; /* Move slab to skc_partial_list when no longer full. Slabs * are added to the head to keep the partial list is quasi-full @@ -906,7 +802,8 @@ restart: if (likely(skm->skm_avail)) { /* Object available in CPU cache, use it */ obj = skm->skm_objs[--skm->skm_avail]; - skm->skm_age = jiffies; + if (!(skc->skc_flags & KMC_NOTOUCH)) + skm->skm_age = jiffies; } else { /* Per-CPU cache empty, directly allocate from * the slab and refill the per-CPU cache. */ @@ -1012,71 +909,6 @@ spl_kmem_reap(void) } EXPORT_SYMBOL(spl_kmem_reap); -int -spl_kmem_init(void) -{ - int rc = 0; - ENTRY; - - init_rwsem(&spl_kmem_cache_sem); - INIT_LIST_HEAD(&spl_kmem_cache_list); - - spl_slab_cache = NULL; - spl_obj_cache = NULL; - - spl_slab_cache = __kmem_cache_create("spl_slab_cache", - sizeof(spl_kmem_slab_t), - 0, 0, NULL, NULL); - if (spl_slab_cache == NULL) - GOTO(out_cache, rc = -ENOMEM); - - spl_obj_cache = __kmem_cache_create("spl_obj_cache", - sizeof(spl_kmem_obj_t), - 0, 0, NULL, NULL); - if (spl_obj_cache == NULL) - GOTO(out_cache, rc = -ENOMEM); - -#ifdef HAVE_SET_SHRINKER - spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS, - spl_kmem_cache_generic_shrinker); - if (spl_kmem_cache_shrinker == NULL) - GOTO(out_cache, rc = -ENOMEM); -#else - register_shrinker(&spl_kmem_cache_shrinker); -#endif - -#ifdef DEBUG_KMEM - atomic64_set(&kmem_alloc_used, 0); - atomic64_set(&vmem_alloc_used, 0); - -#ifdef DEBUG_KMEM_TRACKING - { int i; - spin_lock_init(&kmem_lock); - INIT_LIST_HEAD(&kmem_list); - - for (i = 0; i < KMEM_TABLE_SIZE; i++) - INIT_HLIST_HEAD(&kmem_table[i]); - - spin_lock_init(&vmem_lock); - INIT_LIST_HEAD(&vmem_list); - - for (i = 0; i < VMEM_TABLE_SIZE; i++) - INIT_HLIST_HEAD(&vmem_table[i]); - 
} -#endif -#endif - RETURN(rc); - -out_cache: - if (spl_obj_cache) - (void)kmem_cache_destroy(spl_obj_cache); - - if (spl_slab_cache) - (void)kmem_cache_destroy(spl_slab_cache); - - RETURN(rc); -} - #if defined(DEBUG_KMEM) && defined(DEBUG_KMEM_TRACKING) static char * spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min) @@ -1119,12 +951,28 @@ spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min) return str; } +static int +spl_kmem_init_tracking(struct list_head *list, spinlock_t *lock, int size) +{ + int i; + ENTRY; + + spin_lock_init(lock); + INIT_LIST_HEAD(list); + + for (i = 0; i < size; i++) + INIT_HLIST_HEAD(&kmem_table[i]); + + RETURN(0); +} + static void spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock) { unsigned long flags; kmem_debug_t *kd; char str[17]; + ENTRY; spin_lock_irqsave(lock, flags); if (!list_empty(list)) @@ -1138,11 +986,42 @@ spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock) kd->kd_func, kd->kd_line); spin_unlock_irqrestore(lock, flags); + EXIT; } #else /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */ +#define spl_kmem_init_tracking(list, lock, size) #define spl_kmem_fini_tracking(list, lock) #endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */ +int +spl_kmem_init(void) +{ + int rc = 0; + ENTRY; + + init_rwsem(&spl_kmem_cache_sem); + INIT_LIST_HEAD(&spl_kmem_cache_list); + +#ifdef HAVE_SET_SHRINKER + spl_kmem_cache_shrinker = set_shrinker(KMC_DEFAULT_SEEKS, + spl_kmem_cache_generic_shrinker); + if (spl_kmem_cache_shrinker == NULL) + GOTO(out, rc = -ENOMEM); +#else + register_shrinker(&spl_kmem_cache_shrinker); +#endif + +#ifdef DEBUG_KMEM + atomic64_set(&kmem_alloc_used, 0); + atomic64_set(&vmem_alloc_used, 0); + + spl_kmem_init_tracking(&kmem_list, &kmem_lock, KMEM_TABLE_SIZE); + spl_kmem_init_tracking(&vmem_list, &vmem_lock, VMEM_TABLE_SIZE); +#endif +out: + RETURN(rc); +} + void spl_kmem_fini(void) { @@ -1171,8 +1050,5 @@ spl_kmem_fini(void) unregister_shrinker(&spl_kmem_cache_shrinker); #endif - 
(void)kmem_cache_destroy(spl_obj_cache); - (void)kmem_cache_destroy(spl_slab_cache); - EXIT; } diff --git a/modules/spl/spl-proc.c b/modules/spl/spl-proc.c index f2685f39b..01983433d 100644 --- a/modules/spl/spl-proc.c +++ b/modules/spl/spl-proc.c @@ -577,14 +577,10 @@ slab_seq_show(struct seq_file *f, void *p) spin_lock(&skc->skc_lock); seq_printf(f, "%-36s ", skc->skc_name); - seq_printf(f, "%u %u %u - %u %u %u - " - "%lu %lu %lu - %lu %lu %lu - %lu %lu %lu - %lu %lu\n", + seq_printf(f, "%u %u %u - %lu %lu %lu - %lu %lu %lu - %lu %lu %lu\n", (unsigned)skc->skc_obj_size, - (unsigned)skc->skc_chunk_size, + (unsigned)skc->skc_slab_objs, (unsigned)skc->skc_slab_size, - (unsigned)skc->skc_hash_bits, - (unsigned)skc->skc_hash_size, - (unsigned)skc->skc_hash_elts, (long unsigned)skc->skc_slab_fail, (long unsigned)skc->skc_slab_create, (long unsigned)skc->skc_slab_destroy, @@ -593,9 +589,7 @@ slab_seq_show(struct seq_file *f, void *p) (long unsigned)skc->skc_slab_max, (long unsigned)skc->skc_obj_total, (long unsigned)skc->skc_obj_alloc, - (long unsigned)skc->skc_obj_max, - (long unsigned)skc->skc_hash_depth, - (long unsigned)skc->skc_hash_count); + (long unsigned)skc->skc_obj_max); spin_unlock(&skc->skc_lock); diff --git a/modules/splat/splat-kmem.c b/modules/splat/splat-kmem.c index 49715152d..af28c717c 100644 --- a/modules/splat/splat-kmem.c +++ b/modules/splat/splat-kmem.c @@ -371,18 +371,40 @@ out_free: return rc; } +/* Validate small object cache behavior for dynamic/kmem/vmem caches */ static int splat_kmem_test5(struct file *file, void *arg) { - return splat_kmem_cache_size_test(file, arg, SPLAT_KMEM_TEST5_NAME, - sizeof(kmem_cache_data_t) * 1, 0); + char *name = SPLAT_KMEM_TEST5_NAME; + int rc; + + rc = splat_kmem_cache_size_test(file, arg, name, 128, 0); + if (rc) + return rc; + + rc = splat_kmem_cache_size_test(file, arg, name, 128, KMC_KMEM); + if (rc) + return rc; + + return splat_kmem_cache_size_test(file, arg, name, 128, KMC_VMEM); } +/* Validate large 
object cache behavior for dynamic/kmem/vmem caches */ static int splat_kmem_test6(struct file *file, void *arg) { - return splat_kmem_cache_size_test(file, arg, SPLAT_KMEM_TEST6_NAME, - sizeof(kmem_cache_data_t) * 1024, 0); + char *name = SPLAT_KMEM_TEST6_NAME; + int rc; + + rc = splat_kmem_cache_size_test(file, arg, name, 128 * 1024, 0); + if (rc) + return rc; + + rc = splat_kmem_cache_size_test(file, arg, name, 128 * 1024, KMC_KMEM); + if (rc) + return rc; + + return splat_kmem_cache_size_test(file, arg, name, 128 * 1028, KMC_VMEM); } static void @@ -533,11 +555,12 @@ splat_kmem_test8_thread(void *arg) vmem_free(objs, count * sizeof(void *)); out: spin_lock(&kcp->kcp_lock); - kcp->kcp_threads--; if (!kcp->kcp_rc) kcp->kcp_rc = rc; - wake_up(&kcp->kcp_waitq); + if (--kcp->kcp_threads == 0) + wake_up(&kcp->kcp_waitq); + spin_unlock(&kcp->kcp_lock); thread_exit(); @@ -573,7 +596,7 @@ splat_kmem_test8_sc(struct file *file, void *arg, int size, int count) splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %s", "name", "time (sec)\tslabs \tobjs \thash\n"); splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %s", "", - " \ttot/max/calc\ttot/max/calc\tsize/depth\n"); + " \ttot/max/calc\ttot/max/calc\n"); for (i = 1; i <= count; i *= 2) { kcp.kcp_size = size; @@ -611,7 +634,7 @@ splat_kmem_test8_sc(struct file *file, void *arg, int size, int count) delta = timespec_sub(stop, start); splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %2ld.%09ld\t" - "%lu/%lu/%lu\t%lu/%lu/%lu\t%lu/%lu\n", + "%lu/%lu/%lu\t%lu/%lu/%lu\n", kcp.kcp_cache->skc_name, delta.tv_sec, delta.tv_nsec, (unsigned long)kcp.kcp_cache->skc_slab_total, @@ -620,9 +643,7 @@ splat_kmem_test8_sc(struct file *file, void *arg, int size, int count) SPL_KMEM_CACHE_OBJ_PER_SLAB), (unsigned long)kcp.kcp_cache->skc_obj_total, (unsigned long)kcp.kcp_cache->skc_obj_max, - (unsigned long)(kcp.kcp_alloc * threads), - (unsigned long)kcp.kcp_cache->skc_hash_size, - (unsigned long)kcp.kcp_cache->skc_hash_depth); + (unsigned 
long)(kcp.kcp_alloc * threads)); kmem_cache_destroy(kcp.kcp_cache);