From ea3e6ca9e595ebfba82b964ee2eaf1ddd7076f0f Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 30 Jan 2009 20:54:49 -0800 Subject: [PATCH] kmem_cache hardening and performance improvements - Added slab work queue task which gradually ages and frees slabs from the cache which have not been used recently. - Optimized slab packing algorithm to ensure each slab contains the maximum number of objects without creating too large a slab. - Fix deadlock; we can never call kv_free() under the skc_lock. We now unlink the objects and slabs from the cache itself and attach them to a private work list. The contents of the list are then subsequently freed outside the spin lock. - Move magazine create/destroy operation on to local cpu. - Further performance optimizations by minimizing the usage of the large per-cache skc_lock. This includes the addition of KMC_BIT_REAPING bit mask which is used to prevent concurrent reaping, and to defer new slab creation when reaping is occurring. - Add KMC_BIT_DESTROY bit mask which is set when the cache is being destroyed; this is used to catch any task accessing the cache while it is being destroyed. - Add comments to all the functions and additional comments to try and make everything as clear as possible. - Major cleanup and additions to the SPLAT kmem tests to more rigorously stress the cache implementation and look for any problems. This includes correctness and performance tests. 
- Updated portable work queue interfaces --- include/sys/kmem.h | 73 ++- include/sys/sysmacros.h | 12 - include/sys/vmsystm.h | 3 +- module/spl/spl-kmem.c | 465 +++++++++++----- module/splat/splat-internal.h | 1 + module/splat/splat-kmem.c | 967 +++++++++++++++++++++++----------- 6 files changed, 1025 insertions(+), 496 deletions(-) diff --git a/include/sys/kmem.h b/include/sys/kmem.h index ef58763..4f939e0 100644 --- a/include/sys/kmem.h +++ b/include/sys/kmem.h @@ -45,6 +45,7 @@ extern "C" { #include #include #include +#include /* * Memory allocation interfaces @@ -161,17 +162,32 @@ kmem_alloc_tryhard(size_t size, size_t *alloc_size, int kmflags) /* * Slab allocation interfaces */ -#define KMC_NOTOUCH 0x00000001 -#define KMC_NODEBUG 0x00000002 /* Default behavior */ -#define KMC_NOMAGAZINE 0x00000004 /* XXX: No disable support available */ -#define KMC_NOHASH 0x00000008 /* XXX: No hash available */ -#define KMC_QCACHE 0x00000010 /* XXX: Unsupported */ -#define KMC_KMEM 0x00000100 /* Use kmem cache */ -#define KMC_VMEM 0x00000200 /* Use vmem cache */ -#define KMC_OFFSLAB 0x00000400 /* Objects not on slab */ - -#define KMC_REAP_CHUNK 256 -#define KMC_DEFAULT_SEEKS DEFAULT_SEEKS +enum { + KMC_BIT_NOTOUCH = 0, /* Don't update ages */ + KMC_BIT_NODEBUG = 1, /* Default behavior */ + KMC_BIT_NOMAGAZINE = 2, /* XXX: Unsupported */ + KMC_BIT_NOHASH = 3, /* XXX: Unsupported */ + KMC_BIT_QCACHE = 4, /* XXX: Unsupported */ + KMC_BIT_KMEM = 5, /* Use kmem cache */ + KMC_BIT_VMEM = 6, /* Use vmem cache */ + KMC_BIT_OFFSLAB = 7, /* Objects not on slab */ + KMC_BIT_REAPING = 16, /* Reaping in progress */ + KMC_BIT_DESTROY = 17, /* Destroy in progress */ +}; + +#define KMC_NOTOUCH (1 << KMC_BIT_NOTOUCH) +#define KMC_NODEBUG (1 << KMC_BIT_NODEBUG) +#define KMC_NOMAGAZINE (1 << KMC_BIT_NOMAGAZINE) +#define KMC_NOHASH (1 << KMC_BIT_NOHASH) +#define KMC_QCACHE (1 << KMC_BIT_QCACHE) +#define KMC_KMEM (1 << KMC_BIT_KMEM) +#define KMC_VMEM (1 << KMC_BIT_VMEM) +#define KMC_OFFSLAB (1 << 
KMC_BIT_OFFSLAB) +#define KMC_REAPING (1 << KMC_BIT_REAPING) +#define KMC_DESTROY (1 << KMC_BIT_DESTROY) + +#define KMC_REAP_CHUNK INT_MAX +#define KMC_DEFAULT_SEEKS 1 #ifdef DEBUG_KMEM_UNIMPLEMENTED static __inline__ void kmem_init(void) { @@ -223,9 +239,10 @@ extern struct rw_semaphore spl_kmem_cache_sem; #define SKS_MAGIC 0x22222222 #define SKC_MAGIC 0x2c2c2c2c -#define SPL_KMEM_CACHE_DELAY 5 -#define SPL_KMEM_CACHE_OBJ_PER_SLAB 32 -#define SPL_KMEM_CACHE_ALIGN 8 +#define SPL_KMEM_CACHE_DELAY 5 /* Minimum slab release age */ +#define SPL_KMEM_CACHE_OBJ_PER_SLAB 32 /* Target objects per slab */ +#define SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN 8 /* Minimum objects per slab */ +#define SPL_KMEM_CACHE_ALIGN 8 /* Default object alignment */ typedef int (*spl_kmem_ctor_t)(void *, void *, int); typedef void (*spl_kmem_dtor_t)(void *, void *); @@ -258,24 +275,28 @@ typedef struct spl_kmem_slab { } spl_kmem_slab_t; typedef struct spl_kmem_cache { - uint32_t skc_magic; /* Sanity magic */ - uint32_t skc_name_size; /* Name length */ - char *skc_name; /* Name string */ + uint32_t skc_magic; /* Sanity magic */ + uint32_t skc_name_size; /* Name length */ + char *skc_name; /* Name string */ spl_kmem_magazine_t *skc_mag[NR_CPUS]; /* Per-CPU warm cache */ uint32_t skc_mag_size; /* Magazine size */ uint32_t skc_mag_refill; /* Magazine refill count */ - spl_kmem_ctor_t skc_ctor; /* Constructor */ - spl_kmem_dtor_t skc_dtor; /* Destructor */ - spl_kmem_reclaim_t skc_reclaim; /* Reclaimator */ - void *skc_private; /* Private data */ - void *skc_vmp; /* Unused */ + spl_kmem_ctor_t skc_ctor; /* Constructor */ + spl_kmem_dtor_t skc_dtor; /* Destructor */ + spl_kmem_reclaim_t skc_reclaim; /* Reclaimator */ + void *skc_private; /* Private data */ + void *skc_vmp; /* Unused */ uint32_t skc_flags; /* Flags */ uint32_t skc_obj_size; /* Object size */ uint32_t skc_obj_align; /* Object alignment */ uint32_t skc_slab_objs; /* Objects per slab */ - uint32_t skc_slab_size; /* Slab size */ - uint32_t 
skc_delay; /* slab reclaim interval */ - struct list_head skc_list; /* List of caches linkage */ + uint32_t skc_slab_size; /* Slab size */ + uint32_t skc_delay; /* Slab reclaim interval */ + atomic_t skc_ref; /* Ref count callers */ + struct delayed_work skc_work; /* Slab reclaim work */ + struct work_struct work; + struct timer_list timer; + struct list_head skc_list; /* List of caches linkage */ struct list_head skc_complete_list;/* Completely alloc'ed */ struct list_head skc_partial_list; /* Partially alloc'ed */ spinlock_t skc_lock; /* Cache lock */ @@ -283,7 +304,7 @@ typedef struct spl_kmem_cache { uint64_t skc_slab_create;/* Slab creates */ uint64_t skc_slab_destroy;/* Slab destroys */ uint64_t skc_slab_total; /* Slab total current */ - uint64_t skc_slab_alloc; /* Slab alloc current */ + uint64_t skc_slab_alloc; /* Slab alloc current */ uint64_t skc_slab_max; /* Slab max historic */ uint64_t skc_obj_total; /* Obj total current */ uint64_t skc_obj_alloc; /* Obj alloc current */ diff --git a/include/sys/sysmacros.h b/include/sys/sysmacros.h index 94ff3f8..b828123 100644 --- a/include/sys/sysmacros.h +++ b/include/sys/sysmacros.h @@ -203,18 +203,6 @@ extern int ddi_strtoul(const char *str, char **nptr, #define offsetof(s, m) ((size_t)(&(((s *)0)->m))) #endif -#ifdef HAVE_3ARGS_INIT_WORK - -#define spl_init_work(wq,cb,d) INIT_WORK((wq), (void *)(cb), (void *)(d)) -#define spl_get_work_data(type,field,data) (data) - -#else - -#define spl_init_work(wq,cb,d) INIT_WORK((wq), (void *)(cb)); -#define spl_get_work_data(type,field,data) container_of(data,type,field) - -#endif - #ifdef __cplusplus } #endif diff --git a/include/sys/vmsystm.h b/include/sys/vmsystm.h index e92c17b..1cb716f 100644 --- a/include/sys/vmsystm.h +++ b/include/sys/vmsystm.h @@ -35,8 +35,7 @@ extern vmem_t *zio_alloc_arena; /* arena for zio caches */ #define physmem num_physpages -#define freemem nr_free_pages() // Expensive on linux, - // cheap on solaris +#define freemem nr_free_pages() #define 
minfree 0 #define needfree 0 /* # of needed pages */ #define ptob(pages) (pages * PAGE_SIZE) diff --git a/module/spl/spl-kmem.c b/module/spl/spl-kmem.c index a68f8ef..83eefe2 100644 --- a/module/spl/spl-kmem.c +++ b/module/spl/spl-kmem.c @@ -132,10 +132,6 @@ EXPORT_SYMBOL(kmem_set_warning); * small virtual address space on 32bit arches. This will seriously * constrain the size of the slab caches and their performance. * - * XXX: Implement work requests to keep an eye on each cache and - * shrink them via spl_slab_reclaim() when they are wasting lots - * of space. Currently this process is driven by the reapers. - * * XXX: Improve the partial slab list by carefully maintaining a * strict ordering of fullest to emptiest slabs based on * the slab reference count. This gaurentees the when freeing @@ -571,7 +567,8 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size) } } -/* It's important that we pack the spl_kmem_obj_t structure and the +/* + * It's important that we pack the spl_kmem_obj_t structure and the * actual objects in to one large address space to minimize the number * of calls to the allocator. It is far better to do a few large * allocations and then subdivide it ourselves. Now which allocator @@ -662,14 +659,17 @@ out: RETURN(sks); } -/* Removes slab from complete or partial list, so it must - * be called with the 'skc->skc_lock' held. +/* + * Remove a slab from complete or partial list, it must be called with + * the 'skc->skc_lock' held but the actual free must be performed + * outside the lock to prevent deadlocking on vmem addresses. 
*/ static void -spl_slab_free(spl_kmem_slab_t *sks) { +spl_slab_free(spl_kmem_slab_t *sks, + struct list_head *sks_list, struct list_head *sko_list) +{ spl_kmem_cache_t *skc; spl_kmem_obj_t *sko, *n; - int size; ENTRY; ASSERT(sks->sks_magic == SKS_MAGIC); @@ -682,114 +682,190 @@ spl_slab_free(spl_kmem_slab_t *sks) { skc->skc_obj_total -= sks->sks_objs; skc->skc_slab_total--; list_del(&sks->sks_list); - size = P2ROUNDUP(skc->skc_obj_size, skc->skc_obj_align) + - P2ROUNDUP(sizeof(spl_kmem_obj_t), skc->skc_obj_align); /* Run destructors slab is being released */ list_for_each_entry_safe(sko, n, &sks->sks_free_list, sko_list) { ASSERT(sko->sko_magic == SKO_MAGIC); + list_del(&sko->sko_list); if (skc->skc_dtor) skc->skc_dtor(sko->sko_addr, skc->skc_private); if (skc->skc_flags & KMC_OFFSLAB) - kv_free(skc, sko->sko_addr, size); + list_add(&sko->sko_list, sko_list); } - kv_free(skc, sks, skc->skc_slab_size); + list_add(&sks->sks_list, sks_list); EXIT; } -static int -__spl_slab_reclaim(spl_kmem_cache_t *skc) +/* + * Traverses all the partial slabs attached to a cache and free those + * which which are currently empty, and have not been touched for + * skc_delay seconds. This is to avoid thrashing. + */ +static void +spl_slab_reclaim(spl_kmem_cache_t *skc, int flag) { spl_kmem_slab_t *sks, *m; - int rc = 0; + spl_kmem_obj_t *sko, *n; + LIST_HEAD(sks_list); + LIST_HEAD(sko_list); + int size; ENTRY; - ASSERT(spin_is_locked(&skc->skc_lock)); /* - * Free empty slabs which have not been touched in skc_delay - * seconds. This delay time is important to avoid thrashing. - * Empty slabs will be at the end of the skc_partial_list. + * Move empty slabs and objects which have not been touched in + * skc_delay seconds on to private lists to be freed outside + * the spin lock. This delay time is important to avoid + * thrashing however when flag is set the delay will not be + * used. Empty slabs will be at the end of the skc_partial_list. 
*/ + spin_lock(&skc->skc_lock); list_for_each_entry_safe_reverse(sks, m, &skc->skc_partial_list, sks_list) { if (sks->sks_ref > 0) break; - if (time_after(jiffies, sks->sks_age + skc->skc_delay * HZ)) { - spl_slab_free(sks); - rc++; - } + if (flag || time_after(jiffies,sks->sks_age+skc->skc_delay*HZ)) + spl_slab_free(sks, &sks_list, &sko_list); } + spin_unlock(&skc->skc_lock); - /* Returns number of slabs reclaimed */ - RETURN(rc); + /* + * We only have list of spl_kmem_obj_t's if they are located off + * the slab, otherwise they get feed with the spl_kmem_slab_t. + */ + if (!list_empty(&sko_list)) { + ASSERT(skc->skc_flags & KMC_OFFSLAB); + + size = P2ROUNDUP(skc->skc_obj_size, skc->skc_obj_align) + + P2ROUNDUP(sizeof(spl_kmem_obj_t), skc->skc_obj_align); + + list_for_each_entry_safe(sko, n, &sko_list, sko_list) + kv_free(skc, sko->sko_addr, size); + } + + list_for_each_entry_safe(sks, m, &sks_list, sks_list) + kv_free(skc, sks, skc->skc_slab_size); + + EXIT; } -static int -spl_slab_reclaim(spl_kmem_cache_t *skc) +/* + * Called regularly on all caches to age objects out of the magazines + * which have not been access in skc->skc_delay seconds. This prevents + * idle magazines from holding memory which might be better used by + * other caches or parts of the system. The delay is present to + * prevent thrashing the magazine. + */ +static void +spl_magazine_age(void *data) { - int rc; - ENTRY; + spl_kmem_cache_t *skc = data; + spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()]; - spin_lock(&skc->skc_lock); - rc = __spl_slab_reclaim(skc); - spin_unlock(&skc->skc_lock); + if (skm->skm_avail > 0 && + time_after(jiffies, skm->skm_age + skc->skc_delay * HZ)) + (void)spl_cache_flush(skc, skm, skm->skm_refill); +} - RETURN(rc); +/* + * Called regularly to keep a downward pressure on the size of idle + * magazines and to release free slabs from the cache. 
This function + * never calls the registered reclaim function, that only occures + * under memory pressure or with a direct call to spl_kmem_reap(). + */ +static void +spl_cache_age(void *data) +{ + spl_kmem_cache_t *skc = + spl_get_work_data(data, spl_kmem_cache_t, skc_work.work); + + ASSERT(skc->skc_magic == SKC_MAGIC); + on_each_cpu(spl_magazine_age, skc, 0, 1); + spl_slab_reclaim(skc, 0); + + if (!test_bit(KMC_BIT_DESTROY, &skc->skc_flags)) + schedule_delayed_work(&skc->skc_work, 2 * skc->skc_delay * HZ); } -/* Size slabs properly to ensure they are not too large */ +/* + * Size a slab based on the size of each aliged object plus spl_kmem_obj_t. + * When on-slab we want to target SPL_KMEM_CACHE_OBJ_PER_SLAB. However, + * for very small objects we may end up with more than this so as not + * to waste space in the minimal allocation of a single page. Also for + * very large objects we may use as few as SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN, + * lower than this and we will fail. + */ static int spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size) { - int max = ((uint64_t)1 << (MAX_ORDER - 1)) * PAGE_SIZE; - int align = skc->skc_obj_align; - - *objs = SPL_KMEM_CACHE_OBJ_PER_SLAB; + int sks_size, obj_size, max_size, align; if (skc->skc_flags & KMC_OFFSLAB) { + *objs = SPL_KMEM_CACHE_OBJ_PER_SLAB; *size = sizeof(spl_kmem_slab_t); } else { -resize: - *size = P2ROUNDUP(sizeof(spl_kmem_slab_t), align) + - *objs * (P2ROUNDUP(skc->skc_obj_size, align) + - P2ROUNDUP(sizeof(spl_kmem_obj_t), align)); + align = skc->skc_obj_align; + sks_size = P2ROUNDUP(sizeof(spl_kmem_slab_t), align); + obj_size = P2ROUNDUP(skc->skc_obj_size, align) + + P2ROUNDUP(sizeof(spl_kmem_obj_t), align); + + if (skc->skc_flags & KMC_KMEM) + max_size = ((uint64_t)1 << (MAX_ORDER-1)) * PAGE_SIZE; + else + max_size = (32 * 1024 * 1024); - if (*size > max) - GOTO(resize, *objs = *objs - 1); + for (*size = PAGE_SIZE; *size <= max_size; *size += PAGE_SIZE) { + *objs = (*size - sks_size) / 
obj_size; + if (*objs >= SPL_KMEM_CACHE_OBJ_PER_SLAB) + RETURN(0); + } - ASSERT(*objs > 0); + /* + * Unable to satisfy target objets per slab, fallback to + * allocating a maximally sized slab and assuming it can + * contain the minimum objects count use it. If not fail. + */ + *size = max_size; + *objs = (*size - sks_size) / obj_size; + if (*objs >= SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN) + RETURN(0); } - ASSERTF(*size <= max, "%d < %d\n", *size, max); - RETURN(0); + RETURN(-ENOSPC); } +/* + * Make a guess at reasonable per-cpu magazine size based on the size of + * each object and the cost of caching N of them in each magazine. Long + * term this should really adapt based on an observed usage heuristic. + */ static int spl_magazine_size(spl_kmem_cache_t *skc) { int size, align = skc->skc_obj_align; ENTRY; - /* Guesses for reasonable magazine sizes, they - * should really adapt based on observed usage. */ + /* Per-magazine sizes below assume a 4Kib page size */ if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE * 256)) - size = 4; + size = 4; /* Minimum 4Mib per-magazine */ else if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE * 32)) - size = 16; + size = 16; /* Minimum 2Mib per-magazine */ else if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE)) - size = 64; + size = 64; /* Minimum 256Kib per-magazine */ else if (P2ROUNDUP(skc->skc_obj_size, align) > (PAGE_SIZE / 4)) - size = 128; + size = 128; /* Minimum 128Kib per-magazine */ else - size = 512; + size = 256; RETURN(size); } +/* + * Allocate a per-cpu magazine to assoicate with a specific core. 
+ */ static spl_kmem_magazine_t * spl_magazine_alloc(spl_kmem_cache_t *skc, int node) { @@ -798,19 +874,21 @@ spl_magazine_alloc(spl_kmem_cache_t *skc, int node) sizeof(void *) * skc->skc_mag_size; ENTRY; - skm = kmem_alloc_node(size, GFP_KERNEL, node); + skm = kmem_alloc_node(size, GFP_KERNEL | __GFP_NOFAIL, node); if (skm) { skm->skm_magic = SKM_MAGIC; skm->skm_avail = 0; skm->skm_size = skc->skc_mag_size; skm->skm_refill = skc->skc_mag_refill; - if (!(skc->skc_flags & KMC_NOTOUCH)) - skm->skm_age = jiffies; + skm->skm_age = jiffies; } RETURN(skm); } +/* + * Free a per-cpu magazine assoicated with a specific core. + */ static void spl_magazine_free(spl_kmem_magazine_t *skm) { @@ -825,44 +903,72 @@ spl_magazine_free(spl_kmem_magazine_t *skm) EXIT; } +static void +__spl_magazine_create(void *data) +{ + spl_kmem_cache_t *skc = data; + int id = smp_processor_id(); + + skc->skc_mag[id] = spl_magazine_alloc(skc, cpu_to_node(id)); + ASSERT(skc->skc_mag[id]); +} + +/* + * Create all pre-cpu magazines of reasonable sizes. + */ static int spl_magazine_create(spl_kmem_cache_t *skc) { - int i; ENTRY; skc->skc_mag_size = spl_magazine_size(skc); - skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2; + skc->skc_mag_refill = (skc->skc_mag_size + 1) / 2; + on_each_cpu(__spl_magazine_create, skc, 0, 1); - for_each_online_cpu(i) { - skc->skc_mag[i] = spl_magazine_alloc(skc, cpu_to_node(i)); - if (!skc->skc_mag[i]) { - for (i--; i >= 0; i--) - spl_magazine_free(skc->skc_mag[i]); + RETURN(0); +} - RETURN(-ENOMEM); - } - } +static void +__spl_magazine_destroy(void *data) +{ + spl_kmem_cache_t *skc = data; + spl_kmem_magazine_t *skm = skc->skc_mag[smp_processor_id()]; - RETURN(0); + (void)spl_cache_flush(skc, skm, skm->skm_avail); + spl_magazine_free(skm); } +/* + * Destroy all pre-cpu magazines. 
+ */ static void spl_magazine_destroy(spl_kmem_cache_t *skc) { - spl_kmem_magazine_t *skm; - int i; ENTRY; - - for_each_online_cpu(i) { - skm = skc->skc_mag[i]; - (void)spl_cache_flush(skc, skm, skm->skm_avail); - spl_magazine_free(skm); - } - + on_each_cpu(__spl_magazine_destroy, skc, 0, 1); EXIT; } +/* + * Create a object cache based on the following arguments: + * name cache name + * size cache object size + * align cache object alignment + * ctor cache object constructor + * dtor cache object destructor + * reclaim cache object reclaim + * priv cache private data for ctor/dtor/reclaim + * vmp unused must be NULL + * flags + * KMC_NOTOUCH Disable cache object aging (unsupported) + * KMC_NODEBUG Disable debugging (unsupported) + * KMC_NOMAGAZINE Disable magazine (unsupported) + * KMC_NOHASH Disable hashing (unsupported) + * KMC_QCACHE Disable qcache (unsupported) + * KMC_KMEM Force kmem backed cache + * KMC_VMEM Force vmem backed cache + * KMC_OFFSLAB Locate objects off the slab + */ spl_kmem_cache_t * spl_kmem_cache_create(char *name, size_t size, size_t align, spl_kmem_ctor_t ctor, @@ -908,6 +1014,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align, skc->skc_obj_size = size; skc->skc_obj_align = SPL_KMEM_CACHE_ALIGN; skc->skc_delay = SPL_KMEM_CACHE_DELAY; + atomic_set(&skc->skc_ref, 0); INIT_LIST_HEAD(&skc->skc_list); INIT_LIST_HEAD(&skc->skc_complete_list); @@ -947,6 +1054,9 @@ spl_kmem_cache_create(char *name, size_t size, size_t align, if (rc) GOTO(out, rc); + spl_init_delayed_work(&skc->skc_work, spl_cache_age, skc); + schedule_delayed_work(&skc->skc_work, 2 * skc->skc_delay * HZ); + down_write(&spl_kmem_cache_sem); list_add_tail(&skc->skc_list, &spl_kmem_cache_list); up_write(&spl_kmem_cache_sem); @@ -959,10 +1069,13 @@ out: } EXPORT_SYMBOL(spl_kmem_cache_create); +/* + * Destroy a cache and all objects assoicated with the cache. 
+ */ void spl_kmem_cache_destroy(spl_kmem_cache_t *skc) { - spl_kmem_slab_t *sks, *m; + DECLARE_WAIT_QUEUE_HEAD(wq); ENTRY; ASSERT(skc->skc_magic == SKC_MAGIC); @@ -971,20 +1084,27 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc) list_del_init(&skc->skc_list); up_write(&spl_kmem_cache_sem); + /* Cancel any and wait for any pending delayed work */ + ASSERT(!test_and_set_bit(KMC_BIT_DESTROY, &skc->skc_flags)); + cancel_delayed_work(&skc->skc_work); + flush_scheduled_work(); + + /* Wait until all current callers complete, this is mainly + * to catch the case where a low memory situation triggers a + * cache reaping action which races with this destroy. */ + wait_event(wq, atomic_read(&skc->skc_ref) == 0); + spl_magazine_destroy(skc); + spl_slab_reclaim(skc, 1); spin_lock(&skc->skc_lock); /* Validate there are no objects in use and free all the * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. */ + ASSERT3U(skc->skc_slab_alloc, ==, 0); + ASSERT3U(skc->skc_obj_alloc, ==, 0); + ASSERT3U(skc->skc_slab_total, ==, 0); + ASSERT3U(skc->skc_obj_total, ==, 0); ASSERT(list_empty(&skc->skc_complete_list)); - ASSERT(skc->skc_slab_alloc == 0); - ASSERT(skc->skc_obj_alloc == 0); - - list_for_each_entry_safe(sks, m, &skc->skc_partial_list, sks_list) - spl_slab_free(sks); - - ASSERT(skc->skc_slab_total == 0); - ASSERT(skc->skc_obj_total == 0); kmem_free(skc->skc_name, skc->skc_name_size); spin_unlock(&skc->skc_lock); @@ -995,6 +1115,10 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc) } EXPORT_SYMBOL(spl_kmem_cache_destroy); +/* + * Allocate an object from a slab attached to the cache. This is used to + * repopulate the per-cpu magazine caches in batches when they run low. + */ static void * spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks) { @@ -1030,10 +1154,11 @@ spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks) return sko->sko_addr; } -/* No available objects create a new slab. 
Since this is an - * expensive operation we do it without holding the spinlock - * and only briefly aquire it when we link in the fully - * allocated and constructed slab. +/* + * No available objects on any slabsi, create a new slab. Since this + * is an expensive operation we do it without holding the spinlock and + * only briefly aquire it when we link in the fully allocated and + * constructed slab. */ static spl_kmem_slab_t * spl_cache_grow(spl_kmem_cache_t *skc, int flags) @@ -1042,34 +1167,42 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags) ENTRY; ASSERT(skc->skc_magic == SKC_MAGIC); + local_irq_enable(); + might_sleep(); - if (flags & __GFP_WAIT) { - flags |= __GFP_NOFAIL; - local_irq_enable(); - might_sleep(); - } - - sks = spl_slab_alloc(skc, flags); - if (sks == NULL) { - if (flags & __GFP_WAIT) - local_irq_disable(); - - RETURN(NULL); + /* + * Before allocating a new slab check if the slab is being reaped. + * If it is there is a good chance we can wait until it finishes + * and then use one of the newly freed but not aged-out slabs. + */ + if (test_bit(KMC_BIT_REAPING, &skc->skc_flags)) { + schedule(); + GOTO(out, sks= NULL); } - if (flags & __GFP_WAIT) - local_irq_disable(); + /* Allocate a new slab for the cache */ + sks = spl_slab_alloc(skc, flags | __GFP_NORETRY | __GFP_NOWARN); + if (sks == NULL) + GOTO(out, sks = NULL); - /* Link the new empty slab in to the end of skc_partial_list */ + /* Link the new empty slab in to the end of skc_partial_list. */ spin_lock(&skc->skc_lock); skc->skc_slab_total++; skc->skc_obj_total += sks->sks_objs; list_add_tail(&sks->sks_list, &skc->skc_partial_list); spin_unlock(&skc->skc_lock); +out: + local_irq_disable(); RETURN(sks); } +/* + * Refill a per-cpu magazine with objects from the slabs for this + * cache. Ideally the magazine can be repopulated using existing + * objects which have been released, however if we are unable to + * locate enough free objects new slabs of objects will be created. 
+ */ static int spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags) { @@ -1080,13 +1213,11 @@ spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags) ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(skm->skm_magic == SKM_MAGIC); - /* XXX: Check for refill bouncing by age perhaps */ refill = MIN(skm->skm_refill, skm->skm_size - skm->skm_avail); - spin_lock(&skc->skc_lock); while (refill > 0) { - /* No slabs available we must grow the cache */ + /* No slabs available we may need to grow the cache */ if (list_empty(&skc->skc_partial_list)) { spin_unlock(&skc->skc_lock); @@ -1135,6 +1266,9 @@ out: RETURN(rc); } +/* + * Release an object back to the slab from which it came. + */ static void spl_cache_shrink(spl_kmem_cache_t *skc, void *obj) { @@ -1176,6 +1310,13 @@ spl_cache_shrink(spl_kmem_cache_t *skc, void *obj) EXIT; } +/* + * Release a batch of objects from a per-cpu magazine back to their + * respective slabs. This occurs when we exceed the magazine size, + * are under memory pressure, when the cache is idle, or during + * cache cleanup. The flush argument contains the number of entries + * to remove from the magazine. + */ static int spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) { @@ -1185,12 +1326,17 @@ spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(skm->skm_magic == SKM_MAGIC); + /* + * XXX: Currently we simply return objects from the magazine to + * the slabs in fifo order. The ideal thing to do from a memory + * fragmentation standpoint is to cheaply determine the set of + * objects in the magazine which will result in the largest + * number of free slabs if released from the magazine. 
+ */ spin_lock(&skc->skc_lock); - for (i = 0; i < count; i++) spl_cache_shrink(skc, skm->skm_objs[i]); -// __spl_slab_reclaim(skc); skm->skm_avail -= count; memmove(skm->skm_objs, &(skm->skm_objs[count]), sizeof(void *) * skm->skm_avail); @@ -1200,6 +1346,10 @@ spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) RETURN(count); } +/* + * Allocate an object from the per-cpu magazine, or if the magazine + * is empty directly allocate from a slab and repopulate the magazine. + */ void * spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags) { @@ -1209,7 +1359,9 @@ spl_kmem_cache_alloc(spl_kmem_cache_t *skc, int flags) ENTRY; ASSERT(skc->skc_magic == SKC_MAGIC); - ASSERT(flags & KM_SLEEP); /* XXX: KM_NOSLEEP not yet supported */ + ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags)); + ASSERT(flags & KM_SLEEP); + atomic_inc(&skc->skc_ref); local_irq_save(irq_flags); restart: @@ -1225,8 +1377,7 @@ restart: if (likely(skm->skm_avail)) { /* Object available in CPU cache, use it */ obj = skm->skm_objs[--skm->skm_avail]; - if (!(skc->skc_flags & KMC_NOTOUCH)) - skm->skm_age = jiffies; + skm->skm_age = jiffies; } else { /* Per-CPU cache empty, directly allocate from * the slab and refill the per-CPU cache. */ @@ -1240,11 +1391,18 @@ restart: /* Pre-emptively migrate object to CPU L1 cache */ prefetchw(obj); + atomic_dec(&skc->skc_ref); RETURN(obj); } EXPORT_SYMBOL(spl_kmem_cache_alloc); +/* + * Free an object back to the local per-cpu magazine, there is no + * guarantee that this is the same magazine the object was originally + * allocated from. We may need to flush entire from the magazine + * back to the slabs to make space. 
+ */ void spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj) { @@ -1253,6 +1411,8 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj) ENTRY; ASSERT(skc->skc_magic == SKC_MAGIC); + ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags)); + atomic_inc(&skc->skc_ref); local_irq_save(flags); /* Safe to update per-cpu structure without lock, but @@ -1270,62 +1430,87 @@ spl_kmem_cache_free(spl_kmem_cache_t *skc, void *obj) skm->skm_objs[skm->skm_avail++] = obj; local_irq_restore(flags); + atomic_dec(&skc->skc_ref); EXIT; } EXPORT_SYMBOL(spl_kmem_cache_free); +/* + * The generic shrinker function for all caches. Under linux a shrinker + * may not be tightly coupled with a slab cache. In fact linux always + * systematically trys calling all registered shrinker callbacks which + * report that they contain unused objects. Because of this we only + * register one shrinker function in the shim layer for all slab caches. + * We always attempt to shrink all caches when this generic shrinker + * is called. The shrinker should return the number of free objects + * in the cache when called with nr_to_scan == 0 but not attempt to + * free any objects. When nr_to_scan > 0 it is a request that nr_to_scan + * objects should be freed, because Solaris semantics are to free + * all available objects we may free more objects than requested. + */ static int spl_kmem_cache_generic_shrinker(int nr_to_scan, unsigned int gfp_mask) { spl_kmem_cache_t *skc; + int unused = 0; - /* Under linux a shrinker is not tightly coupled with a slab - * cache. In fact linux always systematically trys calling all - * registered shrinker callbacks until its target reclamation level - * is reached. Because of this we only register one shrinker - * function in the shim layer for all slab caches. And we always - * attempt to shrink all caches when this generic shrinker is called. 
- */ down_read(&spl_kmem_cache_sem); - - list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) - spl_kmem_cache_reap_now(skc); - + list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) { + if (nr_to_scan) + spl_kmem_cache_reap_now(skc); + + /* + * Presume everything alloc'ed in reclaimable, this ensures + * we are called again with nr_to_scan > 0 so can try and + * reclaim. The exact number is not important either so + * we forgo taking this already highly contented lock. + */ + unused += skc->skc_obj_alloc; + } up_read(&spl_kmem_cache_sem); - /* XXX: Under linux we should return the remaining number of - * entries in the cache. We should do this as well. - */ - return 1; + return (unused * sysctl_vfs_cache_pressure) / 100; } +/* + * Call the registered reclaim function for a cache. Depending on how + * many and which objects are released it may simply repopulate the + * local magazine which will then need to age-out. Objects which cannot + * fit in the magazine we will be released back to their slabs which will + * also need to age out before being release. This is all just best + * effort and we do not want to thrash creating and destroying slabs. 
+ */ void spl_kmem_cache_reap_now(spl_kmem_cache_t *skc) { - spl_kmem_magazine_t *skm; - int i; ENTRY; ASSERT(skc->skc_magic == SKC_MAGIC); + ASSERT(!test_bit(KMC_BIT_DESTROY, &skc->skc_flags)); - if (skc->skc_reclaim) - skc->skc_reclaim(skc->skc_private); + /* Prevent concurrent cache reaping when contended */ + if (test_and_set_bit(KMC_BIT_REAPING, &skc->skc_flags)) { + EXIT; + return; + } - /* Ensure per-CPU caches which are idle gradually flush */ - for_each_online_cpu(i) { - skm = skc->skc_mag[i]; + atomic_inc(&skc->skc_ref); - if (time_after(jiffies, skm->skm_age + skc->skc_delay * HZ)) - (void)spl_cache_flush(skc, skm, skm->skm_refill); - } + if (skc->skc_reclaim) + skc->skc_reclaim(skc->skc_private); - spl_slab_reclaim(skc); + spl_slab_reclaim(skc, 0); + clear_bit(KMC_BIT_REAPING, &skc->skc_flags); + atomic_dec(&skc->skc_ref); EXIT; } EXPORT_SYMBOL(spl_kmem_cache_reap_now); +/* + * Reap all free slabs from all registered caches. + */ void spl_kmem_reap(void) { diff --git a/module/splat/splat-internal.h b/module/splat/splat-internal.h index 87c47b1..0fa177c 100644 --- a/module/splat/splat-internal.h +++ b/module/splat/splat-internal.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include diff --git a/module/splat/splat-kmem.c b/module/splat/splat-kmem.c index 9b96fce..c592e98 100644 --- a/module/splat/splat-kmem.c +++ b/module/splat/splat-kmem.c @@ -4,9 +4,9 @@ * Copyright (c) 2008 Lawrence Livermore National Security, LLC. 
* Produced at Lawrence Livermore National Laboratory * Written by: - * Brian Behlendorf , - * Herb Wartens , - * Jim Garlick + * Brian Behlendorf , + * Herb Wartens , + * Jim Garlick * UCRL-CODE-235197 * * This is free software; you can redistribute it and/or modify it @@ -47,30 +47,37 @@ #define SPLAT_KMEM_TEST4_DESC "Memory allocation test (vmem_zalloc)" #define SPLAT_KMEM_TEST5_ID 0x0105 -#define SPLAT_KMEM_TEST5_NAME "kmem_small" +#define SPLAT_KMEM_TEST5_NAME "slab_small" #define SPLAT_KMEM_TEST5_DESC "Slab ctor/dtor test (small)" #define SPLAT_KMEM_TEST6_ID 0x0106 -#define SPLAT_KMEM_TEST6_NAME "kmem_large" +#define SPLAT_KMEM_TEST6_NAME "slab_large" #define SPLAT_KMEM_TEST6_DESC "Slab ctor/dtor test (large)" #define SPLAT_KMEM_TEST7_ID 0x0107 -#define SPLAT_KMEM_TEST7_NAME "kmem_reap" -#define SPLAT_KMEM_TEST7_DESC "Slab reaping test" +#define SPLAT_KMEM_TEST7_NAME "slab_align" +#define SPLAT_KMEM_TEST7_DESC "Slab alignment test" #define SPLAT_KMEM_TEST8_ID 0x0108 -#define SPLAT_KMEM_TEST8_NAME "kmem_lock" -#define SPLAT_KMEM_TEST8_DESC "Slab locking test" +#define SPLAT_KMEM_TEST8_NAME "slab_reap" +#define SPLAT_KMEM_TEST8_DESC "Slab reaping test" #define SPLAT_KMEM_TEST9_ID 0x0109 -#define SPLAT_KMEM_TEST9_NAME "kmem_align" -#define SPLAT_KMEM_TEST9_DESC "Slab alignment test" +#define SPLAT_KMEM_TEST9_NAME "slab_age" +#define SPLAT_KMEM_TEST9_DESC "Slab aging test" + +#define SPLAT_KMEM_TEST10_ID 0x010a +#define SPLAT_KMEM_TEST10_NAME "slab_lock" +#define SPLAT_KMEM_TEST10_DESC "Slab locking test" + +#define SPLAT_KMEM_TEST11_ID 0x010b +#define SPLAT_KMEM_TEST11_NAME "slab_overcommit" +#define SPLAT_KMEM_TEST11_DESC "Slab memory overcommit test" #define SPLAT_KMEM_ALLOC_COUNT 10 #define SPLAT_VMEM_ALLOC_COUNT 10 -/* XXX - This test may fail under tight memory conditions */ static int splat_kmem_test1(struct file *file, void *arg) { @@ -96,8 +103,8 @@ splat_kmem_test1(struct file *file, void *arg) kmem_free(ptr[i], size); splat_vprint(file, 
SPLAT_KMEM_TEST1_NAME, - "%d byte allocations, %d/%d successful\n", - size, count, SPLAT_KMEM_ALLOC_COUNT); + "%d byte allocations, %d/%d successful\n", + size, count, SPLAT_KMEM_ALLOC_COUNT); if (count != SPLAT_KMEM_ALLOC_COUNT) rc = -ENOMEM; @@ -134,8 +141,8 @@ splat_kmem_test2(struct file *file, void *arg) for (j = 0; j < size; j++) { if (((char *)ptr[i])[j] != '\0') { splat_vprint(file, SPLAT_KMEM_TEST2_NAME, - "%d-byte allocation was " - "not zeroed\n", size); + "%d-byte allocation was " + "not zeroed\n", size); rc = -EFAULT; } } @@ -146,8 +153,8 @@ splat_kmem_test2(struct file *file, void *arg) kmem_free(ptr[i], size); splat_vprint(file, SPLAT_KMEM_TEST2_NAME, - "%d byte allocations, %d/%d successful\n", - size, count, SPLAT_KMEM_ALLOC_COUNT); + "%d byte allocations, %d/%d successful\n", + size, count, SPLAT_KMEM_ALLOC_COUNT); if (count != SPLAT_KMEM_ALLOC_COUNT) rc = -ENOMEM; @@ -180,8 +187,8 @@ splat_kmem_test3(struct file *file, void *arg) vmem_free(ptr[i], size); splat_vprint(file, SPLAT_KMEM_TEST3_NAME, - "%d byte allocations, %d/%d successful\n", - size, count, SPLAT_VMEM_ALLOC_COUNT); + "%d byte allocations, %d/%d successful\n", + size, count, SPLAT_VMEM_ALLOC_COUNT); if (count != SPLAT_VMEM_ALLOC_COUNT) rc = -ENOMEM; @@ -212,8 +219,8 @@ splat_kmem_test4(struct file *file, void *arg) for (j = 0; j < size; j++) { if (((char *)ptr[i])[j] != '\0') { splat_vprint(file, SPLAT_KMEM_TEST4_NAME, - "%d-byte allocation was " - "not zeroed\n", size); + "%d-byte allocation was " + "not zeroed\n", size); rc = -EFAULT; } } @@ -224,8 +231,8 @@ splat_kmem_test4(struct file *file, void *arg) vmem_free(ptr[i], size); splat_vprint(file, SPLAT_KMEM_TEST4_NAME, - "%d byte allocations, %d/%d successful\n", - size, count, SPLAT_VMEM_ALLOC_COUNT); + "%d byte allocations, %d/%d successful\n", + size, count, SPLAT_VMEM_ALLOC_COUNT); if (count != SPLAT_VMEM_ALLOC_COUNT) rc = -ENOMEM; @@ -237,8 +244,11 @@ splat_kmem_test4(struct file *file, void *arg) #define 
SPLAT_KMEM_TEST_MAGIC 0x004488CCUL #define SPLAT_KMEM_CACHE_NAME "kmem_test" -#define SPLAT_KMEM_OBJ_COUNT 128 -#define SPLAT_KMEM_OBJ_RECLAIM 16 +#define SPLAT_KMEM_OBJ_COUNT 1024 +#define SPLAT_KMEM_OBJ_RECLAIM 20 /* percent */ +#define SPLAT_KMEM_THREADS 32 + +#define KCP_FLAG_READY 0x01 typedef struct kmem_cache_data { unsigned long kcd_magic; @@ -246,21 +256,95 @@ typedef struct kmem_cache_data { char kcd_buf[0]; } kmem_cache_data_t; +typedef struct kmem_cache_thread { + kmem_cache_t *kct_cache; + spinlock_t kct_lock; + int kct_id; + int kct_kcd_count; + kmem_cache_data_t *kct_kcd[0]; +} kmem_cache_thread_t; + typedef struct kmem_cache_priv { unsigned long kcp_magic; struct file *kcp_file; kmem_cache_t *kcp_cache; - kmem_cache_data_t *kcp_kcd[SPLAT_KMEM_OBJ_COUNT]; spinlock_t kcp_lock; - wait_queue_head_t kcp_waitq; + wait_queue_head_t kcp_ctl_waitq; + wait_queue_head_t kcp_thr_waitq; + int kcp_flags; + int kcp_kct_count; + kmem_cache_thread_t *kcp_kct[SPLAT_KMEM_THREADS]; int kcp_size; int kcp_align; int kcp_count; - int kcp_threads; int kcp_alloc; int kcp_rc; + int kcp_kcd_count; + kmem_cache_data_t *kcp_kcd[0]; } kmem_cache_priv_t; +static kmem_cache_priv_t * +splat_kmem_cache_test_kcp_alloc(struct file *file, char *name, + int size, int align, int alloc, int count) +{ + kmem_cache_priv_t *kcp; + + kcp = vmem_zalloc(sizeof(kmem_cache_priv_t) + + count * sizeof(kmem_cache_data_t *), KM_SLEEP); + if (!kcp) + return NULL; + + kcp->kcp_magic = SPLAT_KMEM_TEST_MAGIC; + kcp->kcp_file = file; + kcp->kcp_cache = NULL; + spin_lock_init(&kcp->kcp_lock); + init_waitqueue_head(&kcp->kcp_ctl_waitq); + init_waitqueue_head(&kcp->kcp_thr_waitq); + kcp->kcp_flags = 0; + kcp->kcp_kct_count = -1; + kcp->kcp_size = size; + kcp->kcp_align = align; + kcp->kcp_count = 0; + kcp->kcp_alloc = alloc; + kcp->kcp_rc = 0; + kcp->kcp_kcd_count = count; + + return kcp; +} + +static void +splat_kmem_cache_test_kcp_free(kmem_cache_priv_t *kcp) +{ + vmem_free(kcp, sizeof(kmem_cache_priv_t) + 
+ kcp->kcp_kcd_count * sizeof(kmem_cache_data_t *)); +} + +static kmem_cache_thread_t * +splat_kmem_cache_test_kct_alloc(int id, int count) +{ + kmem_cache_thread_t *kct; + + ASSERTF(id < SPLAT_KMEM_THREADS, "id=%d\n", id); + kct = vmem_zalloc(sizeof(kmem_cache_thread_t) + + count * sizeof(kmem_cache_data_t *), KM_SLEEP); + if (!kct) + return NULL; + + spin_lock_init(&kct->kct_lock); + kct->kct_cache = NULL; + kct->kct_id = id; + kct->kct_kcd_count = count; + + return kct; +} + +static void +splat_kmem_cache_test_kct_free(kmem_cache_thread_t *kct) +{ + vmem_free(kct, sizeof(kmem_cache_thread_t) + + kct->kct_kcd_count * sizeof(kmem_cache_data_t *)); +} + static int splat_kmem_cache_test_constructor(void *ptr, void *priv, int flags) { @@ -293,83 +377,340 @@ splat_kmem_cache_test_destructor(void *ptr, void *priv) return; } +/* + * Generic reclaim function which assumes that all objects may + * be reclaimed at any time. We free a small percentage of the + * objects linked off the kcp or kct[] every time we are called. 
+ */ +static void +splat_kmem_cache_test_reclaim(void *priv) +{ + kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)priv; + kmem_cache_thread_t *kct; + int i, j, count; + + ASSERT(kcp->kcp_magic == SPLAT_KMEM_TEST_MAGIC); + count = kcp->kcp_kcd_count * SPLAT_KMEM_OBJ_RECLAIM / 100; + + /* Objects directly attached to the kcp */ + spin_lock(&kcp->kcp_lock); + for (i = 0; i < kcp->kcp_kcd_count; i++) { + if (kcp->kcp_kcd[i]) { + kmem_cache_free(kcp->kcp_cache, kcp->kcp_kcd[i]); + kcp->kcp_kcd[i] = NULL; + + if ((--count) == 0) + break; + } + } + spin_unlock(&kcp->kcp_lock); + + /* No threads containing objects to consider */ + if (kcp->kcp_kct_count == -1) + return; + + /* Objects attached to a kct thread */ + for (i = 0; i < kcp->kcp_kct_count; i++) { + spin_lock(&kcp->kcp_lock); + kct = kcp->kcp_kct[i]; + spin_unlock(&kcp->kcp_lock); + if (!kct) + continue; + + spin_lock(&kct->kct_lock); + count = kct->kct_kcd_count * SPLAT_KMEM_OBJ_RECLAIM / 100; + + for (j = 0; j < kct->kct_kcd_count; j++) { + if (kct->kct_kcd[j]) { + kmem_cache_free(kcp->kcp_cache,kct->kct_kcd[j]); + kct->kct_kcd[j] = NULL; + + if ((--count) == 0) + break; + } + } + spin_unlock(&kct->kct_lock); + } + + return; +} + +static int +splat_kmem_cache_test_threads(kmem_cache_priv_t *kcp, int threads) +{ + int rc; + + spin_lock(&kcp->kcp_lock); + rc = (kcp->kcp_kct_count == threads); + spin_unlock(&kcp->kcp_lock); + + return rc; +} + +static int +splat_kmem_cache_test_flags(kmem_cache_priv_t *kcp, int flags) +{ + int rc; + + spin_lock(&kcp->kcp_lock); + rc = (kcp->kcp_flags & flags); + spin_unlock(&kcp->kcp_lock); + + return rc; +} + +static void +splat_kmem_cache_test_thread(void *arg) +{ + kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)arg; + kmem_cache_thread_t *kct; + int rc = 0, id, i; + void *obj; + + ASSERT(kcp->kcp_magic == SPLAT_KMEM_TEST_MAGIC); + + /* Assign thread ids */ + spin_lock(&kcp->kcp_lock); + if (kcp->kcp_kct_count == -1) + kcp->kcp_kct_count = 0; + + id = kcp->kcp_kct_count; + 
kcp->kcp_kct_count++; + spin_unlock(&kcp->kcp_lock); + + kct = splat_kmem_cache_test_kct_alloc(id, kcp->kcp_alloc); + if (!kct) { + rc = -ENOMEM; + goto out; + } + + spin_lock(&kcp->kcp_lock); + kcp->kcp_kct[id] = kct; + spin_unlock(&kcp->kcp_lock); + + /* Wait for all threads to have started and report they are ready */ + if (kcp->kcp_kct_count == SPLAT_KMEM_THREADS) + wake_up(&kcp->kcp_ctl_waitq); + + wait_event(kcp->kcp_thr_waitq, + splat_kmem_cache_test_flags(kcp, KCP_FLAG_READY)); + + /* + * Updates to kct->kct_kcd[] are performed under a spin_lock so + * they may safely run concurrent with the reclaim function. If + * we are not in a low memory situation we have one lock per- + * thread so they are not expected to be contended. + */ + for (i = 0; i < kct->kct_kcd_count; i++) { + obj = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP); + spin_lock(&kct->kct_lock); + kct->kct_kcd[i] = obj; + spin_unlock(&kct->kct_lock); + } + + for (i = 0; i < kct->kct_kcd_count; i++) { + spin_lock(&kct->kct_lock); + if (kct->kct_kcd[i]) { + kmem_cache_free(kcp->kcp_cache, kct->kct_kcd[i]); + kct->kct_kcd[i] = NULL; + } + spin_unlock(&kct->kct_lock); + } +out: + spin_lock(&kcp->kcp_lock); + if (kct) { + splat_kmem_cache_test_kct_free(kct); + kcp->kcp_kct[id] = kct = NULL; + } + + if (!kcp->kcp_rc) + kcp->kcp_rc = rc; + + if ((--kcp->kcp_kct_count) == 0) + wake_up(&kcp->kcp_ctl_waitq); + + spin_unlock(&kcp->kcp_lock); + + thread_exit(); +} + static int splat_kmem_cache_test(struct file *file, void *arg, char *name, - int size, int align, int flags) + int size, int align, int flags) { - kmem_cache_t *cache = NULL; - kmem_cache_data_t *kcd = NULL; - kmem_cache_priv_t kcp; + kmem_cache_priv_t *kcp; + kmem_cache_data_t *kcd; int rc = 0, max; - kcp.kcp_magic = SPLAT_KMEM_TEST_MAGIC; - kcp.kcp_file = file; - kcp.kcp_size = size; - kcp.kcp_align = align; - kcp.kcp_count = 0; - kcp.kcp_rc = 0; - - cache = kmem_cache_create(SPLAT_KMEM_CACHE_NAME, - kcp.kcp_size, kcp.kcp_align, - 
splat_kmem_cache_test_constructor, - splat_kmem_cache_test_destructor, - NULL, &kcp, NULL, flags); - if (!cache) { + kcp = splat_kmem_cache_test_kcp_alloc(file, name, size, align, 0, 1); + if (!kcp) { + splat_vprint(file, name, "Unable to create '%s'\n", "kcp"); + return -ENOMEM; + } + + kcp->kcp_cache = + kmem_cache_create(SPLAT_KMEM_CACHE_NAME, + kcp->kcp_size, kcp->kcp_align, + splat_kmem_cache_test_constructor, + splat_kmem_cache_test_destructor, + NULL, kcp, NULL, flags); + if (!kcp->kcp_cache) { splat_vprint(file, name, - "Unable to create '%s'\n", + "Unable to create '%s'\n", SPLAT_KMEM_CACHE_NAME); - return -ENOMEM; + rc = -ENOMEM; + goto out_free; } - kcd = kmem_cache_alloc(cache, KM_SLEEP); + kcd = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP); if (!kcd) { splat_vprint(file, name, - "Unable to allocate from '%s'\n", - SPLAT_KMEM_CACHE_NAME); + "Unable to allocate from '%s'\n", + SPLAT_KMEM_CACHE_NAME); rc = -EINVAL; goto out_free; } + spin_lock(&kcp->kcp_lock); + kcp->kcp_kcd[0] = kcd; + spin_unlock(&kcp->kcp_lock); - if (!kcd->kcd_flag) { + if (!kcp->kcp_kcd[0]->kcd_flag) { splat_vprint(file, name, - "Failed to run contructor for '%s'\n", - SPLAT_KMEM_CACHE_NAME); + "Failed to run contructor for '%s'\n", + SPLAT_KMEM_CACHE_NAME); rc = -EINVAL; goto out_free; } - if (kcd->kcd_magic != kcp.kcp_magic) { + if (kcp->kcp_kcd[0]->kcd_magic != kcp->kcp_magic) { splat_vprint(file, name, - "Failed to pass private data to constructor " - "for '%s'\n", SPLAT_KMEM_CACHE_NAME); + "Failed to pass private data to constructor " + "for '%s'\n", SPLAT_KMEM_CACHE_NAME); rc = -EINVAL; goto out_free; } - max = kcp.kcp_count; - kmem_cache_free(cache, kcd); + max = kcp->kcp_count; + spin_lock(&kcp->kcp_lock); + kmem_cache_free(kcp->kcp_cache, kcp->kcp_kcd[0]); + kcp->kcp_kcd[0] = NULL; + spin_unlock(&kcp->kcp_lock); /* Destroy the entire cache which will force destructors to * run and we can verify one was called for every object */ - kmem_cache_destroy(cache); - if 
(kcp.kcp_count) { + kmem_cache_destroy(kcp->kcp_cache); + if (kcp->kcp_count) { splat_vprint(file, name, - "Failed to run destructor on all slab objects " - "for '%s'\n", SPLAT_KMEM_CACHE_NAME); + "Failed to run destructor on all slab objects " + "for '%s'\n", SPLAT_KMEM_CACHE_NAME); rc = -EINVAL; } splat_vprint(file, name, - "Successfully ran ctors/dtors for %d elements in '%s'\n", - max, SPLAT_KMEM_CACHE_NAME); + "Successfully ran ctors/dtors for %d elements in '%s'\n", + max, SPLAT_KMEM_CACHE_NAME); return rc; out_free: - if (kcd) - kmem_cache_free(cache, kcd); + if (kcp->kcp_kcd[0]) { + spin_lock(&kcp->kcp_lock); + kmem_cache_free(kcp->kcp_cache, kcp->kcp_kcd[0]); + kcp->kcp_kcd[0] = NULL; + spin_unlock(&kcp->kcp_lock); + } + + if (kcp->kcp_cache) + kmem_cache_destroy(kcp->kcp_cache); + + splat_kmem_cache_test_kcp_free(kcp); + + return rc; +} + +static int +splat_kmem_cache_thread_test(struct file *file, void *arg, char *name, + int size, int alloc) +{ + kmem_cache_priv_t *kcp; + kthread_t *thr; + struct timespec start, stop, delta; + char cache_name[32]; + int i, rc = 0; + + kcp = splat_kmem_cache_test_kcp_alloc(file, name, size, 0, alloc, 0); + if (!kcp) { + splat_vprint(file, name, "Unable to create '%s'\n", "kcp"); + return -ENOMEM; + } + + (void)snprintf(cache_name, 32, "%s-%d-%d", + SPLAT_KMEM_CACHE_NAME, size, alloc); + kcp->kcp_cache = + kmem_cache_create(cache_name, kcp->kcp_size, 0, + splat_kmem_cache_test_constructor, + splat_kmem_cache_test_destructor, + splat_kmem_cache_test_reclaim, + kcp, NULL, KMC_VMEM); + if (!kcp->kcp_cache) { + splat_vprint(file, name, "Unable to create '%s'\n", cache_name); + rc = -ENOMEM; + goto out_kcp; + } + + start = current_kernel_time(); + + for (i = 0; i < SPLAT_KMEM_THREADS; i++) { + thr = thread_create(NULL, 0, + splat_kmem_cache_test_thread, + kcp, 0, &p0, TS_RUN, minclsyspri); + if (thr == NULL) { + rc = -ESRCH; + goto out_cache; + } + } + + /* Sleep until all threads have started, then set the ready + * flag and 
wake them all up for maximum concurrency. */ + wait_event(kcp->kcp_ctl_waitq, + splat_kmem_cache_test_threads(kcp, SPLAT_KMEM_THREADS)); + + spin_lock(&kcp->kcp_lock); + kcp->kcp_flags |= KCP_FLAG_READY; + spin_unlock(&kcp->kcp_lock); + wake_up_all(&kcp->kcp_thr_waitq); + + /* Sleep until all thread have finished */ + wait_event(kcp->kcp_ctl_waitq, splat_kmem_cache_test_threads(kcp, 0)); + + stop = current_kernel_time(); + delta = timespec_sub(stop, start); - kmem_cache_destroy(cache); + splat_vprint(file, name, + "%-22s %2ld.%09ld\t" + "%lu/%lu/%lu\t%lu/%lu/%lu\n", + kcp->kcp_cache->skc_name, + delta.tv_sec, delta.tv_nsec, + (unsigned long)kcp->kcp_cache->skc_slab_total, + (unsigned long)kcp->kcp_cache->skc_slab_max, + (unsigned long)(kcp->kcp_alloc * + SPLAT_KMEM_THREADS / + SPL_KMEM_CACHE_OBJ_PER_SLAB), + (unsigned long)kcp->kcp_cache->skc_obj_total, + (unsigned long)kcp->kcp_cache->skc_obj_max, + (unsigned long)(kcp->kcp_alloc * + SPLAT_KMEM_THREADS)); + + if (delta.tv_sec >= 5) + rc = -ETIME; + + if (!rc && kcp->kcp_rc) + rc = kcp->kcp_rc; + +out_cache: + kmem_cache_destroy(kcp->kcp_cache); +out_kcp: + splat_kmem_cache_test_kcp_free(kcp); return rc; } @@ -409,291 +750,279 @@ splat_kmem_test6(struct file *file, void *arg) return splat_kmem_cache_test(file, arg, name, 128*1028, 0, KMC_VMEM); } -static void -splat_kmem_cache_test_reclaim(void *priv) +/* Validate object alignment cache behavior for caches */ +static int +splat_kmem_test7(struct file *file, void *arg) { - kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)priv; - int i, count; - - count = min(SPLAT_KMEM_OBJ_RECLAIM, kcp->kcp_count); - splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST7_NAME, - "Reaping %d objects from '%s'\n", count, - SPLAT_KMEM_CACHE_NAME); - - for (i = 0; i < SPLAT_KMEM_OBJ_COUNT; i++) { - if (kcp->kcp_kcd[i]) { - kmem_cache_free(kcp->kcp_cache, kcp->kcp_kcd[i]); - kcp->kcp_kcd[i] = NULL; + char *name = SPLAT_KMEM_TEST7_NAME; + int i, rc; - if (--count == 0) - break; - } + for (i = 8; i <= 
PAGE_SIZE; i *= 2) { + rc = splat_kmem_cache_test(file, arg, name, 157, i, 0); + if (rc) + return rc; } - return; + return rc; } static int -splat_kmem_test7(struct file *file, void *arg) +splat_kmem_test8(struct file *file, void *arg) { - kmem_cache_t *cache; - kmem_cache_priv_t kcp; - int i, rc = 0; - - kcp.kcp_magic = SPLAT_KMEM_TEST_MAGIC; - kcp.kcp_file = file; - kcp.kcp_size = 256; - kcp.kcp_count = 0; - kcp.kcp_rc = 0; - - cache = kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp.kcp_size, 0, - splat_kmem_cache_test_constructor, - splat_kmem_cache_test_destructor, - splat_kmem_cache_test_reclaim, - &kcp, NULL, 0); - if (!cache) { - splat_vprint(file, SPLAT_KMEM_TEST7_NAME, - "Unable to create '%s'\n", SPLAT_KMEM_CACHE_NAME); + kmem_cache_priv_t *kcp; + kmem_cache_data_t *kcd; + int i, j, rc = 0; + + kcp = splat_kmem_cache_test_kcp_alloc(file, SPLAT_KMEM_TEST8_NAME, + 256, 0, 0, SPLAT_KMEM_OBJ_COUNT); + if (!kcp) { + splat_vprint(file, SPLAT_KMEM_TEST8_NAME, + "Unable to create '%s'\n", "kcp"); return -ENOMEM; } - kcp.kcp_cache = cache; + kcp->kcp_cache = + kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp->kcp_size, 0, + splat_kmem_cache_test_constructor, + splat_kmem_cache_test_destructor, + splat_kmem_cache_test_reclaim, + kcp, NULL, 0); + if (!kcp->kcp_cache) { + splat_kmem_cache_test_kcp_free(kcp); + splat_vprint(file, SPLAT_KMEM_TEST8_NAME, + "Unable to create '%s'\n", SPLAT_KMEM_CACHE_NAME); + return -ENOMEM; + } for (i = 0; i < SPLAT_KMEM_OBJ_COUNT; i++) { - /* All allocations need not succeed */ - kcp.kcp_kcd[i] = kmem_cache_alloc(cache, KM_SLEEP); - if (!kcp.kcp_kcd[i]) { - splat_vprint(file, SPLAT_KMEM_TEST7_NAME, - "Unable to allocate from '%s'\n", - SPLAT_KMEM_CACHE_NAME); + kcd = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP); + spin_lock(&kcp->kcp_lock); + kcp->kcp_kcd[i] = kcd; + spin_unlock(&kcp->kcp_lock); + if (!kcd) { + splat_vprint(file, SPLAT_KMEM_TEST8_NAME, + "Unable to allocate from '%s'\n", + SPLAT_KMEM_CACHE_NAME); } } - ASSERT(kcp.kcp_count > 
0); - /* Request the slab cache free any objects it can. For a few reasons * this may not immediately result in more free memory even if objects * are freed. First off, due to fragmentation we may not be able to * reclaim any slabs. Secondly, even if we do we fully clear some * slabs we will not want to immedately reclaim all of them because * we may contend with cache allocs and thrash. What we want to see - * is slab size decrease more gradually as it becomes clear they + * is the slab size decrease more gradually as it becomes clear they * will not be needed. This should be acheivable in less than minute * if it takes longer than this something has gone wrong. */ for (i = 0; i < 60; i++) { - kmem_cache_reap_now(cache); - splat_vprint(file, SPLAT_KMEM_TEST7_NAME, - "%s cache objects %d, slabs %u/%u objs %u/%u\n", - SPLAT_KMEM_CACHE_NAME, kcp.kcp_count, - (unsigned)cache->skc_slab_alloc, - (unsigned)cache->skc_slab_total, - (unsigned)cache->skc_obj_alloc, - (unsigned)cache->skc_obj_total); - - if (cache->skc_obj_total == 0) + kmem_cache_reap_now(kcp->kcp_cache); + splat_vprint(file, SPLAT_KMEM_TEST8_NAME, + "%s cache objects %d, slabs %u/%u objs %u/%u mags ", + SPLAT_KMEM_CACHE_NAME, kcp->kcp_count, + (unsigned)kcp->kcp_cache->skc_slab_alloc, + (unsigned)kcp->kcp_cache->skc_slab_total, + (unsigned)kcp->kcp_cache->skc_obj_alloc, + (unsigned)kcp->kcp_cache->skc_obj_total); + + for_each_online_cpu(j) + splat_print(file, "%u/%u ", + kcp->kcp_cache->skc_mag[j]->skm_avail, + kcp->kcp_cache->skc_mag[j]->skm_size); + + splat_print(file, "%s\n", ""); + + if (kcp->kcp_cache->skc_obj_total == 0) break; set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(HZ); } - if (cache->skc_obj_total == 0) { - splat_vprint(file, SPLAT_KMEM_TEST7_NAME, + if (kcp->kcp_cache->skc_obj_total == 0) { + splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "Successfully created %d objects " "in cache %s and reclaimed them\n", - SPLAT_KMEM_OBJ_COUNT, SPLAT_KMEM_CACHE_NAME); + SPLAT_KMEM_OBJ_COUNT, 
SPLAT_KMEM_CACHE_NAME); } else { - splat_vprint(file, SPLAT_KMEM_TEST7_NAME, + splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "Failed to reclaim %u/%d objects from cache %s\n", - (unsigned)cache->skc_obj_total, SPLAT_KMEM_OBJ_COUNT, - SPLAT_KMEM_CACHE_NAME); + (unsigned)kcp->kcp_cache->skc_obj_total, + SPLAT_KMEM_OBJ_COUNT, SPLAT_KMEM_CACHE_NAME); rc = -ENOMEM; } /* Cleanup our mess (for failure case of time expiring) */ + spin_lock(&kcp->kcp_lock); for (i = 0; i < SPLAT_KMEM_OBJ_COUNT; i++) - if (kcp.kcp_kcd[i]) - kmem_cache_free(cache, kcp.kcp_kcd[i]); + if (kcp->kcp_kcd[i]) + kmem_cache_free(kcp->kcp_cache, kcp->kcp_kcd[i]); + spin_unlock(&kcp->kcp_lock); - kmem_cache_destroy(cache); + kmem_cache_destroy(kcp->kcp_cache); + splat_kmem_cache_test_kcp_free(kcp); return rc; } -static void -splat_kmem_test8_thread(void *arg) +static int +splat_kmem_test9(struct file *file, void *arg) { - kmem_cache_priv_t *kcp = (kmem_cache_priv_t *)arg; - int count = kcp->kcp_alloc, rc = 0, i; - void **objs; - - ASSERT(kcp->kcp_magic == SPLAT_KMEM_TEST_MAGIC); + kmem_cache_priv_t *kcp; + kmem_cache_data_t *kcd; + int i, j, rc = 0, count = SPLAT_KMEM_OBJ_COUNT * 128; + + kcp = splat_kmem_cache_test_kcp_alloc(file, SPLAT_KMEM_TEST9_NAME, + 256, 0, 0, count); + if (!kcp) { + splat_vprint(file, SPLAT_KMEM_TEST9_NAME, + "Unable to create '%s'\n", "kcp"); + return -ENOMEM; + } - objs = vmem_zalloc(count * sizeof(void *), KM_SLEEP); - if (!objs) { - splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST8_NAME, - "Unable to alloc objp array for cache '%s'\n", - kcp->kcp_cache->skc_name); - rc = -ENOMEM; - goto out; + kcp->kcp_cache = + kmem_cache_create(SPLAT_KMEM_CACHE_NAME, kcp->kcp_size, 0, + splat_kmem_cache_test_constructor, + splat_kmem_cache_test_destructor, + NULL, kcp, NULL, 0); + if (!kcp->kcp_cache) { + splat_kmem_cache_test_kcp_free(kcp); + splat_vprint(file, SPLAT_KMEM_TEST9_NAME, + "Unable to create '%s'\n", SPLAT_KMEM_CACHE_NAME); + return -ENOMEM; } for (i = 0; i < count; i++) { - objs[i] = 
kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP); - if (!objs[i]) { - splat_vprint(kcp->kcp_file, SPLAT_KMEM_TEST8_NAME, - "Unable to allocate from cache '%s'\n", - kcp->kcp_cache->skc_name); - rc = -ENOMEM; - break; + kcd = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP); + spin_lock(&kcp->kcp_lock); + kcp->kcp_kcd[i] = kcd; + spin_unlock(&kcp->kcp_lock); + if (!kcd) { + splat_vprint(file, SPLAT_KMEM_TEST9_NAME, + "Unable to allocate from '%s'\n", + SPLAT_KMEM_CACHE_NAME); } } - for (i = 0; i < count; i++) - if (objs[i]) - kmem_cache_free(kcp->kcp_cache, objs[i]); - - vmem_free(objs, count * sizeof(void *)); -out: spin_lock(&kcp->kcp_lock); - if (!kcp->kcp_rc) - kcp->kcp_rc = rc; - - if (--kcp->kcp_threads == 0) - wake_up(&kcp->kcp_waitq); - + for (i = 0; i < count; i++) + if (kcp->kcp_kcd[i]) + kmem_cache_free(kcp->kcp_cache, kcp->kcp_kcd[i]); spin_unlock(&kcp->kcp_lock); - thread_exit(); -} + /* We have allocated a large number of objects thus creating a + * large number of slabs and then free'd them all. However since + * there should be little memory pressure at the moment those + * slabs have not been freed. What we want to see is the slab + * size decrease gradually as it becomes clear they will not be + * needed. This should be achievable in less than a minute + * if it takes longer than this something has gone wrong.
+ */ + for (i = 0; i < 60; i++) { + splat_vprint(file, SPLAT_KMEM_TEST9_NAME, + "%s cache objects %d, slabs %u/%u objs %u/%u mags ", + SPLAT_KMEM_CACHE_NAME, kcp->kcp_count, + (unsigned)kcp->kcp_cache->skc_slab_alloc, + (unsigned)kcp->kcp_cache->skc_slab_total, + (unsigned)kcp->kcp_cache->skc_obj_alloc, + (unsigned)kcp->kcp_cache->skc_obj_total); + + for_each_online_cpu(j) + splat_print(file, "%u/%u ", + kcp->kcp_cache->skc_mag[j]->skm_avail, + kcp->kcp_cache->skc_mag[j]->skm_size); + + splat_print(file, "%s\n", ""); + + if (kcp->kcp_cache->skc_obj_total == 0) + break; -static int -splat_kmem_test8_count(kmem_cache_priv_t *kcp, int threads) -{ - int ret; + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(HZ); + } - spin_lock(&kcp->kcp_lock); - ret = (kcp->kcp_threads == threads); - spin_unlock(&kcp->kcp_lock); + if (kcp->kcp_cache->skc_obj_total == 0) { + splat_vprint(file, SPLAT_KMEM_TEST9_NAME, + "Successfully created %d objects " + "in cache %s and reclaimed them\n", + count, SPLAT_KMEM_CACHE_NAME); + } else { + splat_vprint(file, SPLAT_KMEM_TEST9_NAME, + "Failed to reclaim %u/%d objects from cache %s\n", + (unsigned)kcp->kcp_cache->skc_obj_total, count, + SPLAT_KMEM_CACHE_NAME); + rc = -ENOMEM; + } + + kmem_cache_destroy(kcp->kcp_cache); + splat_kmem_cache_test_kcp_free(kcp); - return ret; + return rc; } -/* This test will always pass and is simply here so I can easily - * eyeball the slab cache locking overhead to ensure it is reasonable. +/* + * This test creates N threads with a shared kmem cache. They then all + * concurrently allocate and free from the cache to stress the locking and + * concurrent cache performance. If any one test takes longer than 5 + * seconds to complete it is treated as a failure and may indicate a + * performance regression. On my test system no one test takes more + * than 1 second to complete so a 5x slowdown is likely a problem.
*/ static int -splat_kmem_test8_sc(struct file *file, void *arg, int size, int count) +splat_kmem_test10(struct file *file, void *arg) { - kmem_cache_priv_t kcp; - kthread_t *thr; - struct timespec start, stop, delta; - char cache_name[32]; - int i, j, rc = 0, threads = 32; - - kcp.kcp_magic = SPLAT_KMEM_TEST_MAGIC; - kcp.kcp_file = file; - - splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %s", "name", - "time (sec)\tslabs \tobjs \thash\n"); - splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %s", "", - " \ttot/max/calc\ttot/max/calc\n"); - - for (i = 1; i <= count; i *= 2) { - kcp.kcp_size = size; - kcp.kcp_count = 0; - kcp.kcp_threads = 0; - kcp.kcp_alloc = i; - kcp.kcp_rc = 0; - spin_lock_init(&kcp.kcp_lock); - init_waitqueue_head(&kcp.kcp_waitq); - - (void)snprintf(cache_name, 32, "%s-%d-%d", - SPLAT_KMEM_CACHE_NAME, size, i); - kcp.kcp_cache = kmem_cache_create(cache_name, kcp.kcp_size, 0, - splat_kmem_cache_test_constructor, - splat_kmem_cache_test_destructor, - NULL, &kcp, NULL, 0); - if (!kcp.kcp_cache) { - splat_vprint(file, SPLAT_KMEM_TEST8_NAME, - "Unable to create '%s' cache\n", - SPLAT_KMEM_CACHE_NAME); - rc = -ENOMEM; - break; - } - - start = current_kernel_time(); - - for (j = 0; j < threads; j++) { - thr = thread_create(NULL, 0, splat_kmem_test8_thread, - &kcp, 0, &p0, TS_RUN, minclsyspri); - if (thr == NULL) { - rc = -ESRCH; - break; - } - spin_lock(&kcp.kcp_lock); - kcp.kcp_threads++; - spin_unlock(&kcp.kcp_lock); - } + uint64_t size, alloc, free_mem, rc = 0; - /* Sleep until the thread sets kcp.kcp_threads == 0 */ - wait_event(kcp.kcp_waitq, splat_kmem_test8_count(&kcp, 0)); - stop = current_kernel_time(); - delta = timespec_sub(stop, start); + free_mem = nr_free_pages() * PAGE_SIZE; + for (size = 16; size <= 1024*1024; size *= 2) { - splat_vprint(file, SPLAT_KMEM_TEST8_NAME, "%-22s %2ld.%09ld\t" - "%lu/%lu/%lu\t%lu/%lu/%lu\n", - kcp.kcp_cache->skc_name, - delta.tv_sec, delta.tv_nsec, - (unsigned long)kcp.kcp_cache->skc_slab_total, - (unsigned 
long)kcp.kcp_cache->skc_slab_max, - (unsigned long)(kcp.kcp_alloc * threads / - SPL_KMEM_CACHE_OBJ_PER_SLAB), - (unsigned long)kcp.kcp_cache->skc_obj_total, - (unsigned long)kcp.kcp_cache->skc_obj_max, - (unsigned long)(kcp.kcp_alloc * threads)); + splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "name", + "time (sec)\tslabs \tobjs \thash\n"); + splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "", + " \ttot/max/calc\ttot/max/calc\n"); - kmem_cache_destroy(kcp.kcp_cache); + for (alloc = 1; alloc <= 1024; alloc *= 2) { - if (!rc && kcp.kcp_rc) - rc = kcp.kcp_rc; + /* Skip tests which exceed free memory */ + if (size * alloc * SPLAT_KMEM_THREADS > free_mem / 2) + continue; - if (rc) - break; + rc = splat_kmem_cache_thread_test(file, arg, + SPLAT_KMEM_TEST10_NAME, size, alloc); + if (rc) + break; + } } return rc; } +/* + * This test creates N threads with a shared kmem cache which overcommits + * memory by 4x. This makes it impossible for the slab to satisfy the + * thread requirements without having its reclaim hook run which will + * free objects back for use. This behavior is triggered by the Linux VM + * detecting a low memory condition on the node and invoking the shrinkers. + * This should allow all the threads to complete while avoiding deadlock + * and for the most part out of memory events. This is very tough on the + * system so it is possible the test app may get oom'ed.
+ */ static int -splat_kmem_test8(struct file *file, void *arg) +splat_kmem_test11(struct file *file, void *arg) { - int i, rc = 0; + uint64_t size, alloc, rc; - /* Run through slab cache with objects size from - * 16-1Mb in 4x multiples with 1024 objects each */ - for (i = 16; i <= 1024*1024; i *= 4) { - rc = splat_kmem_test8_sc(file, arg, i, 256); - if (rc) - break; - } - - return rc; -} + size = 1024*1024; + alloc = ((4 * num_physpages * PAGE_SIZE) / size) / SPLAT_KMEM_THREADS; -/* Validate object alignment cache behavior for caches */ -static int -splat_kmem_test9(struct file *file, void *arg) -{ - char *name = SPLAT_KMEM_TEST9_NAME; - int i, rc; + splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "name", + "time (sec)\tslabs \tobjs \thash\n"); + splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "", + " \ttot/max/calc\ttot/max/calc\n"); - for (i = 8; i <= PAGE_SIZE; i *= 2) { - rc = splat_kmem_cache_test(file, arg, name, 157, i, 0); - if (rc) - return rc; - } + rc = splat_kmem_cache_thread_test(file, arg, + SPLAT_KMEM_TEST11_NAME, size, alloc); return rc; } @@ -701,60 +1030,66 @@ splat_kmem_test9(struct file *file, void *arg) splat_subsystem_t * splat_kmem_init(void) { - splat_subsystem_t *sub; + splat_subsystem_t *sub; - sub = kmalloc(sizeof(*sub), GFP_KERNEL); - if (sub == NULL) - return NULL; + sub = kmalloc(sizeof(*sub), GFP_KERNEL); + if (sub == NULL) + return NULL; - memset(sub, 0, sizeof(*sub)); - strncpy(sub->desc.name, SPLAT_KMEM_NAME, SPLAT_NAME_SIZE); + memset(sub, 0, sizeof(*sub)); + strncpy(sub->desc.name, SPLAT_KMEM_NAME, SPLAT_NAME_SIZE); strncpy(sub->desc.desc, SPLAT_KMEM_DESC, SPLAT_DESC_SIZE); - INIT_LIST_HEAD(&sub->subsystem_list); + INIT_LIST_HEAD(&sub->subsystem_list); INIT_LIST_HEAD(&sub->test_list); - spin_lock_init(&sub->test_lock); - sub->desc.id = SPLAT_SUBSYSTEM_KMEM; - - SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST1_NAME, SPLAT_KMEM_TEST1_DESC, - SPLAT_KMEM_TEST1_ID, splat_kmem_test1); - SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST2_NAME, 
SPLAT_KMEM_TEST2_DESC, - SPLAT_KMEM_TEST2_ID, splat_kmem_test2); - SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST3_NAME, SPLAT_KMEM_TEST3_DESC, - SPLAT_KMEM_TEST3_ID, splat_kmem_test3); - SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST4_NAME, SPLAT_KMEM_TEST4_DESC, - SPLAT_KMEM_TEST4_ID, splat_kmem_test4); - SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST5_NAME, SPLAT_KMEM_TEST5_DESC, - SPLAT_KMEM_TEST5_ID, splat_kmem_test5); - SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST6_NAME, SPLAT_KMEM_TEST6_DESC, - SPLAT_KMEM_TEST6_ID, splat_kmem_test6); - SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST7_NAME, SPLAT_KMEM_TEST7_DESC, - SPLAT_KMEM_TEST7_ID, splat_kmem_test7); - SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST8_NAME, SPLAT_KMEM_TEST8_DESC, - SPLAT_KMEM_TEST8_ID, splat_kmem_test8); - SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST9_NAME, SPLAT_KMEM_TEST9_DESC, - SPLAT_KMEM_TEST9_ID, splat_kmem_test9); - - return sub; + spin_lock_init(&sub->test_lock); + sub->desc.id = SPLAT_SUBSYSTEM_KMEM; + + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST1_NAME, SPLAT_KMEM_TEST1_DESC, + SPLAT_KMEM_TEST1_ID, splat_kmem_test1); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST2_NAME, SPLAT_KMEM_TEST2_DESC, + SPLAT_KMEM_TEST2_ID, splat_kmem_test2); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST3_NAME, SPLAT_KMEM_TEST3_DESC, + SPLAT_KMEM_TEST3_ID, splat_kmem_test3); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST4_NAME, SPLAT_KMEM_TEST4_DESC, + SPLAT_KMEM_TEST4_ID, splat_kmem_test4); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST5_NAME, SPLAT_KMEM_TEST5_DESC, + SPLAT_KMEM_TEST5_ID, splat_kmem_test5); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST6_NAME, SPLAT_KMEM_TEST6_DESC, + SPLAT_KMEM_TEST6_ID, splat_kmem_test6); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST7_NAME, SPLAT_KMEM_TEST7_DESC, + SPLAT_KMEM_TEST7_ID, splat_kmem_test7); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST8_NAME, SPLAT_KMEM_TEST8_DESC, + SPLAT_KMEM_TEST8_ID, splat_kmem_test8); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST9_NAME, SPLAT_KMEM_TEST9_DESC, + SPLAT_KMEM_TEST9_ID, splat_kmem_test9); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST10_NAME, 
SPLAT_KMEM_TEST10_DESC, + SPLAT_KMEM_TEST10_ID, splat_kmem_test10); + SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST11_NAME, SPLAT_KMEM_TEST11_DESC, + SPLAT_KMEM_TEST11_ID, splat_kmem_test11); + + return sub; } void splat_kmem_fini(splat_subsystem_t *sub) { - ASSERT(sub); - SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST9_ID); - SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST8_ID); - SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST7_ID); - SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST6_ID); - SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST5_ID); - SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST4_ID); - SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST3_ID); - SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST2_ID); - SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST1_ID); - - kfree(sub); + ASSERT(sub); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST11_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST10_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST9_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST8_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST7_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST6_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST5_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST4_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST3_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST2_ID); + SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST1_ID); + + kfree(sub); } int splat_kmem_id(void) { - return SPLAT_SUBSYSTEM_KMEM; + return SPLAT_SUBSYSTEM_KMEM; } -- 2.40.0