From ff449ac40691903c7f49cb89b1a02696b6c37aa7 Mon Sep 17 00:00:00 2001 From: behlendo Date: Fri, 27 Jun 2008 21:40:11 +0000 Subject: [PATCH] Further slab improvements, I'm getting close to something which works well for the expected workloads. Improvements in this commit include: - Added DEBUG_KMEM_TRACKING #define which can optionally be set when DEBUG_KMEM is defined to do per-allocation tracking. This allows us to keep all the lightweight kmem debugging enabled by default, and to briefly enable the per-allocation tracking only when looking for a memory leak. - Added set_normalized_timespec() to the SPL to simplify using the timespec() primitives from within a module. - Added per-spinlock cycle counters to the slab in an attempt to run down a lock contention issue. The contended lock was in vmalloc() but I'm going to leave the cycle counters in place for a little while until I'm convinced there aren't other locking improvements possible in the slab. - Added a proc interface to the slab to export per slab cache statistics to /proc/spl/kmem/slab for analysis. - Reworked spl_slab_alloc() function to allocate from kmem for small allocations and vmem for large allocations. This improved things considerably but further work is needed. 
git-svn-id: https://outreach.scidac.gov/svn/spl/trunk@138 7e1ea52c-4ff2-0310-8f11-9dd32ca42a1c --- include/sys/kmem.h | 120 ++++++++++++++++++++++++-- modules/spl/spl-kmem.c | 168 ++++++++++++++++++++++++------------- modules/spl/spl-proc.c | 128 ++++++++++++++++++++++++++-- modules/spl/spl-time.c | 19 +++++ modules/splat/splat-kmem.c | 17 ---- 5 files changed, 357 insertions(+), 95 deletions(-) diff --git a/include/sys/kmem.h b/include/sys/kmem.h index 2208151..47ac72e 100644 --- a/include/sys/kmem.h +++ b/include/sys/kmem.h @@ -32,6 +32,7 @@ extern "C" { #endif #undef DEBUG_KMEM_UNIMPLEMENTED +#undef DEBUG_KMEM_TRACKING /* Per-allocation memory tracking */ #include #include @@ -58,10 +59,9 @@ extern atomic64_t kmem_alloc_used; extern unsigned long kmem_alloc_max; extern atomic64_t vmem_alloc_used; extern unsigned long vmem_alloc_max; - extern int kmem_warning_flag; -extern atomic64_t kmem_cache_alloc_failed; +#ifdef DEBUG_KMEM_TRACKING /* XXX - Not to surprisingly with debugging enabled the xmem_locks are very * highly contended particularly on xfree(). 
If we want to run with this * detailed debugging enabled for anything other than debugging we need to @@ -171,9 +171,6 @@ __kmem_del_init(spinlock_t *lock,struct hlist_head *table,int bits,void *addr) _ptr_; \ }) -#define kmem_alloc(size, flags) __kmem_alloc((size), (flags), kmalloc) -#define kmem_zalloc(size, flags) __kmem_alloc((size), (flags), kzalloc) - #define kmem_free(ptr, size) \ ({ \ kmem_debug_t *_dptr_; \ @@ -252,10 +249,6 @@ __kmem_del_init(spinlock_t *lock,struct hlist_head *table,int bits,void *addr) _ptr_; \ }) -#define vmem_alloc(size, flags) __vmem_alloc((size), (flags)) -#define vmem_zalloc(size, flags) __vmem_alloc((size), ((flags) | \ - __GFP_ZERO)) - #define vmem_free(ptr, size) \ ({ \ kmem_debug_t *_dptr_; \ @@ -278,6 +271,108 @@ __kmem_del_init(spinlock_t *lock,struct hlist_head *table,int bits,void *addr) vfree(ptr); \ }) +#else /* DEBUG_KMEM_TRACKING */ + +#define __kmem_alloc(size, flags, allocator) \ +({ void *_ptr_ = NULL; \ + \ + /* Marked unlikely because we should never be doing this, */ \ + /* we tolerate to up 2 pages but a single page is best. 
*/ \ + if (unlikely((size) > (PAGE_SIZE * 2)) && kmem_warning_flag) \ + __CDEBUG_LIMIT(S_KMEM, D_WARNING, "Warning large " \ + "kmem_alloc(%d, 0x%x) (%ld/%ld)\n", \ + (int)(size), (int)(flags), \ + atomic64_read(&kmem_alloc_used), \ + kmem_alloc_max); \ + \ + _ptr_ = (void *)allocator((size), (flags)); \ + if (_ptr_ == NULL) { \ + __CDEBUG_LIMIT(S_KMEM, D_WARNING, "Warning " \ + "kmem_alloc(%d, 0x%x) failed (%ld/" \ + "%ld)\n", (int)(size), (int)(flags), \ + atomic64_read(&kmem_alloc_used), \ + kmem_alloc_max); \ + } else { \ + atomic64_add((size), &kmem_alloc_used); \ + if (unlikely(atomic64_read(&kmem_alloc_used) > \ + kmem_alloc_max)) \ + kmem_alloc_max = \ + atomic64_read(&kmem_alloc_used); \ + \ + __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_alloc(%d, 0x%x) = %p " \ + "(%ld/%ld)\n", (int)(size), (int)(flags), \ + _ptr_, atomic64_read(&kmem_alloc_used), \ + kmem_alloc_max); \ + } \ + \ + _ptr_; \ +}) + +#define kmem_free(ptr, size) \ +({ \ + ASSERT((ptr) || (size > 0)); \ + \ + atomic64_sub((size), &kmem_alloc_used); \ + __CDEBUG_LIMIT(S_KMEM, D_INFO, "kmem_free(%p, %d) (%ld/%ld)\n", \ + (ptr), (int)(size), atomic64_read(&kmem_alloc_used), \ + kmem_alloc_max); \ + memset(ptr, 0x5a, (size)); \ + kfree(ptr); \ +}) + +#define __vmem_alloc(size, flags) \ +({ void *_ptr_ = NULL; \ + \ + ASSERT((flags) & KM_SLEEP); \ + \ + _ptr_ = (void *)__vmalloc((size), (((flags) | \ + __GFP_HIGHMEM) & ~__GFP_ZERO), PAGE_KERNEL);\ + if (_ptr_ == NULL) { \ + __CDEBUG_LIMIT(S_KMEM, D_WARNING, "Warning " \ + "vmem_alloc(%d, 0x%x) failed (%ld/" \ + "%ld)\n", (int)(size), (int)(flags), \ + atomic64_read(&vmem_alloc_used), \ + vmem_alloc_max); \ + } else { \ + if (flags & __GFP_ZERO) \ + memset(_ptr_, 0, (size)); \ + \ + atomic64_add((size), &vmem_alloc_used); \ + if (unlikely(atomic64_read(&vmem_alloc_used) > \ + vmem_alloc_max)) \ + vmem_alloc_max = \ + atomic64_read(&vmem_alloc_used); \ + \ + __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_alloc(" \ + "%d, 0x%x) = %p (%ld/%ld)\n", \ + (int)(size), 
(int)(flags), _ptr_, \ + atomic64_read(&vmem_alloc_used), \ + vmem_alloc_max); \ + } \ + \ + _ptr_; \ +}) + +#define vmem_free(ptr, size) \ +({ \ + ASSERT((ptr) || (size > 0)); \ + \ + atomic64_sub((size), &vmem_alloc_used); \ + __CDEBUG_LIMIT(S_KMEM, D_INFO, "vmem_free(%p, %d) (%ld/%ld)\n", \ + (ptr), (int)(size), atomic64_read(&vmem_alloc_used), \ + vmem_alloc_max); \ + memset(ptr, 0x5a, (size)); \ + vfree(ptr); \ +}) + +#endif /* DEBUG_KMEM_TRACKING */ + +#define kmem_alloc(size, flags) __kmem_alloc((size), (flags), kmalloc) +#define kmem_zalloc(size, flags) __kmem_alloc((size), (flags), kzalloc) + +#define vmem_alloc(size, flags) __vmem_alloc((size), (flags)) +#define vmem_zalloc(size, flags) __vmem_alloc((size), ((flags) | __GFP_ZERO)) + #else /* DEBUG_KMEM */ #define kmem_alloc(size, flags) kmalloc((size), (flags)) @@ -359,6 +454,8 @@ kmem_debugging(void) extern int kmem_set_warning(int flag); +extern struct list_head spl_kmem_cache_list; +extern struct rw_semaphore spl_kmem_cache_sem; #define SKM_MAGIC 0x2e2e2e2e #define SKO_MAGIC 0x20202020 @@ -442,6 +539,11 @@ typedef struct spl_kmem_cache { uint64_t skc_obj_max; /* Obj max historic */ uint64_t skc_hash_depth; /* Lazy hash depth */ uint64_t skc_hash_count; /* Hash entries current */ + cycles_t skc_lock_reclaim; + cycles_t skc_lock_destroy; + cycles_t skc_lock_grow; + cycles_t skc_lock_refill; + cycles_t skc_lock_flush; } spl_kmem_cache_t; extern spl_kmem_cache_t * diff --git a/modules/spl/spl-kmem.c b/modules/spl/spl-kmem.c index 453360d..0ee04a2 100644 --- a/modules/spl/spl-kmem.c +++ b/modules/spl/spl-kmem.c @@ -48,8 +48,14 @@ unsigned long kmem_alloc_max = 0; atomic64_t vmem_alloc_used; unsigned long vmem_alloc_max = 0; int kmem_warning_flag = 1; -atomic64_t kmem_cache_alloc_failed; +EXPORT_SYMBOL(kmem_alloc_used); +EXPORT_SYMBOL(kmem_alloc_max); +EXPORT_SYMBOL(vmem_alloc_used); +EXPORT_SYMBOL(vmem_alloc_max); +EXPORT_SYMBOL(kmem_warning_flag); + +#ifdef DEBUG_KMEM_TRACKING spinlock_t kmem_lock; struct 
hlist_head kmem_table[KMEM_TABLE_SIZE]; struct list_head kmem_list; @@ -58,12 +64,6 @@ spinlock_t vmem_lock; struct hlist_head vmem_table[VMEM_TABLE_SIZE]; struct list_head vmem_list; -EXPORT_SYMBOL(kmem_alloc_used); -EXPORT_SYMBOL(kmem_alloc_max); -EXPORT_SYMBOL(vmem_alloc_used); -EXPORT_SYMBOL(vmem_alloc_max); -EXPORT_SYMBOL(kmem_warning_flag); - EXPORT_SYMBOL(kmem_lock); EXPORT_SYMBOL(kmem_table); EXPORT_SYMBOL(kmem_list); @@ -71,6 +71,7 @@ EXPORT_SYMBOL(kmem_list); EXPORT_SYMBOL(vmem_lock); EXPORT_SYMBOL(vmem_table); EXPORT_SYMBOL(vmem_list); +#endif int kmem_set_warning(int flag) { return (kmem_warning_flag = !!flag); } #else @@ -109,8 +110,6 @@ EXPORT_SYMBOL(kmem_set_warning); * small virtual address space on 32bit arches. This will seriously * constrain the size of the slab caches and their performance. * - * XXX: Implement SPL proc interface to export full per cache stats. - * * XXX: Implement work requests to keep an eye on each cache and * shrink them via spl_slab_reclaim() when they are wasting lots * of space. Currently this process is driven by the reapers. 
@@ -149,10 +148,10 @@ EXPORT_SYMBOL(kmem_set_warning); #undef kmem_cache_alloc #undef kmem_cache_free -static struct list_head spl_kmem_cache_list; /* List of caches */ -static struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ -static kmem_cache_t *spl_slab_cache; /* Cache for slab structs */ -static kmem_cache_t *spl_obj_cache; /* Cache for obj structs */ +struct list_head spl_kmem_cache_list; /* List of caches */ +struct rw_semaphore spl_kmem_cache_sem; /* Cache list lock */ +static kmem_cache_t *spl_slab_cache; /* Cache for slab structs */ +static kmem_cache_t *spl_obj_cache; /* Cache for obj structs */ static int spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush); @@ -206,7 +205,17 @@ out_alloc: GOTO(out, sks = NULL); } - sko->sko_addr = vmem_alloc(skc->skc_obj_size, flags); + /* Objects less than a page can use kmem_alloc() and avoid + * the locking overhead in __get_vm_area_node() when locking + * for a free address. For objects over a page we use + * vmem_alloc() because it is usually worth paying this + * overhead to avoid the need to find contigeous pages. + * This should give us the best of both worlds. 
*/ + if (skc->skc_obj_size <= PAGE_SIZE) + sko->sko_addr = kmem_alloc(skc->skc_obj_size, flags); + else + sko->sko_addr = vmem_alloc(skc->skc_obj_size, flags); + if (sko->sko_addr == NULL) { kmem_cache_free(spl_obj_cache, sko); GOTO(out_alloc, sks = NULL); @@ -248,7 +257,11 @@ spl_slab_free(spl_kmem_slab_t *sks) { if (skc->skc_dtor) skc->skc_dtor(sko->sko_addr, skc->skc_private); - vmem_free(sko->sko_addr, skc->skc_obj_size); + if (skc->skc_obj_size <= PAGE_SIZE) + kmem_free(sko->sko_addr, skc->skc_obj_size); + else + vmem_free(sko->sko_addr, skc->skc_obj_size); + list_del(&sko->sko_list); kmem_cache_free(spl_obj_cache, sko); i++; @@ -292,13 +305,18 @@ __spl_slab_reclaim(spl_kmem_cache_t *skc) static int spl_slab_reclaim(spl_kmem_cache_t *skc) { + cycles_t start; int rc; ENTRY; spin_lock(&skc->skc_lock); + start = get_cycles(); rc = __spl_slab_reclaim(skc); spin_unlock(&skc->skc_lock); + if (unlikely((get_cycles() - start) > skc->skc_lock_reclaim)) + skc->skc_lock_reclaim = get_cycles() - start; + RETURN(rc); } @@ -311,17 +329,15 @@ spl_magazine_size(spl_kmem_cache_t *skc) /* Guesses for reasonable magazine sizes, they * should really adapt based on observed usage. */ if (skc->skc_obj_size > (PAGE_SIZE * 256)) - size = 1; - else if (skc->skc_obj_size > (PAGE_SIZE * 32)) size = 4; - else if (skc->skc_obj_size > (PAGE_SIZE)) + else if (skc->skc_obj_size > (PAGE_SIZE * 32)) size = 16; + else if (skc->skc_obj_size > (PAGE_SIZE)) + size = 64; else if (skc->skc_obj_size > (PAGE_SIZE / 4)) - size = 32; - else if (skc->skc_obj_size > (PAGE_SIZE / 16)) - size = 48; + size = 128; else - size = 64; + size = 512; RETURN(size); } @@ -412,7 +428,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align, kmem_flags = KM_NOSLEEP; /* Allocate new cache memory and initialize. 
*/ - skc = (spl_kmem_cache_t *)kmem_alloc(sizeof(*skc), kmem_flags); + skc = (spl_kmem_cache_t *)kmem_zalloc(sizeof(*skc), kmem_flags); if (skc == NULL) RETURN(NULL); @@ -441,7 +457,7 @@ spl_kmem_cache_create(char *name, size_t size, size_t align, skc->skc_hash_size = SPL_KMEM_CACHE_HASH_SIZE; skc->skc_hash_elts = SPL_KMEM_CACHE_HASH_ELTS; skc->skc_hash = (struct hlist_head *) - kmem_alloc(skc->skc_hash_size, kmem_flags); + vmem_alloc(skc->skc_hash_size, kmem_flags); if (skc->skc_hash == NULL) { kmem_free(skc->skc_name, skc->skc_name_size); kmem_free(skc, sizeof(*skc)); @@ -466,10 +482,15 @@ spl_kmem_cache_create(char *name, size_t size, size_t align, skc->skc_obj_max = 0; skc->skc_hash_depth = 0; skc->skc_hash_count = 0; + skc->skc_lock_reclaim = 0; + skc->skc_lock_destroy = 0; + skc->skc_lock_grow = 0; + skc->skc_lock_refill = 0; + skc->skc_lock_flush = 0; rc = spl_magazine_create(skc); if (rc) { - kmem_free(skc->skc_hash, skc->skc_hash_size); + vmem_free(skc->skc_hash, skc->skc_hash_size); kmem_free(skc->skc_name, skc->skc_name_size); kmem_free(skc, sizeof(*skc)); RETURN(NULL); @@ -490,6 +511,7 @@ void spl_kmem_cache_destroy(spl_kmem_cache_t *skc) { spl_kmem_slab_t *sks, *m; + cycles_t start; ENTRY; ASSERT(skc->skc_magic == SKC_MAGIC); @@ -500,6 +522,7 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc) spl_magazine_destroy(skc); spin_lock(&skc->skc_lock); + start = get_cycles(); /* Validate there are no objects in use and free all the * spl_kmem_slab_t, spl_kmem_obj_t, and object buffers. 
*/ @@ -510,9 +533,13 @@ spl_kmem_cache_destroy(spl_kmem_cache_t *skc) list_for_each_entry_safe(sks, m, &skc->skc_partial_list, sks_list) spl_slab_free(sks); - kmem_free(skc->skc_hash, skc->skc_hash_size); + vmem_free(skc->skc_hash, skc->skc_hash_size); kmem_free(skc->skc_name, skc->skc_name_size); spin_unlock(&skc->skc_lock); + + if (unlikely((get_cycles() - start) > skc->skc_lock_destroy)) + skc->skc_lock_destroy = get_cycles() - start; + kmem_free(skc, sizeof(*skc)); EXIT; @@ -603,6 +630,7 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags) { spl_kmem_slab_t *sks; spl_kmem_obj_t *sko; + cycles_t start; ENTRY; ASSERT(skc->skc_magic == SKC_MAGIC); @@ -634,11 +662,16 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags) /* Link the new empty slab in to the end of skc_partial_list */ spin_lock(&skc->skc_lock); + start = get_cycles(); skc->skc_slab_total++; skc->skc_obj_total += sks->sks_objs; list_add_tail(&sks->sks_list, &skc->skc_partial_list); spin_unlock(&skc->skc_lock); + if (unlikely((get_cycles() - start) > skc->skc_lock_grow)) + skc->skc_lock_grow = get_cycles() - start; + + RETURN(sks); } @@ -647,6 +680,7 @@ spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags) { spl_kmem_slab_t *sks; int rc = 0, refill; + cycles_t start; ENTRY; ASSERT(skc->skc_magic == SKC_MAGIC); @@ -656,10 +690,16 @@ spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags) refill = MIN(skm->skm_refill, skm->skm_size - skm->skm_avail); spin_lock(&skc->skc_lock); + start = get_cycles(); + while (refill > 0) { /* No slabs available we must grow the cache */ if (list_empty(&skc->skc_partial_list)) { spin_unlock(&skc->skc_lock); + + if (unlikely((get_cycles() - start) > skc->skc_lock_refill)) + skc->skc_lock_refill = get_cycles() - start; + sks = spl_cache_grow(skc, flags); if (!sks) GOTO(out, rc); @@ -674,6 +714,7 @@ spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags) refill = MIN(refill, skm->skm_size - skm->skm_avail); 
spin_lock(&skc->skc_lock); + start = get_cycles(); continue; } @@ -700,6 +741,9 @@ spl_cache_refill(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flags) } spin_unlock(&skc->skc_lock); + + if (unlikely((get_cycles() - start) > skc->skc_lock_refill)) + skc->skc_lock_refill = get_cycles() - start; out: /* Returns the number of entries added to cache */ RETURN(rc); @@ -716,8 +760,8 @@ spl_cache_shrink(spl_kmem_cache_t *skc, void *obj) ASSERT(spin_is_locked(&skc->skc_lock)); sko = spl_hash_obj(skc, obj); - ASSERTF(sko, "Obj %p missing from in-use hash (%d) for cache %s\n", - obj, skc->skc_hash_count, skc->skc_name); + ASSERTF(sko, "Obj %p missing from in-use hash (%d/%d) for cache %s\n", + obj, skc->skc_hash_depth, skc->skc_hash_count, skc->skc_name); sks = sko->sko_slab; ASSERTF(sks, "Obj %p/%p linked to invalid slab for cache %s\n", @@ -755,12 +799,15 @@ static int spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) { int i, count = MIN(flush, skm->skm_avail); + cycles_t start; ENTRY; ASSERT(skc->skc_magic == SKC_MAGIC); ASSERT(skm->skm_magic == SKM_MAGIC); spin_lock(&skc->skc_lock); + start = get_cycles(); + for (i = 0; i < count; i++) spl_cache_shrink(skc, skm->skm_objs[i]); @@ -771,6 +818,9 @@ spl_cache_flush(spl_kmem_cache_t *skc, spl_kmem_magazine_t *skm, int flush) spin_unlock(&skc->skc_lock); + if (unlikely((get_cycles() - start) > skc->skc_lock_flush)) + skc->skc_lock_flush = get_cycles() - start; + RETURN(count); } @@ -942,11 +992,11 @@ spl_kmem_init(void) #endif #ifdef DEBUG_KMEM - { int i; atomic64_set(&kmem_alloc_used, 0); atomic64_set(&vmem_alloc_used, 0); - atomic64_set(&kmem_cache_alloc_failed, 0); +#ifdef DEBUG_KMEM_TRACKING + { int i; spin_lock_init(&kmem_lock); INIT_LIST_HEAD(&kmem_list); @@ -959,6 +1009,7 @@ spl_kmem_init(void) for (i = 0; i < VMEM_TABLE_SIZE; i++) INIT_HLIST_HEAD(&vmem_table[i]); } +#endif #endif RETURN(rc); @@ -972,7 +1023,7 @@ out_cache: RETURN(rc); } -#ifdef DEBUG_KMEM +#if defined(DEBUG_KMEM) && 
defined(DEBUG_KMEM_TRACKING) static char * spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min) { @@ -1013,54 +1064,51 @@ spl_sprintf_addr(kmem_debug_t *kd, char *str, int len, int min) return str; } -#endif /* DEBUG_KMEM */ -void -spl_kmem_fini(void) +static void +spl_kmem_fini_tracking(struct list_head *list, spinlock_t *lock) { -#ifdef DEBUG_KMEM unsigned long flags; kmem_debug_t *kd; char str[17]; - /* Display all unreclaimed memory addresses, including the - * allocation size and the first few bytes of what's located - * at that address to aid in debugging. Performance is not - * a serious concern here since it is module unload time. */ - if (atomic64_read(&kmem_alloc_used) != 0) - CWARN("kmem leaked %ld/%ld bytes\n", - atomic_read(&kmem_alloc_used), kmem_alloc_max); - - spin_lock_irqsave(&kmem_lock, flags); - if (!list_empty(&kmem_list)) + spin_lock_irqsave(lock, flags); + if (!list_empty(list)) CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n", "address", "size", "data", "func", "line"); - list_for_each_entry(kd, &kmem_list, kd_list) + list_for_each_entry(kd, list, kd_list) CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n", kd->kd_addr, kd->kd_size, spl_sprintf_addr(kd, str, 17, 8), kd->kd_func, kd->kd_line); - spin_unlock_irqrestore(&kmem_lock, flags); + spin_unlock_irqrestore(lock, flags); +} +#else /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */ +#define spl_kmem_fini_tracking(list, lock) +#endif /* DEBUG_KMEM && DEBUG_KMEM_TRACKING */ + +void +spl_kmem_fini(void) +{ +#ifdef DEBUG_KMEM + /* Display all unreclaimed memory addresses, including the + * allocation size and the first few bytes of what's located + * at that address to aid in debugging. Performance is not + * a serious concern here since it is module unload time. 
*/ + if (atomic64_read(&kmem_alloc_used) != 0) + CWARN("kmem leaked %ld/%ld bytes\n", + atomic_read(&kmem_alloc_used), kmem_alloc_max); + if (atomic64_read(&vmem_alloc_used) != 0) CWARN("vmem leaked %ld/%ld bytes\n", atomic_read(&vmem_alloc_used), vmem_alloc_max); - spin_lock_irqsave(&vmem_lock, flags); - if (!list_empty(&vmem_list)) - CDEBUG(D_WARNING, "%-16s %-5s %-16s %s:%s\n", - "address", "size", "data", "func", "line"); - - list_for_each_entry(kd, &vmem_list, kd_list) - CDEBUG(D_WARNING, "%p %-5d %-16s %s:%d\n", - kd->kd_addr, kd->kd_size, - spl_sprintf_addr(kd, str, 17, 8), - kd->kd_func, kd->kd_line); - - spin_unlock_irqrestore(&vmem_lock, flags); -#endif + spl_kmem_fini_tracking(&kmem_list, &kmem_lock); + spl_kmem_fini_tracking(&vmem_list, &vmem_lock); +#endif /* DEBUG_KMEM */ ENTRY; #ifdef HAVE_SET_SHRINKER diff --git a/modules/spl/spl-proc.c b/modules/spl/spl-proc.c index 8ef1698..e5f87d2 100644 --- a/modules/spl/spl-proc.c +++ b/modules/spl/spl-proc.c @@ -49,6 +49,7 @@ static struct proc_dir_entry *proc_spl_mutex_stats = NULL; #endif /* DEBUG_MUTEX */ #ifdef DEBUG_KMEM static struct proc_dir_entry *proc_spl_kmem = NULL; +static struct proc_dir_entry *proc_spl_kmem_slab = NULL; #endif /* DEBUG_KMEM */ #ifdef DEBUG_KSTAT struct proc_dir_entry *proc_spl_kstat = NULL; @@ -131,7 +132,6 @@ enum { CTL_KMEM_KMEMMAX, /* Max alloc'd by kmem bytes */ CTL_KMEM_VMEMUSED, /* Alloc'd vmem bytes */ CTL_KMEM_VMEMMAX, /* Max alloc'd by vmem bytes */ - CTL_KMEM_ALLOC_FAILED, /* Cache allocation failed */ #endif CTL_MUTEX_STATS, /* Global mutex statistics */ @@ -561,6 +561,112 @@ static struct file_operations proc_mutex_operations = { }; #endif /* DEBUG_MUTEX */ +#ifdef DEBUG_KMEM +static void +slab_seq_show_headers(struct seq_file *f) +{ + seq_printf(f, "%-36s\n", "name"); +} + +static int +slab_seq_show(struct seq_file *f, void *p) +{ + spl_kmem_cache_t *skc = p; + + ASSERT(skc->skc_magic == SKC_MAGIC); + + spin_lock(&skc->skc_lock); + seq_printf(f, "%-36s ", 
skc->skc_name); + seq_printf(f, "%u %u %u - %u %u %u - " + "%lu %lu %lu - %lu %lu %lu - %lu %lu %lu - %lu %lu - " + "%llu %llu %llu %llu %llu\n", + (unsigned)skc->skc_obj_size, + (unsigned)skc->skc_chunk_size, + (unsigned)skc->skc_slab_size, + (unsigned)skc->skc_hash_bits, + (unsigned)skc->skc_hash_size, + (unsigned)skc->skc_hash_elts, + (long unsigned)skc->skc_slab_fail, + (long unsigned)skc->skc_slab_create, + (long unsigned)skc->skc_slab_destroy, + (long unsigned)skc->skc_slab_total, + (long unsigned)skc->skc_slab_alloc, + (long unsigned)skc->skc_slab_max, + (long unsigned)skc->skc_obj_total, + (long unsigned)skc->skc_obj_alloc, + (long unsigned)skc->skc_obj_max, + (long unsigned)skc->skc_hash_depth, + (long unsigned)skc->skc_hash_count, + (long long unsigned)skc->skc_lock_reclaim, + (long long unsigned)skc->skc_lock_destroy, + (long long unsigned)skc->skc_lock_grow, + (long long unsigned)skc->skc_lock_refill, + (long long unsigned)skc->skc_lock_flush); + + spin_unlock(&skc->skc_lock); + + return 0; +} + +static void * +slab_seq_start(struct seq_file *f, loff_t *pos) +{ + struct list_head *p; + loff_t n = *pos; + ENTRY; + + down_read(&spl_kmem_cache_sem); + if (!n) + slab_seq_show_headers(f); + + p = spl_kmem_cache_list.next; + while (n--) { + p = p->next; + if (p == &spl_kmem_cache_list) + RETURN(NULL); + } + + RETURN(list_entry(p, spl_kmem_cache_t, skc_list)); +} + +static void * +slab_seq_next(struct seq_file *f, void *p, loff_t *pos) +{ + spl_kmem_cache_t *skc = p; + ENTRY; + + ++*pos; + RETURN((skc->skc_list.next == &spl_kmem_cache_list) ? 
+ NULL : list_entry(skc->skc_list.next, spl_kmem_cache_t, skc_list)); +} + +static void +slab_seq_stop(struct seq_file *f, void *v) +{ + up_read(&spl_kmem_cache_sem); +} + +static struct seq_operations slab_seq_ops = { + .show = slab_seq_show, + .start = slab_seq_start, + .next = slab_seq_next, + .stop = slab_seq_stop, +}; + +static int +proc_slab_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &slab_seq_ops); +} + +static struct file_operations proc_slab_operations = { + .open = proc_slab_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif /* DEBUG_KMEM */ + static struct ctl_table spl_debug_table[] = { { .ctl_name = CTL_DEBUG_SUBSYS, @@ -735,14 +841,6 @@ static struct ctl_table spl_kmem_table[] = { .mode = 0444, .proc_handler = &proc_doulongvec_minmax, }, - { - .ctl_name = CTL_KMEM_ALLOC_FAILED, - .procname = "kmem_alloc_failed", - .data = &kmem_cache_alloc_failed, - .maxlen = sizeof(atomic64_t), - .mode = 0444, - .proc_handler = &proc_doatomic64, - }, {0}, }; #endif /* DEBUG_KMEM */ @@ -901,6 +999,12 @@ proc_init(void) proc_spl_kmem = proc_mkdir("kmem", proc_spl); if (proc_spl_kmem == NULL) GOTO(out, rc = -EUNATCH); + + proc_spl_kmem_slab = create_proc_entry("slab", 0444, proc_spl_kmem); + if (proc_spl_kmem_slab == NULL) + GOTO(out, rc = -EUNATCH); + + proc_spl_kmem_slab->proc_fops = &proc_slab_operations; #endif /* DEBUG_KMEM */ #ifdef DEBUG_KSTAT @@ -912,6 +1016,9 @@ proc_init(void) out: if (rc) { remove_proc_entry("kstat", proc_spl); +#ifdef DEBUG_KMEM + remove_proc_entry("slab", proc_spl_kmem); +#endif remove_proc_entry("kmem", proc_spl); #ifdef DEBUG_MUTEX remove_proc_entry("stats_per", proc_spl_mutex); @@ -934,6 +1041,9 @@ proc_fini(void) #if defined(DEBUG_MUTEX) || defined(DEBUG_KMEM) || defined(DEBUG_KSTAT) remove_proc_entry("kstat", proc_spl); +#ifdef DEBUG_KMEM + remove_proc_entry("slab", proc_spl_kmem); +#endif remove_proc_entry("kmem", proc_spl); #ifdef DEBUG_MUTEX 
remove_proc_entry("stats_per", proc_spl_mutex); diff --git a/modules/spl/spl-time.c b/modules/spl/spl-time.c index a2cfd6e..b2cb121 100644 --- a/modules/spl/spl-time.c +++ b/modules/spl/spl-time.c @@ -66,3 +66,22 @@ __gethrtime(void) { return rc; } EXPORT_SYMBOL(__gethrtime); + +/* Not exported from the kernel, but we need it for timespec_sub. Be very + * careful here we are using the kernel prototype, so that must not change. + */ +void +set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) +{ + while (nsec >= NSEC_PER_SEC) { + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) { + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} +EXPORT_SYMBOL(set_normalized_timespec); diff --git a/modules/splat/splat-kmem.c b/modules/splat/splat-kmem.c index 43221ea..de9b368 100644 --- a/modules/splat/splat-kmem.c +++ b/modules/splat/splat-kmem.c @@ -65,23 +65,6 @@ #define SPLAT_KMEM_ALLOC_COUNT 10 #define SPLAT_VMEM_ALLOC_COUNT 10 -/* Not exported from the kernel, but we need it for timespec_sub. Be very - * * careful here we are using the kernel prototype, so that must not change. - * */ -void -set_normalized_timespec(struct timespec *ts, time_t sec, long nsec) -{ - while (nsec >= NSEC_PER_SEC) { - nsec -= NSEC_PER_SEC; - ++sec; - } - while (nsec < 0) { - nsec += NSEC_PER_SEC; - --sec; - } - ts->tv_sec = sec; - ts->tv_nsec = nsec; -} /* XXX - This test may fail under tight memory conditions */ static int -- 2.40.0