From: Brian Behlendorf Date: Mon, 15 Dec 2014 22:06:18 +0000 (-0800) Subject: Refine slab cache sizing X-Git-Tag: zfs-0.8.0-rc1~152^2~197^2~2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3018bffa9b8bfd8d24010407c8a07c25b44c010e;p=zfs Refine slab cache sizing This change is designed to improve the memory utilization of slabs by more carefully setting their size. The way the code currently works is problematic for slabs which contain large objects (>1MB). This is due to slabs being unconditionally rounded up to a power of two which may result in unused space at the end of the slab. The reason the existing code rounds up every slab is because it assumes it will backed by the buddy allocator. Since the buddy allocator can only performs power of two allocations this is desirable because it avoids wasting any space. However, this logic breaks down if slab is backed by vmalloc() which operates at a page level granularity. In this case, the optimal thing to do is calculate the minimum required slab size given certain constraints (object size, alignment, objects/slab, etc). Therefore, this patch reworks the spl_slab_size() function so that it sizes KMC_KMEM slabs differently than KMC_VMEM slabs. KMC_KMEM slabs are rounded up to the nearest power of two, and KMC_VMEM slabs are allowed to be the minimum required size. This change also reduces the default number of objects per slab. This reduces how much memory a single cache object can pin, which can result in significant memory saving for highly fragmented caches. But depending on the workload it may result in slabs being allocated and freed more frequently. In practice, this has been shown to be a better default for most workloads. Also the maximum slab size has been reduced to 4MB on 32-bit systems. Due to the limited virtual address space it's critical the we be as frugal as possible. A limit of 4M still lets us reasonably comfortably allocate a limited number of 1MB objects. Finally, the kmem:slab_small and kmem:slab_large SPLAT tests were extended to provide better test coverage of various object sizes and alignments. Caches are created with random parameters and their basic functionality is verified by allocating several slabs worth of objects. Signed-off-by: Brian Behlendorf --- diff --git a/include/sys/kmem_cache.h b/include/sys/kmem_cache.h index a9b5bdd2f..9ac41e6ec 100644 --- a/include/sys/kmem_cache.h +++ b/include/sys/kmem_cache.h @@ -101,9 +101,24 @@ extern struct rw_semaphore spl_kmem_cache_sem; #define SPL_KMEM_CACHE_DELAY 15 /* Minimum slab release age */ #define SPL_KMEM_CACHE_REAP 0 /* Default reap everything */ -#define SPL_KMEM_CACHE_OBJ_PER_SLAB 16 /* Target objects per slab */ +#define SPL_KMEM_CACHE_OBJ_PER_SLAB 8 /* Target objects per slab */ #define SPL_KMEM_CACHE_OBJ_PER_SLAB_MIN 1 /* Minimum objects per slab */ #define SPL_KMEM_CACHE_ALIGN 8 /* Default object alignment */ +#ifdef _LP64 +#define SPL_KMEM_CACHE_MAX_SIZE 32 /* Max slab size in MB */ +#else +#define SPL_KMEM_CACHE_MAX_SIZE 4 /* Max slab size in MB */ +#endif + +#define SPL_MAX_ORDER (MAX_ORDER - 3) +#define SPL_MAX_ORDER_NR_PAGES (1 << (SPL_MAX_ORDER - 1)) + +#ifdef CONFIG_SLUB +#define SPL_MAX_KMEM_CACHE_ORDER PAGE_ALLOC_COSTLY_ORDER +#define SPL_MAX_KMEM_ORDER_NR_PAGES (1 << (SPL_MAX_KMEM_CACHE_ORDER - 1)) +#else +#define SPL_MAX_KMEM_ORDER_NR_PAGES (KMALLOC_MAX_SIZE >> PAGE_SHIFT) +#endif #define POINTER_IS_VALID(p) 0 /* Unimplemented */ #define POINTER_INVALIDATE(pp) /* Unimplemented */ diff --git a/man/man5/spl-module-parameters.5 b/man/man5/spl-module-parameters.5 index 27c7bfd48..3e7e877fb 100644 --- a/man/man5/spl-module-parameters.5 +++ b/man/man5/spl-module-parameters.5 @@ -68,7 +68,7 @@ required to perform an allocation. Conversely, a smaller value will minimize the footprint and improve cache reclaim time but individual allocations may take longer. .sp -Default value: \fB16\fR +Default value: \fB8\fR .RE .sp @@ -96,7 +96,7 @@ the maximum cache object size to \fBspl_kmem_cache_max_size\fR / \fBspl_kmem_cache_obj_per_slab\fR. Caches may not be created with object sized larger than this limit. .sp -Default value: \fB32\fR +Default value: \fB32 (64-bit) or 4 (32-bit)\fR .RE .sp diff --git a/module/spl/spl-kmem-cache.c b/module/spl/spl-kmem-cache.c index 809ac5cc5..5160aa1cf 100644 --- a/module/spl/spl-kmem-cache.c +++ b/module/spl/spl-kmem-cache.c @@ -109,7 +109,7 @@ module_param(spl_kmem_cache_obj_per_slab_min, uint, 0644); MODULE_PARM_DESC(spl_kmem_cache_obj_per_slab_min, "Minimal number of objects per slab"); -unsigned int spl_kmem_cache_max_size = 32; +unsigned int spl_kmem_cache_max_size = SPL_KMEM_CACHE_MAX_SIZE; module_param(spl_kmem_cache_max_size, uint, 0644); MODULE_PARM_DESC(spl_kmem_cache_max_size, "Maximum size of slab in MB"); @@ -128,7 +128,13 @@ module_param(spl_kmem_cache_slab_limit, uint, 0644); MODULE_PARM_DESC(spl_kmem_cache_slab_limit, "Objects less than N bytes use the Linux slab"); -unsigned int spl_kmem_cache_kmem_limit = (PAGE_SIZE / 4); +/* + * This value defaults to a threshold designed to avoid allocations which + * have been deemed costly by the kernel. + */ +unsigned int spl_kmem_cache_kmem_limit = + ((1 << (PAGE_ALLOC_COSTLY_ORDER - 1)) * PAGE_SIZE) / + SPL_KMEM_CACHE_OBJ_PER_SLAB; module_param(spl_kmem_cache_kmem_limit, uint, 0644); MODULE_PARM_DESC(spl_kmem_cache_kmem_limit, "Objects less than N bytes use the kmalloc"); @@ -181,12 +187,12 @@ kv_alloc(spl_kmem_cache_t *skc, int size, int flags) gfp_t lflags = kmem_flags_convert(flags); void *ptr; - ASSERT(ISP2(size)); - - if (skc->skc_flags & KMC_KMEM) + if (skc->skc_flags & KMC_KMEM) { + ASSERT(ISP2(size)); ptr = (void *)__get_free_pages(lflags, get_order(size)); - else + } else { ptr = spl_vmalloc(size, lflags | __GFP_HIGHMEM, PAGE_KERNEL); + } /* Resulting allocated memory will be page aligned */ ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE)); @@ -198,7 +204,6 @@ static void kv_free(spl_kmem_cache_t *skc, void *ptr, int size) { ASSERT(IS_P2ALIGNED(ptr, PAGE_SIZE)); - ASSERT(ISP2(size)); /* * The Linux direct reclaim path uses this out of band value to @@ -210,10 +215,12 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size) if (current->reclaim_state) current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT; - if (skc->skc_flags & KMC_KMEM) + if (skc->skc_flags & KMC_KMEM) { + ASSERT(ISP2(size)); free_pages((unsigned long)ptr, get_order(size)); - else + } else { vfree(ptr); + } } /* @@ -668,40 +675,48 @@ spl_cache_age(void *data) static int spl_slab_size(spl_kmem_cache_t *skc, uint32_t *objs, uint32_t *size) { - uint32_t sks_size, obj_size, max_size; + uint32_t sks_size, obj_size, max_size, tgt_size, tgt_objs; if (skc->skc_flags & KMC_OFFSLAB) { - *objs = spl_kmem_cache_obj_per_slab; - *size = P2ROUNDUP(sizeof (spl_kmem_slab_t), PAGE_SIZE); - return (0); + tgt_objs = spl_kmem_cache_obj_per_slab; + tgt_size = P2ROUNDUP(sizeof (spl_kmem_slab_t), PAGE_SIZE); + + if ((skc->skc_flags & KMC_KMEM) && + (spl_obj_size(skc) > (SPL_MAX_ORDER_NR_PAGES * PAGE_SIZE))) + return (-ENOSPC); } else { sks_size = spl_sks_size(skc); obj_size = spl_obj_size(skc); - - if (skc->skc_flags & KMC_KMEM) - max_size = ((uint32_t)1 << (MAX_ORDER-3)) * PAGE_SIZE; - else - max_size = (spl_kmem_cache_max_size * 1024 * 1024); - - /* Power of two sized slab */ - for (*size = PAGE_SIZE; *size <= max_size; *size *= 2) { - *objs = (*size - sks_size) / obj_size; - if (*objs >= spl_kmem_cache_obj_per_slab) - return (0); - } + max_size = (spl_kmem_cache_max_size * 1024 * 1024); + tgt_size = (spl_kmem_cache_obj_per_slab * obj_size + sks_size); /* - * Unable to satisfy target objects per slab, fall back to - * allocating a maximally sized slab and assuming it can - * contain the minimum objects count use it. If not fail. + * KMC_KMEM slabs are allocated by __get_free_pages() which + * rounds up to the nearest order. Knowing this the size + * should be rounded up to the next power of two with a hard + * maximum defined by the maximum allowed allocation order. */ - *size = max_size; - *objs = (*size - sks_size) / obj_size; - if (*objs >= (spl_kmem_cache_obj_per_slab_min)) - return (0); + if (skc->skc_flags & KMC_KMEM) { + max_size = SPL_MAX_ORDER_NR_PAGES * PAGE_SIZE; + tgt_size = MIN(max_size, + PAGE_SIZE * (1 << MAX(get_order(tgt_size) - 1, 1))); + } + + if (tgt_size <= max_size) { + tgt_objs = (tgt_size - sks_size) / obj_size; + } else { + tgt_objs = (max_size - sks_size) / obj_size; + tgt_size = (tgt_objs * obj_size) + sks_size; + } } - return (-ENOSPC); + if (tgt_objs == 0) + return (-ENOSPC); + + *objs = tgt_objs; + *size = tgt_size; + + return (0); } /* @@ -960,6 +975,11 @@ spl_kmem_cache_create(char *name, size_t size, size_t align, if (rc) goto out; } else { + if (size > (SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE)) { + rc = EINVAL; + goto out; + } + skc->skc_linux_cache = kmem_cache_create( skc->skc_name, size, align, 0, NULL); if (skc->skc_linux_cache == NULL) { @@ -1406,8 +1426,11 @@ restart: skm->skm_age = jiffies; } else { obj = spl_cache_refill(skc, skm, flags); - if (obj == NULL) + if ((obj == NULL) && !(flags & KM_NOSLEEP)) goto restart; + + local_irq_enable(); + goto ret; } local_irq_enable(); @@ -1427,7 +1450,6 @@ ret: return (obj); } - EXPORT_SYMBOL(spl_kmem_cache_alloc); /* diff --git a/module/splat/splat-kmem.c b/module/splat/splat-kmem.c index 81f748bb6..cd0000bae 100644 --- a/module/splat/splat-kmem.c +++ b/module/splat/splat-kmem.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include "splat-internal.h" @@ -583,87 +584,124 @@ out: static int splat_kmem_cache_test(struct file *file, void *arg, char *name, - int size, int align, int flags) + int size, int align, int flags) { - kmem_cache_priv_t *kcp; - kmem_cache_data_t *kcd = NULL; - int rc = 0, max; + kmem_cache_priv_t *kcp = NULL; + kmem_cache_data_t **kcd = NULL; + int i, rc = 0, objs = 0; + + splat_vprint(file, name, + "Testing size=%d, align=%d, flags=0x%04x\n", + size, align, flags); kcp = splat_kmem_cache_test_kcp_alloc(file, name, size, align, 0); if (!kcp) { splat_vprint(file, name, "Unable to create '%s'\n", "kcp"); - return -ENOMEM; + return (-ENOMEM); } - kcp->kcp_cache = - kmem_cache_create(SPLAT_KMEM_CACHE_NAME, - kcp->kcp_size, kcp->kcp_align, - splat_kmem_cache_test_constructor, - splat_kmem_cache_test_destructor, - NULL, kcp, NULL, flags); - if (!kcp->kcp_cache) { - splat_vprint(file, name, - "Unable to create '%s'\n", - SPLAT_KMEM_CACHE_NAME); + kcp->kcp_cache = kmem_cache_create(SPLAT_KMEM_CACHE_NAME, + kcp->kcp_size, kcp->kcp_align, + splat_kmem_cache_test_constructor, + splat_kmem_cache_test_destructor, + NULL, kcp, NULL, flags); + if (kcp->kcp_cache == NULL) { + splat_vprint(file, name, "Unable to create " + "name='%s', size=%d, align=%d, flags=0x%x\n", + SPLAT_KMEM_CACHE_NAME, size, align, flags); rc = -ENOMEM; goto out_free; } - kcd = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP); - if (!kcd) { - splat_vprint(file, name, - "Unable to allocate from '%s'\n", - SPLAT_KMEM_CACHE_NAME); - rc = -EINVAL; + /* + * Allocate several slabs worth of objects to verify functionality. + * However, on 32-bit systems with limited address space constrain + * it to a single slab for the purposes of this test. + */ +#ifdef _LP64 + objs = SPL_KMEM_CACHE_OBJ_PER_SLAB * 4; +#else + objs = 1; +#endif + kcd = kmem_zalloc(sizeof (kmem_cache_data_t *) * objs, KM_SLEEP); + if (kcd == NULL) { + splat_vprint(file, name, "Unable to allocate pointers " + "for %d objects\n", objs); + rc = -ENOMEM; goto out_free; } - if (!kcd->kcd_flag) { - splat_vprint(file, name, - "Failed to run contructor for '%s'\n", - SPLAT_KMEM_CACHE_NAME); - rc = -EINVAL; - goto out_free; - } + for (i = 0; i < objs; i++) { + kcd[i] = kmem_cache_alloc(kcp->kcp_cache, KM_SLEEP); + if (kcd[i] == NULL) { + splat_vprint(file, name, "Unable to allocate " + "from '%s'\n", SPLAT_KMEM_CACHE_NAME); + rc = -EINVAL; + goto out_free; + } - if (kcd->kcd_magic != kcp->kcp_magic) { - splat_vprint(file, name, - "Failed to pass private data to constructor " - "for '%s'\n", SPLAT_KMEM_CACHE_NAME); - rc = -EINVAL; - goto out_free; + if (!kcd[i]->kcd_flag) { + splat_vprint(file, name, "Failed to run constructor " + "for '%s'\n", SPLAT_KMEM_CACHE_NAME); + rc = -EINVAL; + goto out_free; + } + + if (kcd[i]->kcd_magic != kcp->kcp_magic) { + splat_vprint(file, name, + "Failed to pass private data to constructor " + "for '%s'\n", SPLAT_KMEM_CACHE_NAME); + rc = -EINVAL; + goto out_free; + } } - max = kcp->kcp_count; - kmem_cache_free(kcp->kcp_cache, kcd); + for (i = 0; i < objs; i++) { + kmem_cache_free(kcp->kcp_cache, kcd[i]); + + /* Destructors are run for every kmem_cache_free() */ + if (kcd[i]->kcd_flag) { + splat_vprint(file, name, + "Failed to run destructor for '%s'\n", + SPLAT_KMEM_CACHE_NAME); + rc = -EINVAL; + goto out_free; + } + } - /* Destroy the entire cache which will force destructors to - * run and we can verify one was called for every object */ - kmem_cache_destroy(kcp->kcp_cache); if (kcp->kcp_count) { splat_vprint(file, name, - "Failed to run destructor on all slab objects " - "for '%s'\n", SPLAT_KMEM_CACHE_NAME); + "Failed to run destructor on all slab objects for '%s'\n", + SPLAT_KMEM_CACHE_NAME); rc = -EINVAL; } + kmem_free(kcd, sizeof (kmem_cache_data_t *) * objs); + kmem_cache_destroy(kcp->kcp_cache); + splat_kmem_cache_test_kcp_free(kcp); splat_vprint(file, name, - "Successfully ran ctors/dtors for %d elements in '%s'\n", - max, SPLAT_KMEM_CACHE_NAME); + "Success ran alloc'd/free'd %d objects of size %d\n", + objs, size); - return rc; + return (rc); out_free: - if (kcd) - kmem_cache_free(kcp->kcp_cache, kcd); + if (kcd) { + for (i = 0; i < objs; i++) { + if (kcd[i] != NULL) + kmem_cache_free(kcp->kcp_cache, kcd[i]); + } + + kmem_free(kcd, sizeof (kmem_cache_data_t *) * objs); + } if (kcp->kcp_cache) kmem_cache_destroy(kcp->kcp_cache); splat_kmem_cache_test_kcp_free(kcp); - return rc; + return (rc); } static int @@ -757,35 +795,49 @@ static int splat_kmem_test5(struct file *file, void *arg) { char *name = SPLAT_KMEM_TEST5_NAME; - int rc; - - /* On slab (default + kmem + vmem) */ - rc = splat_kmem_cache_test(file, arg, name, 128, 0, 0); - if (rc) - return rc; + int i, rc = 0; - rc = splat_kmem_cache_test(file, arg, name, 128, 0, KMC_KMEM); - if (rc) - return rc; + /* Randomly pick small object sizes and alignments. */ + for (i = 0; i < 100; i++) { + int size, align, flags = 0; + uint32_t rnd; + + /* Evenly distribute tests over all value cache types */ + get_random_bytes((void *)&rnd, sizeof (uint32_t)); + switch (rnd & 0x03) { + default: + case 0x00: + flags = 0; + break; + case 0x01: + flags = KMC_KMEM; + break; + case 0x02: + flags = KMC_VMEM; + break; + case 0x03: + flags = KMC_SLAB; + break; + } - rc = splat_kmem_cache_test(file, arg, name, 128, 0, KMC_VMEM); - if (rc) - return rc; + /* The following flags are set with a 1/10 chance */ + flags |= ((((rnd >> 8) % 10) == 0) ? KMC_OFFSLAB : 0); + flags |= ((((rnd >> 16) % 10) == 0) ? KMC_NOEMERGENCY : 0); - /* Off slab (default + kmem + vmem) */ - rc = splat_kmem_cache_test(file, arg, name, 128, 0, KMC_OFFSLAB); - if (rc) - return rc; + /* 32b - PAGE_SIZE */ + get_random_bytes((void *)&rnd, sizeof (uint32_t)); + size = MAX(rnd % (PAGE_SIZE + 1), 32); - rc = splat_kmem_cache_test(file, arg, name, 128, 0, - KMC_KMEM | KMC_OFFSLAB); - if (rc) - return rc; + /* 2^N where (3 <= N <= PAGE_SHIFT) */ + get_random_bytes((void *)&rnd, sizeof (uint32_t)); + align = (1 << MAX(3, rnd % (PAGE_SHIFT + 1))); - rc = splat_kmem_cache_test(file, arg, name, 128, 0, - KMC_VMEM | KMC_OFFSLAB); + rc = splat_kmem_cache_test(file, arg, name, size, align, flags); + if (rc) + return (rc); + } - return rc; + return (rc); } /* @@ -795,44 +847,53 @@ static int splat_kmem_test6(struct file *file, void *arg) { char *name = SPLAT_KMEM_TEST6_NAME; - int rc; - - /* On slab (default + kmem + vmem) */ - rc = splat_kmem_cache_test(file, arg, name, 256*1024, 0, 0); - if (rc) - return rc; - - rc = splat_kmem_cache_test(file, arg, name, 64*1024, 0, KMC_KMEM); - if (rc) - return rc; - - rc = splat_kmem_cache_test(file, arg, name, 1024*1024, 0, KMC_VMEM); - if (rc) - return rc; - - rc = splat_kmem_cache_test(file, arg, name, 16*1024*1024, 0, KMC_VMEM); - if (rc) - return rc; + int i, max_size, rc = 0; + + /* Randomly pick large object sizes and alignments. */ + for (i = 0; i < 100; i++) { + int size, align, flags = 0; + uint32_t rnd; + + /* Evenly distribute tests over all value cache types */ + get_random_bytes((void *)&rnd, sizeof (uint32_t)); + switch (rnd & 0x03) { + default: + case 0x00: + flags = 0; + max_size = (SPL_KMEM_CACHE_MAX_SIZE * 1024 * 1024) / 2; + break; + case 0x01: + flags = KMC_KMEM; + max_size = (SPL_MAX_ORDER_NR_PAGES - 2) * PAGE_SIZE; + break; + case 0x02: + flags = KMC_VMEM; + max_size = (SPL_KMEM_CACHE_MAX_SIZE * 1024 * 1024) / 2; + break; + case 0x03: + flags = KMC_SLAB; + max_size = SPL_MAX_KMEM_ORDER_NR_PAGES * PAGE_SIZE; + break; + } - /* Off slab (default + kmem + vmem) */ - rc = splat_kmem_cache_test(file, arg, name, 256*1024, 0, KMC_OFFSLAB); - if (rc) - return rc; + /* The following flags are set with a 1/10 chance */ + flags |= ((((rnd >> 8) % 10) == 0) ? KMC_OFFSLAB : 0); + flags |= ((((rnd >> 16) % 10) == 0) ? KMC_NOEMERGENCY : 0); - rc = splat_kmem_cache_test(file, arg, name, 64*1024, 0, - KMC_KMEM | KMC_OFFSLAB); - if (rc) - return rc; + /* PAGE_SIZE - max_size */ + get_random_bytes((void *)&rnd, sizeof (uint32_t)); + size = MAX(rnd % (max_size + 1), PAGE_SIZE), - rc = splat_kmem_cache_test(file, arg, name, 1024*1024, 0, - KMC_VMEM | KMC_OFFSLAB); - if (rc) - return rc; + /* 2^N where (3 <= N <= PAGE_SHIFT) */ + get_random_bytes((void *)&rnd, sizeof (uint32_t)); + align = (1 << MAX(3, rnd % (PAGE_SHIFT + 1))); - rc = splat_kmem_cache_test(file, arg, name, 16*1024*1024, 0, - KMC_VMEM | KMC_OFFSLAB); + rc = splat_kmem_cache_test(file, arg, name, size, align, flags); + if (rc) + return (rc); + } - return rc; + return (rc); } /* @@ -842,14 +903,20 @@ static int splat_kmem_test7(struct file *file, void *arg) { char *name = SPLAT_KMEM_TEST7_NAME; + int max_size = (SPL_KMEM_CACHE_MAX_SIZE * 1024 * 1024) / 2; int i, rc; for (i = SPL_KMEM_CACHE_ALIGN; i <= PAGE_SIZE; i *= 2) { - rc = splat_kmem_cache_test(file, arg, name, 157, i, 0); + uint32_t size; + + get_random_bytes((void *)&size, sizeof (uint32_t)); + size = MAX(size % (max_size + 1), 32); + + rc = splat_kmem_cache_test(file, arg, name, size, i, 0); if (rc) return rc; - rc = splat_kmem_cache_test(file, arg, name, 157, i, + rc = splat_kmem_cache_test(file, arg, name, size, i, KMC_OFFSLAB); if (rc) return rc;