From 989056833ff24691cc26c8bc8b9ba951a08b4a66 Mon Sep 17 00:00:00 2001
From: Alessandro Bruni
Date: Thu, 28 Jan 2016 00:03:49 +0300
Subject: [PATCH] GC_malloc[_atomic] global and thread-local generalization
 with kind

* include/gc_inline.h (GC_malloc_kind, GC_malloc_kind_global): New
public function declarations.
* include/gc_inline.h (GC_MALLOC_WORDS_KIND): New public macro.
* include/gc_inline.h (GC_MALLOC_WORDS, GC_MALLOC_ATOMIC_WORDS): Use
GC_MALLOC_WORDS_KIND.
* include/gc_inline.h (GC_CONS): Use GC_malloc_kind (instead of
GC_malloc); reformat code.
* include/private/gc_priv.h (MAXOBJKINDS): Allow user-defined values.
* include/private/gc_priv.h (GC_core_malloc, GC_core_malloc_atomic):
Remove prototypes.
* malloc.c: Include gc_inline.h (to get GC_malloc_kind prototype).
* mallocx.c: Likewise.
* malloc.c (GC_generic_malloc_inner,
GC_generic_malloc_inner_ignore_off_page, GC_generic_malloc): Add
assertion on "k" (kind) argument (should be less than MAXOBJKINDS).
* mallocx.c (GC_generic_malloc_ignore_off_page,
GC_generic_malloc_many): Likewise.
* malloc.c (GC_generic_malloc_uncollectable): Add assertion on "k"
argument (should be less than PREDEFINED_KINDS).
* malloc.c (GC_core_malloc_atomic, GC_core_malloc): Replace with
GC_malloc_kind_global.
* malloc.c (GC_malloc_atomic, GC_malloc): Define as wrappers around
GC_malloc_kind.
* malloc.c (GC_malloc_kind): Redirect to GC_malloc_kind_global if not
defined in gc_inline.h (as a macro) or in thread_local_alloc.c.
* mallocx.c (GC_generic_or_special_malloc): Call GC_malloc_kind
instead of GC_malloc_atomic and GC_malloc.
* thread_local_alloc.c (GC_malloc, GC_malloc_atomic): Replace with
GC_malloc_kind; remove tiny_fl local variable; call
GC_malloc_kind_global instead of GC_core_malloc and
GC_core_malloc_atomic.
* thread_local_alloc.c (GC_destroy_thread_local): Adjust static assert
to guard against overrun of the global GC_freelists.
---
 include/gc_inline.h       | 54 ++++++++++++++---------
 include/private/gc_priv.h | 13 +++---
 malloc.c                  | 90 ++++++++++++++++++---------------
 mallocx.c                 |  6 ++-
 thread_local_alloc.c      | 62 +++++++++------------------
 5 files changed, 104 insertions(+), 121 deletions(-)

diff --git a/include/gc_inline.h b/include/gc_inline.h
index db1b2718..f4d3b8d0 100644
--- a/include/gc_inline.h
+++ b/include/gc_inline.h
@@ -56,6 +56,20 @@
 GC_API void GC_CALL GC_generic_malloc_many(size_t /* lb */, int /* k */,
                                            void ** /* result */);
 
+/* Generalized version of GC_malloc and GC_malloc_atomic. */
+/* Uses appropriately the thread-local (if available) or the global */
+/* free-list of the specified kind. */
+GC_API GC_ATTR_MALLOC GC_ATTR_ALLOC_SIZE(1) void * GC_CALL
+        GC_malloc_kind(size_t /* lb */, int /* k */);
+
+#ifdef GC_THREADS
+  /* Same as above but uses only the global free-list. */
+  GC_API GC_ATTR_MALLOC GC_ATTR_ALLOC_SIZE(1) void * GC_CALL
+        GC_malloc_kind_global(size_t /* lb */, int /* k */);
+#else
+# define GC_malloc_kind_global GC_malloc_kind
+#endif
+
 /* The ultimately general inline allocation macro.  Allocate an object */
 /* of size granules, putting the resulting pointer in result.  Tiny_fl */
 /* is a "tiny" free list array, which will be used first, if the size */
@@ -74,7 +88,7 @@ GC_API void GC_CALL GC_generic_malloc_many(size_t /* lb */, int /* k */,
 /* num_direct = 0 case. */
 /* Particularly if granules is constant, this should generate a small */
 /* amount of code. */
-# define GC_FAST_MALLOC_GRANS(result,granules,tiny_fl,num_direct,\
+# define GC_FAST_MALLOC_GRANS(result,granules,tiny_fl,num_direct, \
                               kind,default_expr,init) \
   do { \
     if (GC_EXPECT((granules) >= GC_TINY_FREELISTS,0)) { \
@@ -128,30 +142,30 @@ GC_API void GC_CALL GC_generic_malloc_many(size_t /* lb */, int /* k */,
 /* the caller is responsible for supplying a cleared tiny_fl */
 /* free list array.  For single-threaded applications, this may be */
 /* a global array. */
+# define GC_MALLOC_WORDS_KIND(result,n,tiny_fl,k,init) \
+  do { \
+    size_t grans = GC_WORDS_TO_WHOLE_GRANULES(n); \
+    GC_FAST_MALLOC_GRANS(result, grans, tiny_fl, 0, k, \
+                         GC_malloc_kind(grans * GC_GRANULE_BYTES, k), \
+                         init); \
+  } while (0)
+
 # define GC_MALLOC_WORDS(result,n,tiny_fl) \
-  do { \
-    size_t grans = GC_WORDS_TO_WHOLE_GRANULES(n); \
-    GC_FAST_MALLOC_GRANS(result, grans, tiny_fl, 0, GC_I_NORMAL, \
-                         GC_malloc(grans * GC_GRANULE_BYTES), \
-                         *(void **)(result) = 0); \
-  } while (0)
+    GC_MALLOC_WORDS_KIND(result, n, tiny_fl, GC_I_NORMAL, \
+                         *(void **)(result) = 0)
 
 # define GC_MALLOC_ATOMIC_WORDS(result,n,tiny_fl) \
-  do { \
-    size_t grans = GC_WORDS_TO_WHOLE_GRANULES(n); \
-    GC_FAST_MALLOC_GRANS(result, grans, tiny_fl, 0, GC_I_PTRFREE, \
-                         GC_malloc_atomic(grans * GC_GRANULE_BYTES), \
-                         (void)0 /* no initialization */); \
-  } while (0)
+    GC_MALLOC_WORDS_KIND(result, n, tiny_fl, GC_I_PTRFREE, (void)0)
 
 /* And once more for two word initialized objects: */
 # define GC_CONS(result, first, second, tiny_fl) \
-  do { \
-    size_t grans = GC_WORDS_TO_WHOLE_GRANULES(2); \
-    GC_FAST_MALLOC_GRANS(result, grans, tiny_fl, 0, GC_I_NORMAL, \
-                         GC_malloc(grans * GC_GRANULE_BYTES), \
-                         *(void **)(result) = (void *)(first)); \
-    ((void **)(result))[1] = (void *)(second); \
-  } while (0)
+    do { \
+      size_t grans = GC_WORDS_TO_WHOLE_GRANULES(2); \
+      GC_FAST_MALLOC_GRANS(result, grans, tiny_fl, 0, GC_I_NORMAL, \
+                           GC_malloc_kind(grans * GC_GRANULE_BYTES, \
+                                          GC_I_NORMAL), \
+                           *(void **)(result) = (void *)(first)); \
+      ((void **)(result))[1] = (void *)(second); \
+    } while (0)
 
 #endif /* !GC_INLINE_H */
diff --git a/include/private/gc_priv.h b/include/private/gc_priv.h
index 7c613846..a84536e7 100644
--- a/include/private/gc_priv.h
+++ b/include/private/gc_priv.h
@@ -1346,8 +1346,9 @@ GC_API_PRIV GC_FAR struct _GC_arrays GC_arrays;
 #define USED_HEAP_SIZE (GC_heapsize - GC_large_free_bytes)
 
 /* Object kinds: */
-#define MAXOBJKINDS 16
-
+#ifndef MAXOBJKINDS
+# define MAXOBJKINDS 16
+#endif
 GC_EXTERN struct obj_kind {
    void **ok_freelist; /* Array of free list headers for this kind of */
                        /* object.  Point either to GC_arrays or to */
@@ -1878,13 +1879,9 @@ GC_INNER ptr_t GC_allocobj(size_t sz, int kind);
 #endif /* !GC_COLLECT_AT_MALLOC */
 
 /* Allocation routines that bypass the thread local cache. */
-#ifdef THREAD_LOCAL_ALLOC
-  GC_INNER void * GC_core_malloc(size_t);
-  GC_INNER void * GC_core_malloc_atomic(size_t);
-# ifdef GC_GCJ_SUPPORT
+#if defined(THREAD_LOCAL_ALLOC) && defined(GC_GCJ_SUPPORT)
   GC_INNER void * GC_core_gcj_malloc(size_t, void *);
-# endif
-#endif /* THREAD_LOCAL_ALLOC */
+#endif
 
 GC_INNER void GC_init_headers(void);
 GC_INNER struct hblkhdr * GC_install_header(struct hblk *h);
diff --git a/malloc.c b/malloc.c
index 72a98ec2..083704cf 100644
--- a/malloc.c
+++ b/malloc.c
@@ -14,6 +14,7 @@
  */
 
 #include "private/gc_priv.h"
+#include "gc_inline.h" /* for GC_malloc_kind */
 
 #include <stdio.h>
 #include <string.h>
@@ -109,7 +110,8 @@ GC_INNER void * GC_generic_malloc_inner(size_t lb, int k)
     void *op;
 
     GC_ASSERT(I_HOLD_LOCK());
-    if(SMALL_OBJ(lb)) {
+    GC_ASSERT(k < MAXOBJKINDS);
+    if (SMALL_OBJ(lb)) {
         struct obj_kind * kind = GC_obj_kinds + k;
         size_t lg = GC_size_map[lb];
         void ** opp = &(kind -> ok_freelist[lg]);
@@ -162,7 +164,8 @@ GC_INNER void * GC_generic_malloc_inner_ignore_off_page(size_t lb, int k)
 
     GC_ASSERT(I_HOLD_LOCK());
     if (lb <= HBLKSIZE)
-        return(GC_generic_malloc_inner(lb, k));
+        return GC_generic_malloc_inner(lb, k);
+    GC_ASSERT(k < MAXOBJKINDS);
     lb_adjusted = ADD_SLOP(lb);
     op = GC_alloc_large_and_clear(lb_adjusted, k, IGNORE_OFF_PAGE);
     GC_bytes_allocd += lb_adjusted;
@@ -180,6 +183,7 @@ GC_API GC_ATTR_MALLOC void * GC_CALL GC_generic_malloc(size_t lb, int k)
     void * result;
     DCL_LOCK_STATE;
 
+    GC_ASSERT(k < MAXOBJKINDS);
     if (EXPECT(GC_have_errors, FALSE))
       GC_print_all_errors();
     GC_INVOKE_FINALIZERS();
@@ -229,68 +233,57 @@ GC_API GC_ATTR_MALLOC void * GC_CALL GC_generic_malloc(size_t lb, int k)
     }
 }
 
-/* Allocate lb bytes of atomic (pointer-free) data. */
-#ifdef THREAD_LOCAL_ALLOC
-  GC_INNER void * GC_core_malloc_atomic(size_t lb)
-#else
-  GC_API GC_ATTR_MALLOC void * GC_CALL GC_malloc_atomic(size_t lb)
-#endif
+GC_API GC_ATTR_MALLOC void * GC_CALL GC_malloc_kind_global(size_t lb, int k)
 {
     void *op;
     size_t lg;
     DCL_LOCK_STATE;
 
-    if(SMALL_OBJ(lb)) {
+    GC_STATIC_ASSERT(MAXOBJKINDS >= PREDEFINED_KINDS);
+    GC_ASSERT(k < PREDEFINED_KINDS);
+    if (SMALL_OBJ(lb)) {
         GC_DBG_COLLECT_AT_MALLOC(lb);
         lg = GC_size_map[lb];
         LOCK();
-        op = GC_freelists[PTRFREE][lg];
-        if (EXPECT(0 == op, FALSE)) {
+        op = GC_freelists[k][lg];
+        if (EXPECT(op != NULL, TRUE)) {
+          if (k == PTRFREE) {
+            GC_freelists[k][lg] = obj_link(op);
+          } else {
+            GC_ASSERT(0 == obj_link(op)
+                      || ((word)obj_link(op)
+                            <= (word)GC_greatest_plausible_heap_addr
+                          && (word)obj_link(op)
+                            >= (word)GC_least_plausible_heap_addr));
+            GC_freelists[k][lg] = obj_link(op);
+            obj_link(op) = 0;
+          }
+          GC_bytes_allocd += GRANULES_TO_BYTES(lg);
           UNLOCK();
-          return(GENERAL_MALLOC((word)lb, PTRFREE));
+          return op;
         }
-        GC_freelists[PTRFREE][lg] = obj_link(op);
-        GC_bytes_allocd += GRANULES_TO_BYTES(lg);
         UNLOCK();
-        return((void *) op);
-    } else {
-        return(GENERAL_MALLOC((word)lb, PTRFREE));
-    }
+    }
+    return GENERAL_MALLOC(lb, k);
 }
 
-/* Allocate lb bytes of composite (pointerful) data */
-#ifdef THREAD_LOCAL_ALLOC
-  GC_INNER void * GC_core_malloc(size_t lb)
-#else
-  GC_API GC_ATTR_MALLOC void * GC_CALL GC_malloc(size_t lb)
+#if defined(THREADS) && !defined(THREAD_LOCAL_ALLOC)
+  GC_API GC_ATTR_MALLOC void * GC_CALL GC_malloc_kind(size_t lb, int k)
+  {
+    return GC_malloc_kind_global(lb, k);
+  }
 #endif
+
+/* Allocate lb bytes of atomic (pointer-free) data. */
+GC_API GC_ATTR_MALLOC void * GC_CALL GC_malloc_atomic(size_t lb)
 {
-    void *op;
-    size_t lg;
-    DCL_LOCK_STATE;
+    return GC_malloc_kind(lb, PTRFREE);
+}
 
-    if(SMALL_OBJ(lb)) {
-        GC_DBG_COLLECT_AT_MALLOC(lb);
-        lg = GC_size_map[lb];
-        LOCK();
-        op = GC_freelists[NORMAL][lg];
-        if (EXPECT(0 == op, FALSE)) {
-            UNLOCK();
-            return (GENERAL_MALLOC((word)lb, NORMAL));
-        }
-        GC_ASSERT(0 == obj_link(op)
-                  || ((word)obj_link(op)
-                        <= (word)GC_greatest_plausible_heap_addr
-                      && (word)obj_link(op)
-                        >= (word)GC_least_plausible_heap_addr));
-        GC_freelists[NORMAL][lg] = obj_link(op);
-        obj_link(op) = 0;
-        GC_bytes_allocd += GRANULES_TO_BYTES(lg);
-        UNLOCK();
-        return op;
-    } else {
-        return(GENERAL_MALLOC(lb, NORMAL));
-    }
+/* Allocate lb bytes of composite (pointerful) data. */
+GC_API GC_ATTR_MALLOC void * GC_CALL GC_malloc(size_t lb)
+{
+    return GC_malloc_kind(lb, NORMAL);
 }
 
 GC_API GC_ATTR_MALLOC void * GC_CALL GC_generic_malloc_uncollectable(
@@ -300,6 +293,7 @@ GC_API GC_ATTR_MALLOC void * GC_CALL GC_generic_malloc_uncollectable(
     size_t lg;
     DCL_LOCK_STATE;
 
+    GC_ASSERT(k < PREDEFINED_KINDS);
     if (SMALL_OBJ(lb)) {
         GC_DBG_COLLECT_AT_MALLOC(lb);
         if (EXTRA_BYTES != 0 && lb != 0) lb--;
diff --git a/mallocx.c b/mallocx.c
index f84b0b60..75147486 100644
--- a/mallocx.c
+++ b/mallocx.c
@@ -15,6 +15,7 @@
  */
 
 #include "private/gc_priv.h"
+#include "gc_inline.h" /* for GC_malloc_kind */
 
 /*
  * These are extra allocation routines which are likely to be less
@@ -65,9 +66,8 @@ GC_API GC_ATTR_MALLOC void * GC_CALL GC_generic_or_special_malloc(size_t lb,
         return GC_malloc_stubborn(lb);
 #   endif
     case PTRFREE:
-      return GC_malloc_atomic(lb);
     case NORMAL:
-      return GC_malloc(lb);
+      return GC_malloc_kind(lb, knd);
     case UNCOLLECTABLE:
 #   ifdef GC_ATOMIC_UNCOLLECTABLE
       case AUNCOLLECTABLE:
@@ -189,6 +189,7 @@ GC_API GC_ATTR_MALLOC void * GC_CALL
 
     if (SMALL_OBJ(lb))
         return GC_generic_malloc(lb, k);
+    GC_ASSERT(k < MAXOBJKINDS);
     lg = ROUNDED_UP_GRANULES(lb);
     lb_rounded = GRANULES_TO_BYTES(lg);
     if (lb_rounded < lb)
@@ -300,6 +301,7 @@ GC_API void GC_CALL GC_generic_malloc_many(size_t lb, int k, void **result)
         *result = op;
         return;
     }
+    GC_ASSERT(k < MAXOBJKINDS);
     lw = BYTES_TO_WORDS(lb);
     lg = BYTES_TO_GRANULES(lb);
     if (EXPECT(GC_have_errors, FALSE))
diff --git a/thread_local_alloc.c b/thread_local_alloc.c
index 28b8620c..9b194a78 100644
--- a/thread_local_alloc.c
+++ b/thread_local_alloc.c
@@ -138,7 +138,7 @@ GC_INNER void GC_destroy_thread_local(GC_tlfs p)
 
     /* We currently only do this from the thread itself or from */
     /* the fork handler for a child process. */
-    GC_STATIC_ASSERT(MAXOBJKINDS >= THREAD_FREELISTS_KINDS);
+    GC_STATIC_ASSERT(PREDEFINED_KINDS >= THREAD_FREELISTS_KINDS);
     for (i = 0; i < THREAD_FREELISTS_KINDS; ++i) {
         return_freelists(p -> _freelists[i], GC_freelists[i]);
     }
@@ -156,19 +156,23 @@ GC_INNER void GC_destroy_thread_local(GC_tlfs p)
   GC_bool GC_is_thread_tsd_valid(void *tsd);
 #endif
 
-GC_API GC_ATTR_MALLOC void * GC_CALL GC_malloc(size_t bytes)
+GC_API GC_ATTR_MALLOC void * GC_CALL GC_malloc_kind(size_t bytes, int knd)
 {
-    size_t granules = ROUNDED_UP_GRANULES(bytes);
+    size_t granules;
     void *tsd;
     void *result;
-    void **tiny_fl;
+
+# if PREDEFINED_KINDS > THREAD_FREELISTS_KINDS
+    if (EXPECT(knd >= THREAD_FREELISTS_KINDS, FALSE)) {
+      return GC_malloc_kind_global(bytes, knd);
+    }
+# endif
 
 # if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC)
     GC_key_t k = GC_thread_key;
     if (EXPECT(0 == k, FALSE)) {
       /* We haven't yet run GC_init_parallel.  That means */
       /* we also aren't locking, so this is fairly cheap. */
-      return GC_core_malloc(bytes);
+      return GC_malloc_kind_global(bytes, knd);
     }
     tsd = GC_getspecific(k);
 # else
@@ -176,53 +180,25 @@ GC_API GC_ATTR_MALLOC void * GC_CALL GC_malloc(size_t bytes)
     tsd = GC_getspecific(GC_thread_key);
 # endif
 # if !defined(USE_COMPILER_TLS) && !defined(USE_WIN32_COMPILER_TLS)
     if (EXPECT(0 == tsd, FALSE)) {
-      return GC_core_malloc(bytes);
+      return GC_malloc_kind_global(bytes, knd);
     }
 # endif
     GC_ASSERT(GC_is_initialized);
-    GC_ASSERT(GC_is_thread_tsd_valid(tsd));
-
-    tiny_fl = ((GC_tlfs)tsd) -> normal_freelists;
-    GC_FAST_MALLOC_GRANS(result, granules, tiny_fl, DIRECT_GRANULES,
-                         NORMAL, GC_core_malloc(bytes), obj_link(result)=0);
+    granules = ROUNDED_UP_GRANULES(bytes);
+    GC_FAST_MALLOC_GRANS(result, granules,
+                         ((GC_tlfs)tsd) -> _freelists[knd], DIRECT_GRANULES,
+                         knd, GC_malloc_kind_global(bytes, knd),
+                         (void)(knd == PTRFREE ? NULL
+                                               : (obj_link(result) = 0)));
 # ifdef LOG_ALLOCS
-    GC_log_printf("GC_malloc(%lu) returned %p, recent GC #%lu\n",
-                  (unsigned long)bytes, result, (unsigned long)GC_gc_no);
+    GC_log_printf("GC_malloc_kind(%lu, %d) returned %p, recent GC #%lu\n",
+                  (unsigned long)bytes, knd, result,
+                  (unsigned long)GC_gc_no);
 # endif
     return result;
 }
 
-GC_API GC_ATTR_MALLOC void * GC_CALL GC_malloc_atomic(size_t bytes)
-{
-    size_t granules = ROUNDED_UP_GRANULES(bytes);
-    void *tsd;
-    void *result;
-    void **tiny_fl;
-
-# if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC)
-    GC_key_t k = GC_thread_key;
-    if (EXPECT(0 == k, FALSE)) {
-      /* We haven't yet run GC_init_parallel.  That means */
-      /* we also aren't locking, so this is fairly cheap. */
-      return GC_core_malloc_atomic(bytes);
-    }
-    tsd = GC_getspecific(k);
-# else
-    tsd = GC_getspecific(GC_thread_key);
-# endif
-# if !defined(USE_COMPILER_TLS) && !defined(USE_WIN32_COMPILER_TLS)
-    if (EXPECT(0 == tsd, FALSE)) {
-      return GC_core_malloc_atomic(bytes);
-    }
-# endif
-    GC_ASSERT(GC_is_initialized);
-    tiny_fl = ((GC_tlfs)tsd) -> ptrfree_freelists;
-    GC_FAST_MALLOC_GRANS(result, granules, tiny_fl, DIRECT_GRANULES, PTRFREE,
-                         GC_core_malloc_atomic(bytes), (void)0 /* no init */);
-    return result;
-}
-
 #ifdef GC_GCJ_SUPPORT
 
 # include "atomic_ops.h" /* for AO_compiler_barrier() */
-- 
2.40.0
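
Editor's note (not part of the patch): the sketch below shows how client
code could exercise the new API once the patch is applied.  It is a
minimal illustration under stated assumptions: a single-threaded program
(so a static tiny free-list array is permitted, per the comment in
gc_inline.h on GC_MALLOC_WORDS_KIND) and the usual build/link setup
(e.g. -lgc).  The names my_tiny_fl, h, buf and cell are made up for the
example.  GC_I_PTRFREE and GC_I_NORMAL are the public mirrors of the
internal PTRFREE/NORMAL kinds, so the first two calls behave exactly
like GC_malloc_atomic and GC_malloc as redefined by this patch.

#include <stdio.h>
#include "gc.h"
#include "gc_inline.h"

/* Cleared tiny free-list array; a global one is acceptable for a */
/* single-threaded client, as noted in gc_inline.h. */
static void *my_tiny_fl[GC_TINY_FREELISTS];

int main(void)
{
    void *h;    /* pointer-containing ("normal") object */
    void *buf;  /* pointer-free ("atomic") object */
    void *cell;

    GC_INIT();

    /* Same effect as GC_malloc(32): kind GC_I_NORMAL selects the */
    /* normal free list (the thread-local one when available). */
    h = GC_malloc_kind(32, GC_I_NORMAL);

    /* Same effect as GC_malloc_atomic(64). */
    buf = GC_malloc_kind(64, GC_I_PTRFREE);

    /* Inline fast path through the new generalized macro; it falls */
    /* back to GC_malloc_kind when the tiny free list is empty.  The */
    /* init expression clears the first word, as GC_MALLOC_WORDS does */
    /* for normal (pointer-containing) objects. */
    GC_MALLOC_WORDS_KIND(cell, 2, my_tiny_fl, GC_I_NORMAL,
                         *(void **)(cell) = 0);

    printf("%p %p %p\n", h, buf, cell);
    return 0;
}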