/*
 * Copyright (c) 2000-2005 by Hewlett-Packard Company. All rights reserved.
 *
 * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
 * OR IMPLIED. ANY USE IS AT YOUR OWN RISK.
 *
 * Permission is hereby granted to use or copy this program
 * for any purpose, provided the above notices are retained on all copies.
 * Permission to modify the code and to distribute modified code is granted,
 * provided the above notices are retained, and a notice that the code was
 * modified is included with the above copyright notice.
 */
#include "private/gc_priv.h"
#if defined(THREAD_LOCAL_ALLOC)

#if !defined(THREADS) && !defined(CPPCHECK)
# error "invalid config - THREAD_LOCAL_ALLOC requires GC_THREADS"
#endif

#include "private/thread_local_alloc.h"
#if defined(USE_COMPILER_TLS)
  __thread GC_ATTR_TLS_FAST
#elif defined(USE_WIN32_COMPILER_TLS)
  __declspec(thread) GC_ATTR_TLS_FAST
#endif
GC_key_t GC_thread_key;

static GC_bool keys_initialized;
/* Return a single nonempty freelist fl to the global one pointed to */
/* by gfl. */
static void return_single_freelist(void *fl, void **gfl)
{
    void *q, **qptr;

    if (NULL == *gfl) {
      *gfl = fl;
    } else {
      GC_ASSERT(GC_size(fl) == GC_size(*gfl));
      /* Concatenate: walk to the tail of fl, then append the old *gfl. */
      qptr = &(obj_link(fl));
      while ((word)(q = *qptr) >= HBLKSIZE)
        qptr = &(obj_link(q));
      GC_ASSERT(0 == q);
      *qptr = *gfl;
      *gfl = fl;
    }
}
/* Recover the contents of the freelist array fl into the global one gfl. */
/* We hold the allocator lock. */
static void return_freelists(void **fl, void **gfl)
{
    int i;

    for (i = 1; i < TINY_FREELISTS; ++i) {
        if ((word)(fl[i]) >= HBLKSIZE) {
          return_single_freelist(fl[i], &gfl[i]);
        }
        /* Clear fl[i], since the thread structure may hang around. */
        /* Do it in a way that is likely to trap if we access it. */
        fl[i] = (ptr_t)HBLKSIZE;
    }
    /* The 0 granule freelist really contains 1 granule objects. */
#   ifdef GC_GCJ_SUPPORT
      if (fl[0] == ERROR_FL) return;
#   endif
    if ((word)(fl[0]) >= HBLKSIZE) {
        return_single_freelist(fl[0], &gfl[1]);
    }
}
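
/* A note on the free-list head encoding relied upon above (a sketch of */
/* the convention implemented by GC_FAST_MALLOC_GRANS in gc_inline.h, */
/* stated here for orientation rather than as normative): head values */
/* below HBLKSIZE are not pointers. The value 1 (stored by */
/* GC_init_thread_local below) and other small values serve as */
/* allocation counters; the inline allocator bumps the counter and */
/* falls back to the global allocator until enough objects of that */
/* size have been requested (roughly DIRECT_GRANULES worth), and only */
/* then fills the local list via GC_generic_malloc_many. Hence the */
/* (ptr_t)HBLKSIZE stored above is a deliberate poison value: it */
/* passes the "is a pointer" test, but dereferencing it should fault */
/* immediately. */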
#ifdef USE_PTHREAD_SPECIFIC
  /* Re-set the TLS value on thread cleanup to allow thread-local */
  /* allocations to happen in the TLS destructors. */
  /* GC_unregister_my_thread (and similar routines) will finally set */
  /* the GC_thread_key to NULL preventing this destructor from being */
  /* called repeatedly. */
  static void reset_thread_key(void *v)
  {
    pthread_setspecific(GC_thread_key, v);
  }
#else
# define reset_thread_key 0
#endif
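
/* Why the re-set matters (a minimal sketch, not collector code; the */
/* GC_TLA_DTOR_EXAMPLE guard is hypothetical and never defined, so */
/* nothing below is compiled): a destructor registered by the client */
/* on some other pthread key may run after ours and still allocate. */
#ifdef GC_TLA_DTOR_EXAMPLE
# include <pthread.h>
# include "gc.h"

  static void client_dtor(void *v)
  {
    /* pthreads nulls a key's value before invoking its destructor; */
    /* because reset_thread_key above restores GC_thread_key's value, */
    /* a thread-local allocation here still finds its free lists. */
    void *p = GC_MALLOC(16);
    (void)p;
    (void)v;
  }
#endif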
/* Each thread structure must be initialized. */
/* This call must be made from the new thread. */
GC_INNER void GC_init_thread_local(GC_tlfs p)
{
    int i, j, res;

    GC_ASSERT(I_HOLD_LOCK());
    if (!EXPECT(keys_initialized, TRUE)) {
        /* Ensure proper alignment of a "pushed" GC symbol. */
        GC_ASSERT((word)&GC_thread_key % sizeof(word) == 0);
        res = GC_key_create(&GC_thread_key, reset_thread_key);
        if (COVERT_DATAFLOW(res) != 0) {
            ABORT("Failed to create key for local allocator");
        }
        keys_initialized = TRUE;
    }
    res = GC_setspecific(GC_thread_key, p);
    if (COVERT_DATAFLOW(res) != 0) {
        ABORT("Failed to set thread specific allocation pointers");
    }
    for (j = 0; j < TINY_FREELISTS; ++j) {
      for (i = 0; i < THREAD_FREELISTS_KINDS; ++i) {
        p -> _freelists[i][j] = (void *)(word)1;
      }
#     ifdef GC_GCJ_SUPPORT
        p -> gcj_freelists[j] = (void *)(word)1;
#     endif
    }
    /* The size 0 free lists are handled like the regular free lists, */
    /* to ensure that the explicit deallocation works. However, */
    /* allocation of a size 0 "gcj" object is always an error. */
#   ifdef GC_GCJ_SUPPORT
      p -> gcj_freelists[0] = ERROR_FL;
#   endif
}
/* We hold the allocator lock. */
GC_INNER void GC_destroy_thread_local(GC_tlfs p)
{
    int k;

    /* We currently only do this from the thread itself. */
    GC_STATIC_ASSERT(THREAD_FREELISTS_KINDS <= MAXOBJKINDS);
    for (k = 0; k < THREAD_FREELISTS_KINDS; ++k) {
        if (k == (int)GC_n_kinds)
            break; /* kind is not created */
        return_freelists(p -> _freelists[k], GC_obj_kinds[k].ok_freelist);
    }
#   ifdef GC_GCJ_SUPPORT
      return_freelists(p -> gcj_freelists, (void **)GC_gcjobjfreelist);
#   endif
}
GC_API GC_ATTR_MALLOC void * GC_CALL GC_malloc_kind(size_t bytes, int kind)
{
    size_t granules;
    void *tsd;
    void *result;

#   if MAXOBJKINDS > THREAD_FREELISTS_KINDS
      if (EXPECT(kind >= THREAD_FREELISTS_KINDS, FALSE)) {
        return GC_malloc_kind_global(bytes, kind);
      }
#   endif
#   if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_WIN32_SPECIFIC)
    {
      GC_key_t k = GC_thread_key;

      if (EXPECT(0 == k, FALSE)) {
        /* We haven't yet run GC_init_parallel. That means */
        /* we also aren't locking, so this is fairly cheap. */
        return GC_malloc_kind_global(bytes, kind);
      }
      tsd = GC_getspecific(k);
    }
#   else
      if (!EXPECT(keys_initialized, TRUE))
        return GC_malloc_kind_global(bytes, kind);
      tsd = GC_getspecific(GC_thread_key);
#   endif
#   if !defined(USE_COMPILER_TLS) && !defined(USE_WIN32_COMPILER_TLS)
      if (EXPECT(0 == tsd, FALSE)) {
        return GC_malloc_kind_global(bytes, kind);
      }
#   endif
    GC_ASSERT(GC_is_initialized);
    GC_ASSERT(GC_is_thread_tsd_valid(tsd));
    granules = ROUNDED_UP_GRANULES(bytes);
    GC_FAST_MALLOC_GRANS(result, granules,
                         ((GC_tlfs)tsd) -> _freelists[kind], DIRECT_GRANULES,
                         kind, GC_malloc_kind_global(bytes, kind),
                         (void)(kind == PTRFREE ? NULL
                                                : (obj_link(result) = 0)));
#   ifdef LOG_ALLOCS
      GC_log_printf("GC_malloc_kind(%lu, %d) returned %p, recent GC #%lu\n",
                    (unsigned long)bytes, kind, result,
                    (unsigned long)GC_gc_no);
#   endif
    return result;
}
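
/* A minimal usage sketch of GC_malloc_kind from the client's side */
/* (the GC_TLA_EXAMPLE guard is hypothetical and never defined, so */
/* the snippet is not compiled into the collector). GC_I_NORMAL and */
/* GC_I_PTRFREE are the public kind constants from gc.h. */
#ifdef GC_TLA_EXAMPLE
# include "gc.h"

  struct pair { void *car; void *cdr; };

  void *example_alloc(void)
  {
    struct pair *p;

    GC_INIT(); /* must run once at startup, before any allocation */
    /* A pointer-containing object: NORMAL-kind objects are scanned */
    /* by the marker. */
    p = (struct pair *)GC_malloc_kind(sizeof(struct pair), GC_I_NORMAL);
    /* Pointer-free data: PTRFREE-kind objects are never scanned, so */
    /* they cannot cause false retention. */
    p -> car = GC_malloc_kind(64, GC_I_PTRFREE);
    p -> cdr = 0;
    return p;
  }
#endif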
#ifdef GC_GCJ_SUPPORT

#include "gc_gcj.h"
/* Gcj-style allocation without locks is extremely tricky. The */
/* fundamental issue is that we may end up marking a free list, which */
/* has freelist links instead of "vtable" pointers. That is usually */
/* OK, since the next object on the free list will be cleared, and */
/* will thus be interpreted as containing a zero descriptor. That's */
/* fine if the object has not yet been initialized. But there are */
/* interesting potential races. */
/* In the case of incremental collection, this seems hopeless, since */
/* the marker may run asynchronously, and may pick up the pointer to */
/* the next freelist entry (which it thinks is a vtable pointer), get */
/* suspended for a while, and then see an allocated object instead */
/* of the vtable. This may be avoidable with either a handshake with */
/* the collector or, probably more easily, by moving the free list */
/* links to the second word of each object. The latter isn't a */
/* universal win, since on architectures like Itanium, nonzero offsets */
/* are not necessarily free. And there may be cache fill order issues. */
/* For now, we punt with incremental GC. This probably means that */
/* incremental GC should be enabled before we fork a second thread. */
/* Unlike the other thread local allocation calls, we assume that the */
/* collector has been explicitly initialized. */
GC_API GC_ATTR_MALLOC void * GC_CALL GC_gcj_malloc(size_t bytes,
                                    void * ptr_to_struct_containing_descr)
{
  if (EXPECT(GC_incremental, FALSE)) {
    return GC_core_gcj_malloc(bytes, ptr_to_struct_containing_descr);
  } else {
    size_t granules = ROUNDED_UP_GRANULES(bytes);
    void *result;
    void **tiny_fl;

    GC_ASSERT(GC_gcj_malloc_initialized);
    tiny_fl = ((GC_tlfs)GC_getspecific(GC_thread_key))->gcj_freelists;
    GC_FAST_MALLOC_GRANS(result, granules, tiny_fl, DIRECT_GRANULES,
                         GC_gcj_kind,
                         GC_core_gcj_malloc(bytes,
                                            ptr_to_struct_containing_descr),
                         {AO_compiler_barrier();
                          *(void **)result = ptr_to_struct_containing_descr;});
        /* This forces the initialization of the "method ptr". */
        /* This is necessary to ensure some very subtle properties */
        /* required if a GC is run in the middle of such an allocation. */
        /* Here we implicitly also assume atomicity for the free list */
        /* and method pointer assignments. */
        /* We must update the freelist before we store the pointer. */
        /* Otherwise a GC at this point would see a corrupted */
        /* free list. */
        /* A real memory barrier is not needed, since the */
        /* action of stopping this thread will cause prior writes */
        /* to complete. */
        /* We assert that any concurrent marker will stop us. */
        /* Thus it is impossible for a mark procedure to see the */
        /* allocation of the next object, but to see this object */
        /* still containing a free list pointer. Otherwise the */
        /* marker, by misinterpreting the freelist link as a vtable */
        /* pointer, might find a random "mark descriptor" in the next */
        /* object. */
    return result;
  }
}
#endif /* GC_GCJ_SUPPORT */
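
/* A client-side sketch of the gcj-style protocol (the GC_GCJ_EXAMPLE */
/* guard is hypothetical and never defined; the vtable layout below */
/* follows gc_gcj.h, which expects the mark descriptor in the second */
/* word of the structure that the first word of the object points to). */
#ifdef GC_GCJ_EXAMPLE
# include "gc.h"
# include "gc_gcj.h"
# include "gc_mark.h"

  struct fake_vtable {
    void *class_info;  /* stand-in for a real VM's class pointer */
    GC_word descr;     /* GC mark descriptor */
  };

  struct obj {
    struct fake_vtable *vtable;  /* filled in by GC_gcj_malloc itself */
    void *fields[3];
  };

  /* A simple length descriptor: scan the whole 4-word object. */
  static struct fake_vtable obj_vtable =
        { 0, (GC_word)(4 * sizeof(void *)) | GC_DS_LENGTH };

  struct obj *alloc_obj(void)
  {
    GC_INIT(); /* gcj-style calls assume an explicitly initialized GC */
    /* The mark-procedure arguments to GC_init_gcj_malloc are elided */
    /* (0) here for brevity; a real client registers one, as gcj did. */
    GC_init_gcj_malloc(0, 0);
    return (struct obj *)GC_gcj_malloc(sizeof(struct obj), &obj_vtable);
  }
#endif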
/* The thread support layer must arrange to mark thread-local */
/* free lists explicitly, since the link field is often */
/* invisible to the marker. It knows how to find all threads; */
/* we take care of an individual thread freelist structure. */
GC_INNER void GC_mark_thread_local_fls_for(GC_tlfs p)
{
    ptr_t q;
    int i, j;

    for (j = 0; j < TINY_FREELISTS; ++j) {
      for (i = 0; i < THREAD_FREELISTS_KINDS; ++i) {
        /* Load the pointer atomically as it might be updated */
        /* concurrently by GC_FAST_MALLOC_GRANS. */
        q = (ptr_t)AO_load((volatile AO_t *)&p->_freelists[i][j]);
        if ((word)q > HBLKSIZE)
          GC_set_fl_marks(q);
      }
#     ifdef GC_GCJ_SUPPORT
        if (EXPECT(j > 0, TRUE)) {
          q = (ptr_t)AO_load((volatile AO_t *)&p->gcj_freelists[j]);
          if ((word)q > HBLKSIZE)
            GC_set_fl_marks(q);
        }
#     endif
    }
}
#if defined(GC_ASSERTIONS)
    /* Check that all thread-local free-lists in p are completely marked. */
    void GC_check_tls_for(GC_tlfs p)
    {
        int i, j;

        for (j = 1; j < TINY_FREELISTS; ++j) {
          for (i = 0; i < THREAD_FREELISTS_KINDS; ++i) {
            GC_check_fl_marks(&p->_freelists[i][j]);
          }
#         ifdef GC_GCJ_SUPPORT
            GC_check_fl_marks(&p->gcj_freelists[j]);
#         endif
        }
    }
#endif /* GC_ASSERTIONS */

#endif /* THREAD_LOCAL_ALLOC */