From 185d8819702c5f2a917da60d21d0ee52b0d3043a Mon Sep 17 00:00:00 2001 From: ivmai Date: Thu, 5 Nov 2009 12:35:44 +0000 Subject: [PATCH] 2009-11-05 Ivan Maidanski * include/private/gc_priv.h (GC_total_stacksize): New variable declaration (only if THREADS). * alloc.c (GC_total_stacksize): New variable (only if THREADS). * alloc.c (min_bytes_allocd): Calculate stack_size using GC_stackbottom only in the single-threaded case; otherwise use GC_total_stacksize; print GC_total_stacksize value if DEBUG_THREADS. * darwin_stop_world.c (GC_push_all_stacks): Use "%p" printf type specifier for lo/hi values (instead of "%lx"). * darwin_stop_world.c (GC_push_all_stacks): Use GC_push_all_stack_frames() instead of GC_push_all_stack(). * darwin_stop_world.c (GC_push_all_stacks): Recalculate GC_total_stacksize value. * pthread_stop_world.c (GC_push_all_stacks): Ditto. * win32_threads.c (GC_push_all_stacks): Ditto. * win32_threads.c (GC_push_stack_for): Pass "me" argument; return stack size; don't check for non-zero value of thread->stack_base. * win32_threads.c (GC_push_all_stacks): Don't call GC_push_stack_for() and don't check for "t->id == me" if thread->stack_base is zero. --- ChangeLog | 23 ++++ alloc.c | 34 +++-- darwin_stop_world.c | 16 ++- include/private/gc_priv.h | 1 + pthread_stop_world.c | 8 ++ win32_threads.c | 275 +++++++++++++++++++------------------- 6 files changed, 205 insertions(+), 152 deletions(-) diff --git a/ChangeLog b/ChangeLog index 693c806a..0698d650 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,26 @@ +2009-11-05 Ivan Maidanski + + * include/private/gc_priv.h (GC_total_stacksize): New variable + declaration (only if THREADS). + * alloc.c (GC_total_stacksize): New variable (only if THREADS). + * alloc.c (min_bytes_allocd): Calculate stack_size using + GC_stackbottom only in the single-threaded case; otherwise use + GC_total_stacksize; print GC_total_stacksize value if + DEBUG_THREADS. + * darwin_stop_world.c (GC_push_all_stacks): Use "%p" printf type + specifier for lo/hi values (instead of "%lx"). + * darwin_stop_world.c (GC_push_all_stacks): Use + GC_push_all_stack_frames() instead of GC_push_all_stack(). + * darwin_stop_world.c (GC_push_all_stacks): Recalculate + GC_total_stacksize value. + * pthread_stop_world.c (GC_push_all_stacks): Ditto. + * win32_threads.c (GC_push_all_stacks): Ditto. + * win32_threads.c (GC_push_stack_for): Pass "me" argument; return + stack size; don't check for non-zero value of thread->stack_base. + * win32_threads.c (GC_push_all_stacks): Don't call + GC_push_stack_for() and don't check for "t->id == me" if + thread->stack_base is zero. + 2009-11-05 Ivan Maidanski * dyn_load.c (GC_dump_meminfo): Prefix "%lx" printf type specifier diff --git a/alloc.c b/alloc.c index 3efbde7f..e96b05fc 100644 --- a/alloc.c +++ b/alloc.c @@ -186,26 +186,40 @@ GC_API GC_stop_func GC_CALL GC_get_stop_func(void) } #endif /* !SMALL_CONFIG */ +#ifdef THREADS + GC_INNER word GC_total_stacksize = 0; /* updated on every push_all_stacks */ +#endif + /* Return the minimum number of words that must be allocated between */ /* collections to amortize the collection cost. */ static word min_bytes_allocd(void) { -# ifdef THREADS - /* We punt, for now. */ - signed_word stack_size = 10000; + int dummy; /* GC_stackbottom is used only for a single-threaded case. */ +# ifdef STACK_GROWS_UP + word stack_size = (ptr_t)(&dummy) - GC_stackbottom; # else - int dummy; - signed_word stack_size = (ptr_t)(&dummy) - GC_stackbottom; + word stack_size = GC_stackbottom - (ptr_t)(&dummy); # endif - word total_root_size; /* includes double stack size, */ - /* since the stack is expensive */ - /* to scan. */ + + word total_root_size; /* includes double stack size, */ + /* since the stack is expensive */ + /* to scan. */ word scan_size; /* Estimate of memory to be scanned */ /* during normal GC. */ - if (stack_size < 0) stack_size = -stack_size; +# ifdef THREADS + if (GC_need_to_lock) { + /* We are multi-threaded... */ + stack_size = GC_total_stacksize; + /* For now, we just use the value computed during the latest GC. */ +# ifdef DEBUG_THREADS + GC_printf("Total stacks size: %lu\n", (unsigned long)stack_size); +# endif + } +# endif + total_root_size = 2 * stack_size + GC_root_size; - scan_size = 2 * GC_composite_in_use + GC_atomic_in_use/4 + scan_size = 2 * GC_composite_in_use + GC_atomic_in_use / 4 + total_root_size; if (TRUE_INCREMENTAL) { return scan_size / (2 * GC_free_space_divisor); diff --git a/darwin_stop_world.c b/darwin_stop_world.c index 9f9f7be2..468fe65a 100644 --- a/darwin_stop_world.c +++ b/darwin_stop_world.c @@ -48,6 +48,7 @@ GC_INNER void GC_push_all_stacks(void) GC_thread p; pthread_t me; ptr_t lo, hi; + word total_size = 0; GC_THREAD_STATE_T state; /* MACHINE_THREAD_STATE_COUNT doesn't seem to be defined everywhere. */ /* Hence we use our own version. */ @@ -173,13 +174,14 @@ GC_INNER void GC_push_all_stacks(void) else hi = p->stack_end; # ifdef DEBUG_THREADS - GC_printf("Darwin: Stack for thread 0x%lx = [%lx,%lx)\n", - (unsigned long) p -> id, (unsigned long) lo, - (unsigned long) hi); + GC_printf("Darwin: Stack for thread 0x%lx = [%p,%p)\n", + (unsigned long) p -> id, lo, hi); # endif - GC_push_all_stack(lo, hi); + GC_push_all_stack_frames(lo, hi, p -> activation_frame); + total_size += hi - lo; /* lo <= hi */ } /* for(p=GC_threads[i]...) */ } /* for(i=0;i activation_frame); +# ifdef STACK_GROWS_UP + total_size += lo - hi; +# else + total_size += hi - lo; /* lo <= hi */ +# endif # ifdef IA64 # ifdef DEBUG_THREADS GC_printf("Reg stack for thread 0x%x = [%p,%p)\n", @@ -334,6 +340,7 @@ GC_INNER void GC_push_all_stacks(void) /* entries, and hence overflow the mark stack, which is bad. */ GC_push_all_register_frames(bs_lo, bs_hi, THREAD_EQUAL(p -> id, me), p -> activation_frame); + total_size += bs_hi - bs_lo; /* bs_lo <= bs_hi */ # endif } } @@ -342,6 +349,7 @@ GC_INNER void GC_push_all_stacks(void) } if (!found_me && !GC_in_thread_creation) ABORT("Collecting from unknown thread."); + GC_total_stacksize = total_size; } /* There seems to be a very rare thread stopping problem. To help us */ diff --git a/win32_threads.c b/win32_threads.c index 2a13ca05..a71d11b5 100644 --- a/win32_threads.c +++ b/win32_threads.c @@ -1114,145 +1114,143 @@ static GC_bool may_be_in_stack(ptr_t s) && !(last_info.Protect & PAGE_GUARD); } -STATIC void GC_push_stack_for(GC_thread thread) +STATIC word GC_push_stack_for(GC_thread thread, DWORD me) { int dummy; ptr_t sp, stack_min; - DWORD me = GetCurrentThreadId(); - if (thread -> stack_base) { - struct GC_activation_frame_s *activation_frame = - thread -> activation_frame; - if (thread -> id == me) { - GC_ASSERT(thread -> thread_blocked_sp == NULL); - sp = (ptr_t) &dummy; - } else if ((sp = thread -> thread_blocked_sp) == NULL) { - /* Use saved sp value for blocked threads. */ - /* For unblocked threads call GetThreadContext(). */ - CONTEXT context; - context.ContextFlags = CONTEXT_INTEGER|CONTEXT_CONTROL; - if (!GetThreadContext(THREAD_HANDLE(thread), &context)) - ABORT("GetThreadContext failed"); - - /* Push all registers that might point into the heap. Frame */ - /* pointer registers are included in case client code was */ - /* compiled with the 'omit frame pointer' optimisation. */ -# define PUSH1(reg) GC_push_one((word)context.reg) -# define PUSH2(r1,r2) PUSH1(r1), PUSH1(r2) -# define PUSH4(r1,r2,r3,r4) PUSH2(r1,r2), PUSH2(r3,r4) -# if defined(I386) - PUSH4(Edi,Esi,Ebx,Edx), PUSH2(Ecx,Eax), PUSH1(Ebp); - sp = (ptr_t)context.Esp; -# elif defined(X86_64) - PUSH4(Rax,Rcx,Rdx,Rbx); PUSH2(Rbp, Rsi); PUSH1(Rdi); - PUSH4(R8, R9, R10, R11); PUSH4(R12, R13, R14, R15); - sp = (ptr_t)context.Rsp; -# elif defined(ARM32) - PUSH4(R0,R1,R2,R3),PUSH4(R4,R5,R6,R7),PUSH4(R8,R9,R10,R11); - PUSH1(R12); - sp = (ptr_t)context.Sp; -# elif defined(SHx) - PUSH4(R0,R1,R2,R3), PUSH4(R4,R5,R6,R7), PUSH4(R8,R9,R10,R11); - PUSH2(R12,R13), PUSH1(R14); - sp = (ptr_t)context.R15; -# elif defined(MIPS) - PUSH4(IntAt,IntV0,IntV1,IntA0), PUSH4(IntA1,IntA2,IntA3,IntT0); - PUSH4(IntT1,IntT2,IntT3,IntT4), PUSH4(IntT5,IntT6,IntT7,IntS0); - PUSH4(IntS1,IntS2,IntS3,IntS4), PUSH4(IntS5,IntS6,IntS7,IntT8); - PUSH4(IntT9,IntK0,IntK1,IntS8); - sp = (ptr_t)context.IntSp; -# elif defined(PPC) - PUSH4(Gpr0, Gpr3, Gpr4, Gpr5), PUSH4(Gpr6, Gpr7, Gpr8, Gpr9); - PUSH4(Gpr10,Gpr11,Gpr12,Gpr14), PUSH4(Gpr15,Gpr16,Gpr17,Gpr18); - PUSH4(Gpr19,Gpr20,Gpr21,Gpr22), PUSH4(Gpr23,Gpr24,Gpr25,Gpr26); - PUSH4(Gpr27,Gpr28,Gpr29,Gpr30), PUSH1(Gpr31); - sp = (ptr_t)context.Gpr1; -# elif defined(ALPHA) - PUSH4(IntV0,IntT0,IntT1,IntT2), PUSH4(IntT3,IntT4,IntT5,IntT6); - PUSH4(IntT7,IntS0,IntS1,IntS2), PUSH4(IntS3,IntS4,IntS5,IntFp); - PUSH4(IntA0,IntA1,IntA2,IntA3), PUSH4(IntA4,IntA5,IntT8,IntT9); - PUSH4(IntT10,IntT11,IntT12,IntAt); - sp = (ptr_t)context.IntSp; -# else -# error "architecture is not supported" -# endif - } /* ! current thread */ + struct GC_activation_frame_s *activation_frame = + thread -> activation_frame; + if (thread -> id == me) { + GC_ASSERT(thread -> thread_blocked_sp == NULL); + sp = (ptr_t) &dummy; + } else if ((sp = thread -> thread_blocked_sp) == NULL) { + /* Use saved sp value for blocked threads. */ + /* For unblocked threads call GetThreadContext(). */ + CONTEXT context; + context.ContextFlags = CONTEXT_INTEGER|CONTEXT_CONTROL; + if (!GetThreadContext(THREAD_HANDLE(thread), &context)) + ABORT("GetThreadContext failed"); + + /* Push all registers that might point into the heap. Frame */ + /* pointer registers are included in case client code was */ + /* compiled with the 'omit frame pointer' optimisation. */ +# define PUSH1(reg) GC_push_one((word)context.reg) +# define PUSH2(r1,r2) PUSH1(r1), PUSH1(r2) +# define PUSH4(r1,r2,r3,r4) PUSH2(r1,r2), PUSH2(r3,r4) +# if defined(I386) + PUSH4(Edi,Esi,Ebx,Edx), PUSH2(Ecx,Eax), PUSH1(Ebp); + sp = (ptr_t)context.Esp; +# elif defined(X86_64) + PUSH4(Rax,Rcx,Rdx,Rbx); PUSH2(Rbp, Rsi); PUSH1(Rdi); + PUSH4(R8, R9, R10, R11); PUSH4(R12, R13, R14, R15); + sp = (ptr_t)context.Rsp; +# elif defined(ARM32) + PUSH4(R0,R1,R2,R3),PUSH4(R4,R5,R6,R7),PUSH4(R8,R9,R10,R11); + PUSH1(R12); + sp = (ptr_t)context.Sp; +# elif defined(SHx) + PUSH4(R0,R1,R2,R3), PUSH4(R4,R5,R6,R7), PUSH4(R8,R9,R10,R11); + PUSH2(R12,R13), PUSH1(R14); + sp = (ptr_t)context.R15; +# elif defined(MIPS) + PUSH4(IntAt,IntV0,IntV1,IntA0), PUSH4(IntA1,IntA2,IntA3,IntT0); + PUSH4(IntT1,IntT2,IntT3,IntT4), PUSH4(IntT5,IntT6,IntT7,IntS0); + PUSH4(IntS1,IntS2,IntS3,IntS4), PUSH4(IntS5,IntS6,IntS7,IntT8); + PUSH4(IntT9,IntK0,IntK1,IntS8); + sp = (ptr_t)context.IntSp; +# elif defined(PPC) + PUSH4(Gpr0, Gpr3, Gpr4, Gpr5), PUSH4(Gpr6, Gpr7, Gpr8, Gpr9); + PUSH4(Gpr10,Gpr11,Gpr12,Gpr14), PUSH4(Gpr15,Gpr16,Gpr17,Gpr18); + PUSH4(Gpr19,Gpr20,Gpr21,Gpr22), PUSH4(Gpr23,Gpr24,Gpr25,Gpr26); + PUSH4(Gpr27,Gpr28,Gpr29,Gpr30), PUSH1(Gpr31); + sp = (ptr_t)context.Gpr1; +# elif defined(ALPHA) + PUSH4(IntV0,IntT0,IntT1,IntT2), PUSH4(IntT3,IntT4,IntT5,IntT6); + PUSH4(IntT7,IntS0,IntS1,IntS2), PUSH4(IntS3,IntS4,IntS5,IntFp); + PUSH4(IntA0,IntA1,IntA2,IntA3), PUSH4(IntA4,IntA5,IntT8,IntT9); + PUSH4(IntT10,IntT11,IntT12,IntAt); + sp = (ptr_t)context.IntSp; +# else +# error "architecture is not supported" +# endif + } /* ! current thread */ - /* Set stack_min to the lowest address in the thread stack, */ - /* or to an address in the thread stack no larger than sp, */ - /* taking advantage of the old value to avoid slow traversals */ - /* of large stacks. */ - if (thread -> last_stack_min == ADDR_LIMIT) { -# ifdef MSWINCE - if (GC_dont_query_stack_min) { - stack_min = GC_wince_evaluate_stack_min(activation_frame != NULL ? - (ptr_t)activation_frame : thread -> stack_base); - /* Keep last_stack_min value unmodified. */ - } else -# endif - /* else */ { - stack_min = GC_get_stack_min(activation_frame != NULL ? - (ptr_t)activation_frame : thread -> stack_base); - UNPROTECT_THREAD(thread); - thread -> last_stack_min = stack_min; - } - } else { - /* First, adjust the latest known minimum stack address if we */ - /* are inside GC_call_with_gc_active(). */ - if (activation_frame != NULL && - thread -> last_stack_min > (ptr_t)activation_frame) { - UNPROTECT_THREAD(thread); - thread -> last_stack_min = (ptr_t)activation_frame; - } + /* Set stack_min to the lowest address in the thread stack, */ + /* or to an address in the thread stack no larger than sp, */ + /* taking advantage of the old value to avoid slow traversals */ + /* of large stacks. */ + if (thread -> last_stack_min == ADDR_LIMIT) { +# ifdef MSWINCE + if (GC_dont_query_stack_min) { + stack_min = GC_wince_evaluate_stack_min(activation_frame != NULL ? + (ptr_t)activation_frame : thread -> stack_base); + /* Keep last_stack_min value unmodified. */ + } else +# endif + /* else */ { + stack_min = GC_get_stack_min(activation_frame != NULL ? + (ptr_t)activation_frame : thread -> stack_base); + UNPROTECT_THREAD(thread); + thread -> last_stack_min = stack_min; + } + } else { + /* First, adjust the latest known minimum stack address if we */ + /* are inside GC_call_with_gc_active(). */ + if (activation_frame != NULL && + thread -> last_stack_min > (ptr_t)activation_frame) { + UNPROTECT_THREAD(thread); + thread -> last_stack_min = (ptr_t)activation_frame; + } - if (sp < thread -> stack_base && sp >= thread -> last_stack_min) { - stack_min = sp; + if (sp < thread -> stack_base && sp >= thread -> last_stack_min) { + stack_min = sp; + } else { + /* In the current thread it is always safe to use sp value. */ + if (may_be_in_stack(thread -> id == me && + sp < thread -> last_stack_min ? + sp : thread -> last_stack_min)) { + stack_min = last_info.BaseAddress; + /* Do not probe rest of the stack if sp is correct. */ + if (sp < stack_min || sp >= thread->stack_base) + stack_min = GC_get_stack_min(thread -> last_stack_min); } else { - /* In the current thread it is always safe to use sp value. */ - if (may_be_in_stack(thread -> id == me && - sp < thread -> last_stack_min ? - sp : thread -> last_stack_min)) { - stack_min = last_info.BaseAddress; - /* Do not probe rest of the stack if sp is correct. */ - if (sp < stack_min || sp >= thread->stack_base) - stack_min = GC_get_stack_min(thread -> last_stack_min); - } else { - /* Stack shrunk? Is this possible? */ - stack_min = GC_get_stack_min(thread -> stack_base); - } - UNPROTECT_THREAD(thread); - thread -> last_stack_min = stack_min; + /* Stack shrunk? Is this possible? */ + stack_min = GC_get_stack_min(thread -> stack_base); } + UNPROTECT_THREAD(thread); + thread -> last_stack_min = stack_min; } + } - GC_ASSERT(GC_dont_query_stack_min - || stack_min == GC_get_stack_min(thread -> stack_base) - || (sp >= stack_min && stack_min < thread -> stack_base - && stack_min > GC_get_stack_min(thread -> stack_base))); + GC_ASSERT(GC_dont_query_stack_min + || stack_min == GC_get_stack_min(thread -> stack_base) + || (sp >= stack_min && stack_min < thread -> stack_base + && stack_min > GC_get_stack_min(thread -> stack_base))); - if (sp >= stack_min && sp < thread->stack_base) { -# ifdef DEBUG_THREADS - GC_printf("Pushing stack for 0x%x from sp %p to %p from 0x%x\n", - (int)thread -> id, sp, thread -> stack_base, (int)me); -# endif - GC_push_all_stack_frames(sp, thread->stack_base, activation_frame); - } else { - /* If not current thread then it is possible for sp to point to */ - /* the guarded (untouched yet) page just below the current */ - /* stack_min of the thread. */ - if (thread -> id == me || sp >= thread->stack_base - || sp + GC_page_size < stack_min) - WARN("Thread stack pointer %p out of range, pushing everything\n", - sp); -# ifdef DEBUG_THREADS - GC_printf("Pushing stack for 0x%x from (min) %p to %p from 0x%x\n", - (int)thread -> id, stack_min, - thread -> stack_base, (int)me); -# endif - /* Push everything - ignore activation "frames" data. */ - GC_push_all_stack(stack_min, thread->stack_base); - } - } /* thread looks live */ + if (sp >= stack_min && sp < thread->stack_base) { +# ifdef DEBUG_THREADS + GC_printf("Pushing stack for 0x%x from sp %p to %p from 0x%x\n", + (int)thread -> id, sp, thread -> stack_base, (int)me); +# endif + GC_push_all_stack_frames(sp, thread->stack_base, activation_frame); + } else { + /* If not current thread then it is possible for sp to point to */ + /* the guarded (untouched yet) page just below the current */ + /* stack_min of the thread. */ + if (thread -> id == me || sp >= thread->stack_base + || sp + GC_page_size < stack_min) + WARN("Thread stack pointer %p out of range, pushing everything\n", + sp); +# ifdef DEBUG_THREADS + GC_printf("Pushing stack for 0x%x from (min) %p to %p from 0x%x\n", + (int)thread -> id, stack_min, + thread -> stack_base, (int)me); +# endif + /* Push everything - ignore activation "frames" data. */ + GC_push_all_stack(stack_min, thread->stack_base); + } + return thread->stack_base - sp; /* stack grows down */ } GC_INNER void GC_push_all_stacks(void) @@ -1262,6 +1260,7 @@ GC_INNER void GC_push_all_stacks(void) # ifndef SMALL_CONFIG unsigned nthreads = 0; # endif + word total_size = 0; # ifndef GC_NO_DLLMAIN if (GC_win32_dll_threads) { int i; @@ -1269,27 +1268,28 @@ GC_INNER void GC_push_all_stacks(void) for (i = 0; i <= my_max; i++) { GC_thread t = (GC_thread)(dll_thread_table + i); - if (t -> tm.in_use) { + if (t -> tm.in_use && t -> stack_base) { # ifndef SMALL_CONFIG ++nthreads; # endif - GC_push_stack_for(t); + total_size += GC_push_stack_for(t, me); if (t -> id == me) found_me = TRUE; } } } else # endif /* else */ { - GC_thread t; int i; - for (i = 0; i < THREAD_TABLE_SZ; i++) { + GC_thread t; for (t = GC_threads[i]; t != 0; t = t -> tm.next) { -# ifndef SMALL_CONFIG - ++nthreads; -# endif - if (!KNOWN_FINISHED(t)) GC_push_stack_for(t); - if (t -> id == me) found_me = TRUE; + if (!KNOWN_FINISHED(t) && t -> stack_base) { +# ifndef SMALL_CONFIG + ++nthreads; +# endif + total_size += GC_push_stack_for(t, me); + if (t -> id == me) found_me = TRUE; + } } } } @@ -1301,6 +1301,7 @@ GC_INNER void GC_push_all_stacks(void) # endif if (!found_me && !GC_in_thread_creation) ABORT("Collecting from unknown thread."); + GC_total_stacksize = total_size; } #ifdef PARALLEL_MARK -- 2.40.0