granicus.if.org Git - gc/commitdiff
2009-11-05 Ivan Maidanski <ivmai@mail.ru>
author ivmai <ivmai>
Thu, 5 Nov 2009 12:35:44 +0000 (12:35 +0000)
committer Ivan Maidanski <ivmai@mail.ru>
Tue, 26 Jul 2011 17:06:51 +0000 (21:06 +0400)
* include/private/gc_priv.h (GC_total_stacksize): New variable
declaration (only if THREADS).
* alloc.c (GC_total_stacksize): New variable (only if THREADS).
* alloc.c (min_bytes_allocd): Calculate stack_size using
GC_stackbottom only in the single-threaded case; otherwise use
GC_total_stacksize; print GC_total_stacksize value if
DEBUG_THREADS.
* darwin_stop_world.c (GC_push_all_stacks): Use "%p" printf type
specifier for lo/hi values (instead of "%lx").
* darwin_stop_world.c (GC_push_all_stacks): Use
GC_push_all_stack_frames() instead of GC_push_all_stack().
* darwin_stop_world.c (GC_push_all_stacks): Recalculate
GC_total_stacksize value.
* pthread_stop_world.c (GC_push_all_stacks): Ditto.
* win32_threads.c (GC_push_all_stacks): Ditto.
* win32_threads.c (GC_push_stack_for): Pass "me" argument; return
stack size; don't check for non-zero value of thread->stack_base.
* win32_threads.c (GC_push_all_stacks): Don't call
GC_push_stack_for() and don't check for "t->id == me" if
thread->stack_base is zero.

ChangeLog
alloc.c
darwin_stop_world.c
include/private/gc_priv.h
pthread_stop_world.c
win32_threads.c

diff --git a/ChangeLog b/ChangeLog
index 693c806a6b9cedec51a140d990182ea5d8e0926e..0698d650b9be5156fabf0878f7cf8db885a99bec 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,26 @@
+2009-11-05  Ivan Maidanski <ivmai@mail.ru>
+
+       * include/private/gc_priv.h (GC_total_stacksize): New variable
+       declaration (only if THREADS).
+       * alloc.c (GC_total_stacksize): New variable (only if THREADS).
+       * alloc.c (min_bytes_allocd): Calculate stack_size using
+       GC_stackbottom only in the single-threaded case; otherwise use
+       GC_total_stacksize; print GC_total_stacksize value if
+       DEBUG_THREADS.
+       * darwin_stop_world.c (GC_push_all_stacks): Use "%p" printf type
+       specifier for lo/hi values (instead of "%lx").
+       * darwin_stop_world.c (GC_push_all_stacks): Use
+       GC_push_all_stack_frames() instead of GC_push_all_stack().
+       * darwin_stop_world.c (GC_push_all_stacks): Recalculate
+       GC_total_stacksize value.
+       * pthread_stop_world.c (GC_push_all_stacks): Ditto.
+       * win32_threads.c (GC_push_all_stacks): Ditto.
+       * win32_threads.c (GC_push_stack_for): Pass "me" argument; return
+       stack size; don't check for non-zero value of thread->stack_base.
+       * win32_threads.c (GC_push_all_stacks): Don't call
+       GC_push_stack_for() and don't check for "t->id == me" if
+       thread->stack_base is zero.
+
 2009-11-05  Ivan Maidanski <ivmai@mail.ru>
 
        * dyn_load.c (GC_dump_meminfo): Prefix "%lx" printf type specifier
diff --git a/alloc.c b/alloc.c
index 3efbde7fa1f74fa0739b38b5790edea8f01cd0d5..e96b05fc9277d6c4a488db9b263dc68ac2c469d6 100644 (file)
--- a/alloc.c
+++ b/alloc.c
@@ -186,26 +186,40 @@ GC_API GC_stop_func GC_CALL GC_get_stop_func(void)
   }
 #endif /* !SMALL_CONFIG */
 
+#ifdef THREADS
+  GC_INNER word GC_total_stacksize = 0; /* updated on every push_all_stacks */
+#endif
+
 /* Return the minimum number of words that must be allocated between    */
 /* collections to amortize the collection cost.                         */
 static word min_bytes_allocd(void)
 {
-#   ifdef THREADS
-        /* We punt, for now. */
-        signed_word stack_size = 10000;
+    int dummy; /* GC_stackbottom is used only for a single-threaded case. */
+#   ifdef STACK_GROWS_UP
+      word stack_size = (ptr_t)(&dummy) - GC_stackbottom;
 #   else
-        int dummy;
-        signed_word stack_size = (ptr_t)(&dummy) - GC_stackbottom;
+      word stack_size = GC_stackbottom - (ptr_t)(&dummy);
 #   endif
-    word total_root_size;           /* includes double stack size,      */
-                                    /* since the stack is expensive     */
-                                    /* to scan.                         */
+
+    word total_root_size;       /* includes double stack size,  */
+                                /* since the stack is expensive */
+                                /* to scan.                     */
     word scan_size;             /* Estimate of memory to be scanned     */
                                 /* during normal GC.                    */
 
-    if (stack_size < 0) stack_size = -stack_size;
+#   ifdef THREADS
+      if (GC_need_to_lock) {
+        /* We are multi-threaded... */
+        stack_size = GC_total_stacksize;
+        /* For now, we just use the value computed during the latest GC. */
+#       ifdef DEBUG_THREADS
+          GC_printf("Total stacks size: %lu\n", (unsigned long)stack_size);
+#       endif
+      }
+#   endif
+
     total_root_size = 2 * stack_size + GC_root_size;
-    scan_size = 2 * GC_composite_in_use + GC_atomic_in_use/4
+    scan_size = 2 * GC_composite_in_use + GC_atomic_in_use / 4
                 + total_root_size;
     if (TRUE_INCREMENTAL) {
         return scan_size / (2 * GC_free_space_divisor);
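diff --git a/darwin_stop_world.c b/darwin_stop_world.c
index 9f9f7be23eaba77804d50ea6fee7d2f8e2b9d54d..468fe65a7ff44f3ea01a0a314f1e9a726d6407ad 100644 (file)
--- a/darwin_stop_world.c
+++ b/darwin_stop_world.c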
@@ -48,6 +48,7 @@ GC_INNER void GC_push_all_stacks(void)
   GC_thread p;
   pthread_t me;
   ptr_t lo, hi;
+  word total_size = 0;
   GC_THREAD_STATE_T state;
   /* MACHINE_THREAD_STATE_COUNT doesn't seem to be defined everywhere.  */
   /* Hence we use our own version.                                      */
@@ -173,13 +174,14 @@ GC_INNER void GC_push_all_stacks(void)
       else
         hi = p->stack_end;
 #     ifdef DEBUG_THREADS
-        GC_printf("Darwin: Stack for thread 0x%lx = [%lx,%lx)\n",
-                  (unsigned long) p -> id, (unsigned long) lo,
-                  (unsigned long) hi);
+        GC_printf("Darwin: Stack for thread 0x%lx = [%p,%p)\n",
+                  (unsigned long) p -> id, lo, hi);
 #     endif
-      GC_push_all_stack(lo, hi);
+      GC_push_all_stack_frames(lo, hi, p -> activation_frame);
+      total_size += hi - lo; /* lo <= hi */
     } /* for(p=GC_threads[i]...) */
   } /* for(i=0;i<THREAD_TABLE_SZ...) */
+  GC_total_stacksize = total_size;
 }
 
 #else /* !DARWIN_DONT_PARSE_STACK; Use FindTopOfStack() */
@@ -240,6 +242,7 @@ GC_INNER void GC_push_all_stacks(void)
   kern_return_t r;
   mach_port_t me;
   ptr_t lo, hi;
+  word total_size = 0;
   thread_act_array_t act_list = 0;
   mach_msg_type_number_t listcount = 0;
 
@@ -400,12 +403,15 @@ GC_INNER void GC_push_all_stacks(void)
         GC_printf("Darwin: Stack for thread 0x%lx = [%p,%p)\n",
                   (unsigned long) thread, lo, hi);
 #     endif
+      /* FIXME: use GC_push_all_stack_frames. */
       GC_push_all_stack(lo, hi);
       mach_port_deallocate(my_task, thread);
-    } /* for(p=GC_threads[i]...) */
+      total_size += hi - lo; /* lo <= hi */
+    } /* for(i=0; ...) */
     vm_deallocate(my_task, (vm_address_t)act_list,
                   sizeof(thread_t) * listcount);
     mach_port_deallocate(my_task, me);
+    GC_total_stacksize = total_size;
 }
 #endif /* !DARWIN_DONT_PARSE_STACK */
 
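diff --git a/include/private/gc_priv.h b/include/private/gc_priv.h
index 34da17629dba6fbbeaea3e3cf18eb990baa0bad4..49041a9a1e5556d96edd1fdfc1c448df5f5e0b2b 100644 (file)
--- a/include/private/gc_priv.h
+++ b/include/private/gc_priv.h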
@@ -1348,6 +1348,7 @@ struct GC_activation_frame_s {
 /* frames belonging to the user functions invoked by GC_do_blocking().  */
   GC_INNER void GC_push_all_stack_frames(ptr_t lo, ptr_t hi,
                         struct GC_activation_frame_s *activation_frame);
+  GC_EXTERN word GC_total_stacksize; /* updated on every push_all_stacks */
 #else
   GC_EXTERN ptr_t GC_blocked_sp;
   GC_EXTERN struct GC_activation_frame_s *GC_activation_frame;
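diff --git a/pthread_stop_world.c b/pthread_stop_world.c
index 7c0a6bfc0887c649ac12ba11959c86adc67ae1f1..d799eab2dfbd77275429978e13fb2355922f5f86 100644 (file)
--- a/pthread_stop_world.c
+++ b/pthread_stop_world.c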
@@ -289,6 +289,7 @@ GC_INNER void GC_push_all_stacks(void)
     /* On IA64, we also need to scan the register backing store. */
     IF_IA64(ptr_t bs_lo; ptr_t bs_hi;)
     pthread_t me = pthread_self();
+    word total_size = 0;
 
     if (!GC_thr_initialized) GC_thr_init();
 #   ifdef DEBUG_THREADS
@@ -325,6 +326,11 @@ GC_INNER void GC_push_all_stacks(void)
 #       endif
         if (0 == lo) ABORT("GC_push_all_stacks: sp not set!\n");
         GC_push_all_stack_frames(lo, hi, p -> activation_frame);
+#       ifdef STACK_GROWS_UP
+          total_size += lo - hi;
+#       else
+          total_size += hi - lo; /* lo <= hi */
+#       endif
 #       ifdef IA64
 #         ifdef DEBUG_THREADS
             GC_printf("Reg stack for thread 0x%x = [%p,%p)\n",
@@ -334,6 +340,7 @@ GC_INNER void GC_push_all_stacks(void)
           /* entries, and hence overflow the mark stack, which is bad.  */
           GC_push_all_register_frames(bs_lo, bs_hi,
                         THREAD_EQUAL(p -> id, me), p -> activation_frame);
+          total_size += bs_hi - bs_lo; /* bs_lo <= bs_hi */
 #       endif
       }
     }
@@ -342,6 +349,7 @@ GC_INNER void GC_push_all_stacks(void)
     }
     if (!found_me && !GC_in_thread_creation)
       ABORT("Collecting from unknown thread.");
+    GC_total_stacksize = total_size;
 }
 
 /* There seems to be a very rare thread stopping problem.  To help us  */
diff --git a/win32_threads.c b/win32_threads.c
index 2a13ca050bbb8d980e9380c0a5599f2ac24b2441..a71d11b5fa0ae76f6702dfaadf81a6b76029b52c 100644 (file)
--- a/win32_threads.c
+++ b/win32_threads.c
@@ -1114,145 +1114,143 @@ static GC_bool may_be_in_stack(ptr_t s)
           && !(last_info.Protect & PAGE_GUARD);
 }
 
-STATIC void GC_push_stack_for(GC_thread thread)
+STATIC word GC_push_stack_for(GC_thread thread, DWORD me)
 {
   int dummy;
   ptr_t sp, stack_min;
-  DWORD me = GetCurrentThreadId();
 
-  if (thread -> stack_base) {
-    struct GC_activation_frame_s *activation_frame =
-                                        thread -> activation_frame;
-    if (thread -> id == me) {
-      GC_ASSERT(thread -> thread_blocked_sp == NULL);
-      sp = (ptr_t) &dummy;
-    } else if ((sp = thread -> thread_blocked_sp) == NULL) {
-                /* Use saved sp value for blocked threads.      */
-      /* For unblocked threads call GetThreadContext().         */
-      CONTEXT context;
-      context.ContextFlags = CONTEXT_INTEGER|CONTEXT_CONTROL;
-      if (!GetThreadContext(THREAD_HANDLE(thread), &context))
-        ABORT("GetThreadContext failed");
-
-      /* Push all registers that might point into the heap.  Frame      */
-      /* pointer registers are included in case client code was         */
-      /* compiled with the 'omit frame pointer' optimisation.           */
-#     define PUSH1(reg) GC_push_one((word)context.reg)
-#     define PUSH2(r1,r2) PUSH1(r1), PUSH1(r2)
-#     define PUSH4(r1,r2,r3,r4) PUSH2(r1,r2), PUSH2(r3,r4)
-#     if defined(I386)
-        PUSH4(Edi,Esi,Ebx,Edx), PUSH2(Ecx,Eax), PUSH1(Ebp);
-        sp = (ptr_t)context.Esp;
-#     elif defined(X86_64)
-        PUSH4(Rax,Rcx,Rdx,Rbx); PUSH2(Rbp, Rsi); PUSH1(Rdi);
-        PUSH4(R8, R9, R10, R11); PUSH4(R12, R13, R14, R15);
-        sp = (ptr_t)context.Rsp;
-#     elif defined(ARM32)
-        PUSH4(R0,R1,R2,R3),PUSH4(R4,R5,R6,R7),PUSH4(R8,R9,R10,R11);
-        PUSH1(R12);
-        sp = (ptr_t)context.Sp;
-#     elif defined(SHx)
-        PUSH4(R0,R1,R2,R3), PUSH4(R4,R5,R6,R7), PUSH4(R8,R9,R10,R11);
-        PUSH2(R12,R13), PUSH1(R14);
-        sp = (ptr_t)context.R15;
-#     elif defined(MIPS)
-        PUSH4(IntAt,IntV0,IntV1,IntA0), PUSH4(IntA1,IntA2,IntA3,IntT0);
-        PUSH4(IntT1,IntT2,IntT3,IntT4), PUSH4(IntT5,IntT6,IntT7,IntS0);
-        PUSH4(IntS1,IntS2,IntS3,IntS4), PUSH4(IntS5,IntS6,IntS7,IntT8);
-        PUSH4(IntT9,IntK0,IntK1,IntS8);
-        sp = (ptr_t)context.IntSp;
-#     elif defined(PPC)
-        PUSH4(Gpr0, Gpr3, Gpr4, Gpr5),  PUSH4(Gpr6, Gpr7, Gpr8, Gpr9);
-        PUSH4(Gpr10,Gpr11,Gpr12,Gpr14), PUSH4(Gpr15,Gpr16,Gpr17,Gpr18);
-        PUSH4(Gpr19,Gpr20,Gpr21,Gpr22), PUSH4(Gpr23,Gpr24,Gpr25,Gpr26);
-        PUSH4(Gpr27,Gpr28,Gpr29,Gpr30), PUSH1(Gpr31);
-        sp = (ptr_t)context.Gpr1;
-#     elif defined(ALPHA)
-        PUSH4(IntV0,IntT0,IntT1,IntT2), PUSH4(IntT3,IntT4,IntT5,IntT6);
-        PUSH4(IntT7,IntS0,IntS1,IntS2), PUSH4(IntS3,IntS4,IntS5,IntFp);
-        PUSH4(IntA0,IntA1,IntA2,IntA3), PUSH4(IntA4,IntA5,IntT8,IntT9);
-        PUSH4(IntT10,IntT11,IntT12,IntAt);
-        sp = (ptr_t)context.IntSp;
-#     else
-#       error "architecture is not supported"
-#     endif
-    } /* ! current thread */
+  struct GC_activation_frame_s *activation_frame =
+                                      thread -> activation_frame;
+  if (thread -> id == me) {
+    GC_ASSERT(thread -> thread_blocked_sp == NULL);
+    sp = (ptr_t) &dummy;
+  } else if ((sp = thread -> thread_blocked_sp) == NULL) {
+              /* Use saved sp value for blocked threads. */
+    /* For unblocked threads call GetThreadContext().   */
+    CONTEXT context;
+    context.ContextFlags = CONTEXT_INTEGER|CONTEXT_CONTROL;
+    if (!GetThreadContext(THREAD_HANDLE(thread), &context))
+      ABORT("GetThreadContext failed");
+
+    /* Push all registers that might point into the heap.  Frame        */
+    /* pointer registers are included in case client code was           */
+    /* compiled with the 'omit frame pointer' optimisation.             */
+#   define PUSH1(reg) GC_push_one((word)context.reg)
+#   define PUSH2(r1,r2) PUSH1(r1), PUSH1(r2)
+#   define PUSH4(r1,r2,r3,r4) PUSH2(r1,r2), PUSH2(r3,r4)
+#   if defined(I386)
+      PUSH4(Edi,Esi,Ebx,Edx), PUSH2(Ecx,Eax), PUSH1(Ebp);
+      sp = (ptr_t)context.Esp;
+#   elif defined(X86_64)
+      PUSH4(Rax,Rcx,Rdx,Rbx); PUSH2(Rbp, Rsi); PUSH1(Rdi);
+      PUSH4(R8, R9, R10, R11); PUSH4(R12, R13, R14, R15);
+      sp = (ptr_t)context.Rsp;
+#   elif defined(ARM32)
+      PUSH4(R0,R1,R2,R3),PUSH4(R4,R5,R6,R7),PUSH4(R8,R9,R10,R11);
+      PUSH1(R12);
+      sp = (ptr_t)context.Sp;
+#   elif defined(SHx)
+      PUSH4(R0,R1,R2,R3), PUSH4(R4,R5,R6,R7), PUSH4(R8,R9,R10,R11);
+      PUSH2(R12,R13), PUSH1(R14);
+      sp = (ptr_t)context.R15;
+#   elif defined(MIPS)
+      PUSH4(IntAt,IntV0,IntV1,IntA0), PUSH4(IntA1,IntA2,IntA3,IntT0);
+      PUSH4(IntT1,IntT2,IntT3,IntT4), PUSH4(IntT5,IntT6,IntT7,IntS0);
+      PUSH4(IntS1,IntS2,IntS3,IntS4), PUSH4(IntS5,IntS6,IntS7,IntT8);
+      PUSH4(IntT9,IntK0,IntK1,IntS8);
+      sp = (ptr_t)context.IntSp;
+#   elif defined(PPC)
+      PUSH4(Gpr0, Gpr3, Gpr4, Gpr5),  PUSH4(Gpr6, Gpr7, Gpr8, Gpr9);
+      PUSH4(Gpr10,Gpr11,Gpr12,Gpr14), PUSH4(Gpr15,Gpr16,Gpr17,Gpr18);
+      PUSH4(Gpr19,Gpr20,Gpr21,Gpr22), PUSH4(Gpr23,Gpr24,Gpr25,Gpr26);
+      PUSH4(Gpr27,Gpr28,Gpr29,Gpr30), PUSH1(Gpr31);
+      sp = (ptr_t)context.Gpr1;
+#   elif defined(ALPHA)
+      PUSH4(IntV0,IntT0,IntT1,IntT2), PUSH4(IntT3,IntT4,IntT5,IntT6);
+      PUSH4(IntT7,IntS0,IntS1,IntS2), PUSH4(IntS3,IntS4,IntS5,IntFp);
+      PUSH4(IntA0,IntA1,IntA2,IntA3), PUSH4(IntA4,IntA5,IntT8,IntT9);
+      PUSH4(IntT10,IntT11,IntT12,IntAt);
+      sp = (ptr_t)context.IntSp;
+#   else
+#     error "architecture is not supported"
+#   endif
+  } /* ! current thread */
 
-    /* Set stack_min to the lowest address in the thread stack,         */
-    /* or to an address in the thread stack no larger than sp,          */
-    /* taking advantage of the old value to avoid slow traversals       */
-    /* of large stacks.                                                 */
-    if (thread -> last_stack_min == ADDR_LIMIT) {
-#     ifdef MSWINCE
-        if (GC_dont_query_stack_min) {
-          stack_min = GC_wince_evaluate_stack_min(activation_frame != NULL ?
-                        (ptr_t)activation_frame : thread -> stack_base);
-          /* Keep last_stack_min value unmodified. */
-        } else
-#     endif
-      /* else */ {
-        stack_min = GC_get_stack_min(activation_frame != NULL ?
-                        (ptr_t)activation_frame : thread -> stack_base);
-        UNPROTECT_THREAD(thread);
-        thread -> last_stack_min = stack_min;
-      }
-    } else {
-      /* First, adjust the latest known minimum stack address if we     */
-      /* are inside GC_call_with_gc_active().                           */
-      if (activation_frame != NULL &&
-          thread -> last_stack_min > (ptr_t)activation_frame) {
-        UNPROTECT_THREAD(thread);
-        thread -> last_stack_min = (ptr_t)activation_frame;
-      }
+  /* Set stack_min to the lowest address in the thread stack,   */
+  /* or to an address in the thread stack no larger than sp,    */
+  /* taking advantage of the old value to avoid slow traversals */
+  /* of large stacks.                                           */
+  if (thread -> last_stack_min == ADDR_LIMIT) {
+#   ifdef MSWINCE
+      if (GC_dont_query_stack_min) {
+        stack_min = GC_wince_evaluate_stack_min(activation_frame != NULL ?
+                      (ptr_t)activation_frame : thread -> stack_base);
+        /* Keep last_stack_min value unmodified. */
+      } else
+#   endif
+    /* else */ {
+      stack_min = GC_get_stack_min(activation_frame != NULL ?
+                      (ptr_t)activation_frame : thread -> stack_base);
+      UNPROTECT_THREAD(thread);
+      thread -> last_stack_min = stack_min;
+    }
+  } else {
+    /* First, adjust the latest known minimum stack address if we       */
+    /* are inside GC_call_with_gc_active().                             */
+    if (activation_frame != NULL &&
+        thread -> last_stack_min > (ptr_t)activation_frame) {
+      UNPROTECT_THREAD(thread);
+      thread -> last_stack_min = (ptr_t)activation_frame;
+    }
 
-      if (sp < thread -> stack_base && sp >= thread -> last_stack_min) {
-        stack_min = sp;
+    if (sp < thread -> stack_base && sp >= thread -> last_stack_min) {
+      stack_min = sp;
+    } else {
+      /* In the current thread it is always safe to use sp value.       */
+      if (may_be_in_stack(thread -> id == me &&
+                          sp < thread -> last_stack_min ?
+                          sp : thread -> last_stack_min)) {
+        stack_min = last_info.BaseAddress;
+        /* Do not probe rest of the stack if sp is correct. */
+        if (sp < stack_min || sp >= thread->stack_base)
+          stack_min = GC_get_stack_min(thread -> last_stack_min);
       } else {
-        /* In the current thread it is always safe to use sp value. */
-        if (may_be_in_stack(thread -> id == me &&
-                            sp < thread -> last_stack_min ?
-                            sp : thread -> last_stack_min)) {
-          stack_min = last_info.BaseAddress;
-          /* Do not probe rest of the stack if sp is correct. */
-          if (sp < stack_min || sp >= thread->stack_base)
-            stack_min = GC_get_stack_min(thread -> last_stack_min);
-        } else {
-          /* Stack shrunk?  Is this possible? */
-          stack_min = GC_get_stack_min(thread -> stack_base);
-        }
-        UNPROTECT_THREAD(thread);
-        thread -> last_stack_min = stack_min;
+        /* Stack shrunk?  Is this possible? */
+        stack_min = GC_get_stack_min(thread -> stack_base);
       }
+      UNPROTECT_THREAD(thread);
+      thread -> last_stack_min = stack_min;
     }
+  }
 
-    GC_ASSERT(GC_dont_query_stack_min
-              || stack_min == GC_get_stack_min(thread -> stack_base)
-              || (sp >= stack_min && stack_min < thread -> stack_base
-                  && stack_min > GC_get_stack_min(thread -> stack_base)));
+  GC_ASSERT(GC_dont_query_stack_min
+            || stack_min == GC_get_stack_min(thread -> stack_base)
+            || (sp >= stack_min && stack_min < thread -> stack_base
+                && stack_min > GC_get_stack_min(thread -> stack_base)));
 
-    if (sp >= stack_min && sp < thread->stack_base) {
-#       ifdef DEBUG_THREADS
-          GC_printf("Pushing stack for 0x%x from sp %p to %p from 0x%x\n",
-                    (int)thread -> id, sp, thread -> stack_base, (int)me);
-#       endif
-      GC_push_all_stack_frames(sp, thread->stack_base, activation_frame);
-    } else {
-      /* If not current thread then it is possible for sp to point to   */
-      /* the guarded (untouched yet) page just below the current        */
-      /* stack_min of the thread.                                       */
-      if (thread -> id == me || sp >= thread->stack_base
-          || sp + GC_page_size < stack_min)
-        WARN("Thread stack pointer %p out of range, pushing everything\n",
-             sp);
-#     ifdef DEBUG_THREADS
-        GC_printf("Pushing stack for 0x%x from (min) %p to %p from 0x%x\n",
-                  (int)thread -> id, stack_min,
-                  thread -> stack_base, (int)me);
-#     endif
-      /* Push everything - ignore activation "frames" data.             */
-      GC_push_all_stack(stack_min, thread->stack_base);
-    }
-  } /* thread looks live */
+  if (sp >= stack_min && sp < thread->stack_base) {
+#   ifdef DEBUG_THREADS
+      GC_printf("Pushing stack for 0x%x from sp %p to %p from 0x%x\n",
+                (int)thread -> id, sp, thread -> stack_base, (int)me);
+#   endif
+    GC_push_all_stack_frames(sp, thread->stack_base, activation_frame);
+  } else {
+    /* If not current thread then it is possible for sp to point to     */
+    /* the guarded (untouched yet) page just below the current          */
+    /* stack_min of the thread.                                         */
+    if (thread -> id == me || sp >= thread->stack_base
+        || sp + GC_page_size < stack_min)
+      WARN("Thread stack pointer %p out of range, pushing everything\n",
+           sp);
+#   ifdef DEBUG_THREADS
+      GC_printf("Pushing stack for 0x%x from (min) %p to %p from 0x%x\n",
+                (int)thread -> id, stack_min,
+                thread -> stack_base, (int)me);
+#   endif
+    /* Push everything - ignore activation "frames" data.       */
+    GC_push_all_stack(stack_min, thread->stack_base);
+  }
+  return thread->stack_base - sp; /* stack grows down */
 }
 
 GC_INNER void GC_push_all_stacks(void)
@@ -1262,6 +1260,7 @@ GC_INNER void GC_push_all_stacks(void)
 # ifndef SMALL_CONFIG
     unsigned nthreads = 0;
 # endif
+  word total_size = 0;
 # ifndef GC_NO_DLLMAIN
     if (GC_win32_dll_threads) {
       int i;
@@ -1269,27 +1268,28 @@ GC_INNER void GC_push_all_stacks(void)
 
       for (i = 0; i <= my_max; i++) {
         GC_thread t = (GC_thread)(dll_thread_table + i);
-        if (t -> tm.in_use) {
+        if (t -> tm.in_use && t -> stack_base) {
 #         ifndef SMALL_CONFIG
             ++nthreads;
 #         endif
-          GC_push_stack_for(t);
+          total_size += GC_push_stack_for(t, me);
           if (t -> id == me) found_me = TRUE;
         }
       }
     } else
 # endif
   /* else */ {
-    GC_thread t;
     int i;
-
     for (i = 0; i < THREAD_TABLE_SZ; i++) {
+      GC_thread t;
       for (t = GC_threads[i]; t != 0; t = t -> tm.next) {
-#       ifndef SMALL_CONFIG
-          ++nthreads;
-#       endif
-        if (!KNOWN_FINISHED(t)) GC_push_stack_for(t);
-        if (t -> id == me) found_me = TRUE;
+        if (!KNOWN_FINISHED(t) && t -> stack_base) {
+#         ifndef SMALL_CONFIG
+            ++nthreads;
+#         endif
+          total_size += GC_push_stack_for(t, me);
+          if (t -> id == me) found_me = TRUE;
+        }
       }
     }
   }
@@ -1301,6 +1301,7 @@ GC_INNER void GC_push_all_stacks(void)
 # endif
   if (!found_me && !GC_in_thread_creation)
     ABORT("Collecting from unknown thread.");
+  GC_total_stacksize = total_size;
 }
 
 #ifdef PARALLEL_MARK