]> granicus.if.org Git - gc/commitdiff
Fix start_world not resuming all threads on Darwin
authorDemyan Kimitsa <demyan.kimitsa@gmail.com>
Tue, 28 Aug 2018 16:40:47 +0000 (19:40 +0300)
committerIvan Maidanski <ivmai@mail.ru>
Thu, 30 Aug 2018 13:31:46 +0000 (16:31 +0300)
Issue #231 (bdwgc).

This happens due mach thread ports are released in GC_stop_world as
a result in some cases the system creates new one and GC_start_world
will not find the thread to resume; in turn, the threads will stay
suspended.  When this happens to GDC threads, the application is
terminated as the message loop is halt.

The high-level description of the change: thread mach port is not
deallocated when thread is saved in GC_mach_threads as it causes thread
not being resumed as thread port object can be changed; threads are now
being suspended without check of suspend_count (as it can be suspended
by client); the logic of resume is changed, now the primary cycle
iterates threads to resume in GC_mach_threads.

* darwin_stop_world.c (struct GC_mach_thread): Rename already_suspended
filed to suspended.
* darwin_stop_world.c [!GC_NO_THREADS_DISCOVERY]
(GC_suspend_thread_list): Add my_task argument; remove info an outCount
local variables; call mach_port_deallocate uless threaded is added to
GC_mach_threads; do not call thread_info; update the relevant comments.
* darwin_stop_world.c [!GC_NO_THREADS_DISCOVERY] (GC_stop_world):
Remove i local variable; do not call mach_port_deallocate for prevlist
items; add comment.
* darwin_stop_world.c (GC_thread_resume): Call WARN instead of ABORT
if thread_resume failed.
* darwin_stop_world.c [!GC_NO_THREADS_DISCOVERY] (GC_start_world):
Initialize j to listcount; iterate over GC_mach_threads in the outer
loop; if thread is suspended then find it in act_list; if found then
resume thread; call mach_port_deallocate for act_list items.

darwin_stop_world.c

index ff581a81f2eeaf992748f93479780a2853cc80b9..547bf3f402d47a337e571ea24136b854417c6f99 100644 (file)
@@ -441,7 +441,7 @@ GC_INNER void GC_push_all_stacks(void)
 
   struct GC_mach_thread {
     thread_act_t thread;
-    GC_bool already_suspended;
+    GC_bool suspended;
   };
 
   struct GC_mach_thread GC_mach_threads[GC_MAX_MACH_THREADS];
@@ -451,7 +451,8 @@ GC_INNER void GC_push_all_stacks(void)
 /* returns true if there's a thread in act_list that wasn't in old_list */
 STATIC GC_bool GC_suspend_thread_list(thread_act_array_t act_list, int count,
                                       thread_act_array_t old_list,
-                                      int old_count, mach_port_t my_thread)
+                                      int old_count, task_t my_task,
+                                      mach_port_t my_thread)
 {
   int i;
   int j = -1;
@@ -460,27 +461,22 @@ STATIC GC_bool GC_suspend_thread_list(thread_act_array_t act_list, int count,
   for (i = 0; i < count; i++) {
     thread_act_t thread = act_list[i];
     GC_bool found;
-    struct thread_basic_info info;
-    mach_msg_type_number_t outCount;
     kern_return_t kern_result;
 
     if (thread == my_thread
 #       ifdef MPROTECT_VDB
           || (GC_mach_handler_thread == thread && GC_use_mach_handler_thread)
+#       endif
+#       ifdef PARALLEL_MARK
+          || GC_is_mach_marker(thread) /* ignore the parallel markers */
 #       endif
         ) {
-      /* Don't add our and the handler threads. */
+      /* Do not add our one, parallel marker and the handler threads;   */
+      /* consider it as found (e.g., it was processed earlier).         */
+      mach_port_deallocate(my_task, thread);
       continue;
     }
-#   ifdef PARALLEL_MARK
-      if (GC_is_mach_marker(thread))
-        continue; /* ignore the parallel marker threads */
-#   endif
 
-#   ifdef DEBUG_THREADS
-      GC_log_printf("Attempting to suspend thread %p\n",
-                    (void *)(word)thread);
-#   endif
     /* find the current thread in the old list */
     found = FALSE;
     {
@@ -499,57 +495,42 @@ STATIC GC_bool GC_suspend_thread_list(thread_act_array_t act_list, int count,
             found = TRUE;
             break;
           }
-
-        if (!found) {
-          /* add it to the GC_mach_threads list */
-          if (GC_mach_threads_count == GC_MAX_MACH_THREADS)
-            ABORT("Too many threads");
-          GC_mach_threads[GC_mach_threads_count].thread = thread;
-          /* default is not suspended */
-          GC_mach_threads[GC_mach_threads_count].already_suspended = FALSE;
-          changed = TRUE;
-        }
       }
     }
 
-    outCount = THREAD_INFO_MAX;
-    kern_result = thread_info(thread, THREAD_BASIC_INFO,
-                              (thread_info_t)&info, &outCount);
-    if (kern_result != KERN_SUCCESS) {
-      /* The thread may have quit since the thread_threads() call we  */
-      /* mark already suspended so it's not dealt with anymore later. */
-      if (!found)
-        GC_mach_threads[GC_mach_threads_count++].already_suspended = TRUE;
-      continue;
-    }
-#   ifdef DEBUG_THREADS
-      GC_log_printf("Thread state for %p = %d\n", (void *)(word)thread,
-                    info.run_state);
-#   endif
-    if (info.suspend_count != 0) {
-      /* thread is already suspended. */
-      if (!found)
-        GC_mach_threads[GC_mach_threads_count++].already_suspended = TRUE;
+    if (found) {
+      /* It is already in the list, skip processing, release mach port. */
+      mach_port_deallocate(my_task, thread);
       continue;
     }
 
+    /* add it to the GC_mach_threads list */
+    if (GC_mach_threads_count == GC_MAX_MACH_THREADS)
+      ABORT("Too many threads");
+    GC_mach_threads[GC_mach_threads_count].thread = thread;
+    /* default is not suspended */
+    GC_mach_threads[GC_mach_threads_count].suspended = FALSE;
+    changed = TRUE;
+
 #   ifdef DEBUG_THREADS
       GC_log_printf("Suspending %p\n", (void *)(word)thread);
 #   endif
+    /* Unconditionally suspend the thread.  It will do no     */
+    /* harm if it is already suspended by the client logic.   */
     do {
       kern_result = thread_suspend(thread);
     } while (kern_result == KERN_ABORTED);
     if (kern_result != KERN_SUCCESS) {
       /* The thread may have quit since the thread_threads() call we  */
       /* mark already suspended so it's not dealt with anymore later. */
-      if (!found)
-        GC_mach_threads[GC_mach_threads_count++].already_suspended = TRUE;
-      continue;
+      GC_mach_threads[GC_mach_threads_count].suspended = FALSE;
+    } else {
+      /* Mark the thread as suspended and require resume.     */
+      GC_mach_threads[GC_mach_threads_count].suspended = TRUE;
+      if (GC_on_thread_event)
+        GC_on_thread_event(GC_EVENT_THREAD_SUSPENDED, (void *)(word)thread);
     }
-    if (!found)
-      GC_mach_threads_count++;
-    if (GC_on_thread_event)
-      GC_on_thread_event(GC_EVENT_THREAD_SUSPENDED, (void *)(word)thread);
+    GC_mach_threads_count++;
   }
   return changed;
 }
@@ -581,7 +562,6 @@ GC_INNER void GC_stop_world(void)
 
   if (GC_query_task_threads) {
 #   ifndef GC_NO_THREADS_DISCOVERY
-      unsigned i;
       GC_bool changed;
       thread_act_array_t act_list, prev_list;
       mach_msg_type_number_t listcount, prevcount;
@@ -605,12 +585,14 @@ GC_INNER void GC_stop_world(void)
 
         if (kern_result == KERN_SUCCESS) {
           changed = GC_suspend_thread_list(act_list, listcount, prev_list,
-                                           prevcount, my_thread);
+                                           prevcount, my_task, my_thread);
 
           if (prev_list != NULL) {
-            for (i = 0; i < prevcount; i++)
-              mach_port_deallocate(my_task, prev_list[i]);
-
+            /* Thread ports are not deallocated by list, unused ports   */
+            /* deallocated in GC_suspend_thread_list, used - kept in    */
+            /* GC_mach_threads till GC_start_world as otherwise thread  */
+            /* object change can occur and GC_start_world will not      */
+            /* find the thread to resume which will cause app to hang.  */
             vm_deallocate(my_task, (vm_address_t)prev_list,
                           sizeof(thread_t) * prevcount);
           }
@@ -622,8 +604,7 @@ GC_INNER void GC_stop_world(void)
       } while (changed);
 
       GC_ASSERT(prev_list != 0);
-      for (i = 0; i < prevcount; i++)
-        mach_port_deallocate(my_task, prev_list[i]);
+      /* The thread ports are not deallocated by list, see above.       */
       vm_deallocate(my_task, (vm_address_t)act_list,
                     sizeof(thread_t) * listcount);
 #   endif /* !GC_NO_THREADS_DISCOVERY */
@@ -684,10 +665,11 @@ GC_INLINE void GC_thread_resume(thread_act_t thread)
 # endif
   /* Resume the thread */
   kern_result = thread_resume(thread);
-  if (kern_result != KERN_SUCCESS)
-    ABORT("thread_resume failed");
-  if (GC_on_thread_event)
+  if (kern_result != KERN_SUCCESS) {
+    WARN("thread_resume(%p) failed: mach port invalid\n", thread);
+  } else if (GC_on_thread_event) {
     GC_on_thread_event(GC_EVENT_THREAD_UNSUSPENDED, (void *)(word)thread);
+  }
 }
 
 /* Caller holds allocation lock, and has held it continuously since     */
@@ -706,8 +688,7 @@ GC_INNER void GC_start_world(void)
 
   if (GC_query_task_threads) {
 #   ifndef GC_NO_THREADS_DISCOVERY
-      int i;
-      int j = GC_mach_threads_count;
+      int i, j;
       kern_return_t kern_result;
       thread_act_array_t act_list;
       mach_msg_type_number_t listcount;
@@ -716,39 +697,44 @@ GC_INNER void GC_start_world(void)
       if (kern_result != KERN_SUCCESS)
         ABORT("task_threads failed");
 
-      for (i = 0; i < (int)listcount; i++) {
-        thread_act_t thread = act_list[i];
-        int last_found = j;        /* The thread index found during the   */
-                                   /* previous iteration (count value     */
-                                   /* means no thread found yet).         */
+      j = (int)listcount;
+      for (i = 0; i < GC_mach_threads_count; i++) {
+        thread_act_t thread = GC_mach_threads[i].thread;
 
-        /* Search for the thread starting from the last found one first.  */
-        while (++j < GC_mach_threads_count) {
-          if (GC_mach_threads[j].thread == thread)
-            break;
-        }
-        if (j >= GC_mach_threads_count) {
-          /* If not found, search in the rest (beginning) of the list.    */
-          for (j = 0; j < last_found; j++) {
-            if (GC_mach_threads[j].thread == thread)
+        if (GC_mach_threads[i].suspended) {
+          int last_found = j;   /* The thread index found during the    */
+                                /* previous iteration (count value      */
+                                /* means no thread found yet).          */
+
+          /* Search for the thread starting from the last found one first. */
+          while (++j < (int)listcount) {
+            if (act_list[j] == thread)
               break;
           }
-        }
-
-        if (j != last_found) {
-          /* The thread is found in GC_mach_threads.      */
-          if (GC_mach_threads[j].already_suspended) {
-#           ifdef DEBUG_THREADS
-              GC_log_printf("Not resuming already suspended thread %p\n",
-                            (void *)(word)thread);
-#           endif
-          } else {
+          if (j >= (int)listcount) {
+            /* If not found, search in the rest (beginning) of the list. */
+            for (j = 0; j < last_found; j++) {
+              if (act_list[j] == thread)
+                break;
+            }
+          }
+          if (j != last_found) {
+            /* The thread is alive, resume it.  */
             GC_thread_resume(thread);
           }
+        } else {
+          /* This thread was failed to be suspended by GC_stop_world,   */
+          /* no action needed.                                          */
+#         ifdef DEBUG_THREADS
+            GC_log_printf("Not resuming thread %p as it is not suspended\n",
+                          (void *)(word)thread);
+#         endif
         }
-
         mach_port_deallocate(my_task, thread);
       }
+
+      for (i = 0; i < (int)listcount; i++)
+        mach_port_deallocate(my_task, act_list[i]);
       vm_deallocate(my_task, (vm_address_t)act_list,
                     sizeof(thread_t) * listcount);
 #   endif /* !GC_NO_THREADS_DISCOVERY */