granicus.if.org Git - apache/commitdiff
worker and event mpms: transient thread creation errors shouldn't take down
author Greg Ames <gregames@apache.org>
Wed, 4 May 2005 20:00:23 +0000 (20:00 +0000)
committer Greg Ames <gregames@apache.org>
Wed, 4 May 2005 20:00:23 +0000 (20:00 +0000)
the whole server.

git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@168182 13f79535-47bb-0310-9956-ffa450edef68

CHANGES
server/mpm/experimental/event/event.c
server/mpm/worker/worker.c

diff --git a/CHANGES b/CHANGES
index 639549c39934e03e7d9d18c81d078d8c8b850f3d..c5af454b98103c590f2689192770f0aebd30a708 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -2,6 +2,9 @@ Changes with Apache 2.1.5
 
   [Remove entries to the current 2.0 section below, when backported]
 
+  *) worker and event mpms: don't take down the whole server for a transient
+     thread creation failure.  PR 34514 [Greg Ames]
+
   *) ap_get_local_host() rewritten for APR. [Jim Jagielski]
 
   *) Add the ap_vhost_iterate_given_conn function to expose the information
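diff --git a/server/mpm/experimental/event/event.c b/server/mpm/experimental/event/event.c
index 2dff15e21b45fb363152174aebbfdf90a282a788..194cedd6bc8906ea864c1f4d838ff413d8a4f8db 100644 (file)
--- a/server/mpm/experimental/event/event.c
+++ b/server/mpm/experimental/event/event.c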
@@ -160,6 +160,7 @@ static int resource_shortage = 0;
 static fd_queue_t *worker_queue;
 static fd_queue_info_t *worker_queue_info;
 static int mpm_state = AP_MPMQ_STARTING;
+static int sick_child_detected;
 
 apr_thread_mutex_t *timeout_mutex;
 APR_RING_HEAD(timeout_head_t, conn_state_t);
@@ -1166,14 +1167,8 @@ static void create_listener_thread(thread_starter * ts)
     if (rv != APR_SUCCESS) {
         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
                      "apr_thread_create: unable to create listener thread");
-        /* In case system resources are maxxed out, we don't want
-         * Apache running away with the CPU trying to fork over and
-         * over and over again if we exit.
-         * XXX Jeff doesn't see how Apache is going to try to fork again since
-         * the exit code is APEXIT_CHILDFATAL
-         */
-        apr_sleep(apr_time_from_sec(10));
-        clean_child_exit(APEXIT_CHILDFATAL);
+        /* let the parent decide how bad this really is */
+        clean_child_exit(APEXIT_CHILDSICK);
     }
     apr_os_thread_get(&listener_os_thread, ts->listener);
 }
@@ -1251,11 +1246,8 @@ static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
             if (rv != APR_SUCCESS) {
                 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
                              "apr_thread_create: unable to create worker thread");
-                /* In case system resources are maxxed out, we don't want
-                   Apache running away with the CPU trying to fork over and
-                   over and over again if we exit. */
-                apr_sleep(apr_time_from_sec(10));
-                clean_child_exit(APEXIT_CHILDFATAL);
+                /* let the parent decide how bad this really is */
+                clean_child_exit(APEXIT_CHILDSICK);
             }
             threads_created++;
         }
@@ -1627,6 +1619,7 @@ static void perform_idle_server_maintenance(void)
     int free_slots[MAX_SPAWN_RATE];
     int last_non_dead;
     int total_non_dead;
+    int active_thread_count = 0;
 
     /* initialize the free_list */
     free_length = 0;
@@ -1664,13 +1657,15 @@ static void perform_idle_server_maintenance(void)
              * So we hopefully won't need to fork more if we count it.
              * This depends on the ordering of SERVER_READY and SERVER_STARTING.
              */
-            if (status <= SERVER_READY && status != SERVER_DEAD &&
-                !ps->quiescing && ps->generation == ap_my_generation &&
-                /* XXX the following shouldn't be necessary if we clean up 
-                 *     properly after seg faults, but we're not yet    GLA 
-                 */
-                ps->pid != 0) {
-                ++idle_thread_count;
+            if (ps->pid != 0) { /* XXX just set all_dead_threads in outer
+                                   for loop if no pid?  not much else matters */
+                if (status <= SERVER_READY && status != SERVER_DEAD &&
+                        !ps->quiescing && ps->generation == ap_my_generation) {
+                    ++idle_thread_count;
+                }
+                if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
+                    ++active_thread_count;
+                }
             }
         }
         if (any_dead_threads 
@@ -1702,6 +1697,28 @@ static void perform_idle_server_maintenance(void)
             ++total_non_dead;
         }
     }
+
+    if (sick_child_detected) {
+        if (active_thread_count > 0) {
+            /* some child processes appear to be working.  don't kill the
+             * whole server.
+             */
+            sick_child_detected = 0;
+        }
+        else {
+            /* looks like a basket case.  give up.  
+             */
+            shutdown_pending = 1;
+            child_fatal = 1;
+            ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
+                         ap_server_conf,
+                         "No active workers found..."
+                         " Apache is exiting!");
+            /* the child already logged the failure details */
+            return;
+        }
+    }
+                                                    
     ap_max_daemons_limit = last_non_dead + 1;
 
     if (idle_thread_count > max_spare_threads) {
@@ -1775,6 +1792,12 @@ static void server_main_loop(int remaining_children_to_start)
                 child_fatal = 1;
                 return;
             }
+            else if (processed_status == APEXIT_CHILDSICK) {
+                /* tell perform_idle_server_maintenance to check into this
+                 * on the next timer pop
+                 */
+                sick_child_detected = 1;
+            }
             /* non-fatal death... note that it's gone in the scoreboard. */
             child_slot = find_child_by_pid(&pid);
             if (child_slot >= 0) {
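diff --git a/server/mpm/worker/worker.c b/server/mpm/worker/worker.c
index 0a550ac06131672ba201de84302a629cba4a43cd..4eff2020e80717d62751d002eea611ab754a003b 100644 (file)
--- a/server/mpm/worker/worker.c
+++ b/server/mpm/worker/worker.c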
@@ -134,6 +134,7 @@ static int resource_shortage = 0;
 static fd_queue_t *worker_queue;
 static fd_queue_info_t *worker_queue_info;
 static int mpm_state = AP_MPMQ_STARTING;
+static int sick_child_detected;
 
 /* The structure used to pass unique initialization info to each thread */
 typedef struct {
@@ -896,14 +897,8 @@ static void create_listener_thread(thread_starter *ts)
     if (rv != APR_SUCCESS) {
         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
                      "apr_thread_create: unable to create listener thread");
-        /* In case system resources are maxxed out, we don't want
-         * Apache running away with the CPU trying to fork over and
-         * over and over again if we exit.
-         * XXX Jeff doesn't see how Apache is going to try to fork again since
-         * the exit code is APEXIT_CHILDFATAL
-         */
-        apr_sleep(apr_time_from_sec(10));
-        clean_child_exit(APEXIT_CHILDFATAL);
+        /* let the parent decide how bad this really is */
+        clean_child_exit(APEXIT_CHILDSICK);
     }
     apr_os_thread_get(&listener_os_thread, ts->listener);
 }
@@ -980,11 +975,8 @@ static void * APR_THREAD_FUNC start_threads(apr_thread_t *thd, void *dummy)
             if (rv != APR_SUCCESS) {
                 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
                     "apr_thread_create: unable to create worker thread");
-                /* In case system resources are maxxed out, we don't want
-                   Apache running away with the CPU trying to fork over and
-                   over and over again if we exit. */
-                apr_sleep(apr_time_from_sec(10));
-                clean_child_exit(APEXIT_CHILDFATAL);
+                /* let the parent decide how bad this really is */
+                clean_child_exit(APEXIT_CHILDSICK);
             }
             threads_created++;
         }
@@ -1373,6 +1365,7 @@ static void perform_idle_server_maintenance(void)
     int free_slots[MAX_SPAWN_RATE];
     int last_non_dead;
     int total_non_dead;
+    int active_thread_count = 0;
 
     /* initialize the free_list */
     free_length = 0;
@@ -1410,14 +1403,16 @@ static void perform_idle_server_maintenance(void)
              * So we hopefully won't need to fork more if we count it.
              * This depends on the ordering of SERVER_READY and SERVER_STARTING.
              */
-            if (status <= SERVER_READY && status != SERVER_DEAD &&
-                    !ps->quiescing &&
-                    ps->generation == ap_my_generation &&
-                 /* XXX the following shouldn't be necessary if we clean up 
-                  *     properly after seg faults, but we're not yet    GLA 
-                  */     
-                    ps->pid != 0) {
-                ++idle_thread_count;
+            if (ps->pid != 0) { /* XXX just set all_dead_threads in outer for
+                                   loop if no pid?  not much else matters */
+                if (status <= SERVER_READY && status != SERVER_DEAD &&
+                        !ps->quiescing &&
+                        ps->generation == ap_my_generation) {
+                    ++idle_thread_count;
+                }
+                if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
+                    ++active_thread_count;
+                }
             }
         }
         if (any_dead_threads && totally_free_length < idle_spawn_rate
@@ -1448,6 +1443,28 @@ static void perform_idle_server_maintenance(void)
             ++total_non_dead;
         }
     }
+
+    if (sick_child_detected) {
+        if (active_thread_count > 0) {
+            /* some child processes appear to be working.  don't kill the
+             * whole server.
+             */
+            sick_child_detected = 0;
+        }
+        else {
+            /* looks like a basket case.  give up.  
+             */
+            shutdown_pending = 1;
+            child_fatal = 1;
+            ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
+                         ap_server_conf,
+                         "No active workers found..."
+                         " Apache is exiting!");
+            /* the child already logged the failure details */
+            return;
+        }
+    }
+                                                    
     ap_max_daemons_limit = last_non_dead + 1;
 
     if (idle_thread_count > max_spare_threads) {
@@ -1521,6 +1538,12 @@ static void server_main_loop(int remaining_children_to_start)
                 child_fatal = 1;
                 return;
             }
+            else if (processed_status == APEXIT_CHILDSICK) {
+                /* tell perform_idle_server_maintenance to check into this
+                 * on the next timer pop
+                 */
+                sick_child_detected = 1;
+            }
             /* non-fatal death... note that it's gone in the scoreboard. */
             child_slot = find_child_by_pid(&pid);
             if (child_slot >= 0) {