From: Greg Ames Date: Wed, 4 May 2005 20:00:23 +0000 (+0000) Subject: worker and event mpms: transient thread creation errors shouldn't take down X-Git-Tag: 2.1.5~135 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5ceb57af0dfb82c8ccdb7702c4dd78ab8e4da49a;p=apache worker and event mpms: transient thread creation errors shouldn't take down the whole server. git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@168182 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/CHANGES b/CHANGES index 639549c399..c5af454b98 100644 --- a/CHANGES +++ b/CHANGES @@ -2,6 +2,9 @@ Changes with Apache 2.1.5 [Remove entries to the current 2.0 section below, when backported] + *) worker and event mpms: don't take down the whole server for a transient + thread creation failure. PR 34514 [Greg Ames] + *) ap_get_local_host() rewritten for APR. [Jim Jagielski] *) Add the ap_vhost_iterate_given_conn function to expose the information diff --git a/server/mpm/experimental/event/event.c b/server/mpm/experimental/event/event.c index 2dff15e21b..194cedd6bc 100644 --- a/server/mpm/experimental/event/event.c +++ b/server/mpm/experimental/event/event.c @@ -160,6 +160,7 @@ static int resource_shortage = 0; static fd_queue_t *worker_queue; static fd_queue_info_t *worker_queue_info; static int mpm_state = AP_MPMQ_STARTING; +static int sick_child_detected; apr_thread_mutex_t *timeout_mutex; APR_RING_HEAD(timeout_head_t, conn_state_t); @@ -1166,14 +1167,8 @@ static void create_listener_thread(thread_starter * ts) if (rv != APR_SUCCESS) { ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, "apr_thread_create: unable to create listener thread"); - /* In case system resources are maxxed out, we don't want - * Apache running away with the CPU trying to fork over and - * over and over again if we exit. - * XXX Jeff doesn't see how Apache is going to try to fork again since - * the exit code is APEXIT_CHILDFATAL - */ - apr_sleep(apr_time_from_sec(10)); - clean_child_exit(APEXIT_CHILDFATAL); + /* let the parent decide how bad this really is */ + clean_child_exit(APEXIT_CHILDSICK); } apr_os_thread_get(&listener_os_thread, ts->listener); } @@ -1251,11 +1246,8 @@ static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy) if (rv != APR_SUCCESS) { ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, "apr_thread_create: unable to create worker thread"); - /* In case system resources are maxxed out, we don't want - Apache running away with the CPU trying to fork over and - over and over again if we exit. */ - apr_sleep(apr_time_from_sec(10)); - clean_child_exit(APEXIT_CHILDFATAL); + /* let the parent decide how bad this really is */ + clean_child_exit(APEXIT_CHILDSICK); } threads_created++; } @@ -1627,6 +1619,7 @@ static void perform_idle_server_maintenance(void) int free_slots[MAX_SPAWN_RATE]; int last_non_dead; int total_non_dead; + int active_thread_count = 0; /* initialize the free_list */ free_length = 0; @@ -1664,13 +1657,15 @@ static void perform_idle_server_maintenance(void) * So we hopefully won't need to fork more if we count it. * This depends on the ordering of SERVER_READY and SERVER_STARTING. */ - if (status <= SERVER_READY && status != SERVER_DEAD && - !ps->quiescing && ps->generation == ap_my_generation && - /* XXX the following shouldn't be necessary if we clean up - * properly after seg faults, but we're not yet GLA - */ - ps->pid != 0) { - ++idle_thread_count; + if (ps->pid != 0) { /* XXX just set all_dead_threads in outer + for loop if no pid? not much else matters */ + if (status <= SERVER_READY && status != SERVER_DEAD && + !ps->quiescing && ps->generation == ap_my_generation) { + ++idle_thread_count; + } + if (status >= SERVER_READY && status < SERVER_GRACEFUL) { + ++active_thread_count; + } } } if (any_dead_threads @@ -1702,6 +1697,28 @@ static void perform_idle_server_maintenance(void) ++total_non_dead; } } + + if (sick_child_detected) { + if (active_thread_count > 0) { + /* some child processes appear to be working. don't kill the + * whole server. + */ + sick_child_detected = 0; + } + else { + /* looks like a basket case. give up. + */ + shutdown_pending = 1; + child_fatal = 1; + ap_log_error(APLOG_MARK, APLOG_ALERT, 0, + ap_server_conf, + "No active workers found..." + " Apache is exiting!"); + /* the child already logged the failure details */ + return; + } + } + ap_max_daemons_limit = last_non_dead + 1; if (idle_thread_count > max_spare_threads) { @@ -1775,6 +1792,12 @@ static void server_main_loop(int remaining_children_to_start) child_fatal = 1; return; } + else if (processed_status == APEXIT_CHILDSICK) { + /* tell perform_idle_server_maintenance to check into this + * on the next timer pop + */ + sick_child_detected = 1; + } /* non-fatal death... note that it's gone in the scoreboard. */ child_slot = find_child_by_pid(&pid); if (child_slot >= 0) { diff --git a/server/mpm/worker/worker.c b/server/mpm/worker/worker.c index 0a550ac061..4eff2020e8 100644 --- a/server/mpm/worker/worker.c +++ b/server/mpm/worker/worker.c @@ -134,6 +134,7 @@ static int resource_shortage = 0; static fd_queue_t *worker_queue; static fd_queue_info_t *worker_queue_info; static int mpm_state = AP_MPMQ_STARTING; +static int sick_child_detected; /* The structure used to pass unique initialization info to each thread */ typedef struct { @@ -896,14 +897,8 @@ static void create_listener_thread(thread_starter *ts) if (rv != APR_SUCCESS) { ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, "apr_thread_create: unable to create listener thread"); - /* In case system resources are maxxed out, we don't want - * Apache running away with the CPU trying to fork over and - * over and over again if we exit. - * XXX Jeff doesn't see how Apache is going to try to fork again since - * the exit code is APEXIT_CHILDFATAL - */ - apr_sleep(apr_time_from_sec(10)); - clean_child_exit(APEXIT_CHILDFATAL); + /* let the parent decide how bad this really is */ + clean_child_exit(APEXIT_CHILDSICK); } apr_os_thread_get(&listener_os_thread, ts->listener); } @@ -980,11 +975,8 @@ static void * APR_THREAD_FUNC start_threads(apr_thread_t *thd, void *dummy) if (rv != APR_SUCCESS) { ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, "apr_thread_create: unable to create worker thread"); - /* In case system resources are maxxed out, we don't want - Apache running away with the CPU trying to fork over and - over and over again if we exit. */ - apr_sleep(apr_time_from_sec(10)); - clean_child_exit(APEXIT_CHILDFATAL); + /* let the parent decide how bad this really is */ + clean_child_exit(APEXIT_CHILDSICK); } threads_created++; } @@ -1373,6 +1365,7 @@ static void perform_idle_server_maintenance(void) int free_slots[MAX_SPAWN_RATE]; int last_non_dead; int total_non_dead; + int active_thread_count = 0; /* initialize the free_list */ free_length = 0; @@ -1410,14 +1403,16 @@ static void perform_idle_server_maintenance(void) * So we hopefully won't need to fork more if we count it. * This depends on the ordering of SERVER_READY and SERVER_STARTING. */ - if (status <= SERVER_READY && status != SERVER_DEAD && - !ps->quiescing && - ps->generation == ap_my_generation && - /* XXX the following shouldn't be necessary if we clean up - * properly after seg faults, but we're not yet GLA - */ - ps->pid != 0) { - ++idle_thread_count; + if (ps->pid != 0) { /* XXX just set all_dead_threads in outer for + loop if no pid? not much else matters */ + if (status <= SERVER_READY && status != SERVER_DEAD && + !ps->quiescing && + ps->generation == ap_my_generation) { + ++idle_thread_count; + } + if (status >= SERVER_READY && status < SERVER_GRACEFUL) { + ++active_thread_count; + } } } if (any_dead_threads && totally_free_length < idle_spawn_rate @@ -1448,6 +1443,28 @@ static void perform_idle_server_maintenance(void) ++total_non_dead; } } + + if (sick_child_detected) { + if (active_thread_count > 0) { + /* some child processes appear to be working. don't kill the + * whole server. + */ + sick_child_detected = 0; + } + else { + /* looks like a basket case. give up. + */ + shutdown_pending = 1; + child_fatal = 1; + ap_log_error(APLOG_MARK, APLOG_ALERT, 0, + ap_server_conf, + "No active workers found..." + " Apache is exiting!"); + /* the child already logged the failure details */ + return; + } + } + ap_max_daemons_limit = last_non_dead + 1; if (idle_thread_count > max_spare_threads) { @@ -1521,6 +1538,12 @@ static void server_main_loop(int remaining_children_to_start) child_fatal = 1; return; } + else if (processed_status == APEXIT_CHILDSICK) { + /* tell perform_idle_server_maintenance to check into this + * on the next timer pop + */ + sick_child_detected = 1; + } /* non-fatal death... note that it's gone in the scoreboard. */ child_slot = find_child_by_pid(&pid); if (child_slot >= 0) {