From: Jim Jagielski Date: Fri, 1 Mar 2013 15:23:58 +0000 (+0000) Subject: sync w/ current event bug-fixes X-Git-Tag: 2.5.0-alpha~5724 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fdee075e677ecc2e62167d32245e233a28d3edc9;p=apache sync w/ current event bug-fixes git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@1451614 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/server/mpm/eventopt/eventopt.c b/server/mpm/eventopt/eventopt.c index cebf005a47..4c104b5d8e 100644 --- a/server/mpm/eventopt/eventopt.c +++ b/server/mpm/eventopt/eventopt.c @@ -169,13 +169,18 @@ static int ap_daemons_limit = 0; static int max_workers = 0; static int server_limit = 0; static int thread_limit = 0; +static int had_healthy_child = 0; static int dying = 0; static int workers_may_exit = 0; static int start_thread_may_exit = 0; static int listener_may_exit = 0; -static int conns_this_child; static int num_listensocks = 0; -static apr_uint32_t connection_count = 0; +static apr_int32_t conns_this_child; /* MaxConnectionsPerChild, only access + in listener thread */ +static apr_uint32_t connection_count = 0; /* Number of open connections */ +static apr_uint32_t lingering_count = 0; /* Number of connections in lingering close */ +static apr_uint32_t suspended_count = 0; /* Number of suspended connections */ +static apr_uint32_t clogged_count = 0; /* Number of threads processing ssl conns */ static int resource_shortage = 0; static fd_queue_t *worker_queue; static fd_queue_info_t *worker_queue_info; @@ -400,8 +405,12 @@ static void enable_listensocks(int process_slot) int i; ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00457) "Accepting new connections again: " - "%u active conns, %u idle workers", + "%u active conns (%u lingering/%u clogged/%u suspended), " + "%u idle workers", apr_atomic_read32(&connection_count), + apr_atomic_read32(&lingering_count), + apr_atomic_read32(&clogged_count), + apr_atomic_read32(&suspended_count), ap_queue_info_get_idlers(worker_queue_info)); for (i = 0; i < num_listensocks; i++) apr_pollset_add(event_pollset, &listener_pollfd[i]); @@ -618,7 +627,20 @@ static int child_fatal; static int volatile shutdown_pending; static int volatile restart_pending; -static apr_status_t decrement_connection_count(void *dummy) { +static apr_status_t decrement_connection_count(void *cs_) +{ + event_conn_state_t *cs = cs_; + switch (cs->pub.state) { + case CONN_STATE_LINGER_NORMAL: + case CONN_STATE_LINGER_SHORT: + apr_atomic_dec32(&lingering_count); + break; + case CONN_STATE_SUSPENDED: + apr_atomic_dec32(&suspended_count); + break; + default: + break; + } apr_atomic_dec32(&connection_count); return APR_SUCCESS; } @@ -766,7 +788,7 @@ static void set_signals(void) #endif } -static void process_pollop(pollset_op_t *op) +static int process_pollop(pollset_op_t *op) { apr_status_t rv; event_conn_state_t *cs = op->cs; @@ -791,7 +813,12 @@ static void process_pollop(pollset_op_t *op) if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) { ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(00467) "%s: apr_pollset_add failure", op->tag); + apr_socket_close(cs->pfd.desc.s); + apr_pool_clear(cs->p); + ap_push_pool(worker_queue_info, cs->p); + return 0; } + return 1; } /* @@ -807,63 +834,101 @@ static void process_pollop(pollset_op_t *op) static int start_lingering_close(event_conn_state_t *cs, ap_equeue_t *eq) { apr_status_t rv; + int ret = 1; - if (ap_start_lingering_close(cs->c)) { - apr_pool_clear(cs->p); - ap_push_pool(worker_queue_info, cs->p); - return 0; + apr_socket_t *csd = ap_get_conn_socket(cs->c); + pollset_op_t localv; + pollset_op_t *v; + + if (eq) { + v = ap_equeue_writer_value(eq); } else { - apr_socket_t *csd = ap_get_conn_socket(cs->c); - pollset_op_t localv; - pollset_op_t *v; - - if (eq) { - v = ap_equeue_writer_value(eq); - } - else { - v = &localv; - } + v = &localv; + } #ifdef AP_DEBUG - { - rv = apr_socket_timeout_set(csd, 0); - AP_DEBUG_ASSERT(rv == APR_SUCCESS); - } + { + rv = apr_socket_timeout_set(csd, 0); + AP_DEBUG_ASSERT(rv == APR_SUCCESS); + } #else - apr_socket_timeout_set(csd, 0); + apr_socket_timeout_set(csd, 0); #endif - /* - * If some module requested a shortened waiting period, only wait for - * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain - * DoS attacks. - */ - if (apr_table_get(cs->c->notes, "short-lingering-close")) { - cs->expiration_time = - apr_time_now() + apr_time_from_sec(SECONDS_TO_LINGER); - v->timeout_type = TIMEOUT_SHORT_LINGER; - v->tag = "start_lingering_close(short)"; - cs->pub.state = CONN_STATE_LINGER_SHORT; - } - else { - cs->expiration_time = - apr_time_now() + apr_time_from_sec(MAX_SECS_TO_LINGER); - v->timeout_type = TIMEOUT_LINGER; - v->tag = "start_lingering_close(normal)"; - cs->pub.state = CONN_STATE_LINGER_NORMAL; - } + /* + * If some module requested a shortened waiting period, only wait for + * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain + * DoS attacks. + */ + if (apr_table_get(cs->c->notes, "short-lingering-close")) { + cs->expiration_time = + apr_time_now() + apr_time_from_sec(SECONDS_TO_LINGER); + v->timeout_type = TIMEOUT_SHORT_LINGER; + v->tag = "start_lingering_close(short)"; + cs->pub.state = CONN_STATE_LINGER_SHORT; + } + else { + cs->expiration_time = + apr_time_now() + apr_time_from_sec(MAX_SECS_TO_LINGER); + v->timeout_type = TIMEOUT_LINGER; + v->tag = "start_lingering_close(normal)"; + cs->pub.state = CONN_STATE_LINGER_NORMAL; + } + apr_atomic_inc32(&lingering_count); + cs->pfd.reqevents = APR_POLLIN | APR_POLLHUP | APR_POLLERR; + v->cs = cs; + if (eq != NULL) { + ap_equeue_writer_onward(eq); + apr_pollset_wakeup(event_pollset); + } + else { + ret = process_pollop(v); + } - cs->pfd.reqevents = APR_POLLIN | APR_POLLHUP | APR_POLLERR; - v->cs = cs; - if (eq != NULL) { - ap_equeue_writer_onward(eq); - apr_pollset_wakeup(event_pollset); - } - else { - process_pollop(v); - } + return ret; +} + +/* + * Close our side of the connection, flushing data to the client first. + * Pre-condition: cs is not in any timeout queue and not in the pollset, + * timeout_mutex is not locked + * return: 0 if connection is fully closed, + * 1 if connection is lingering + * May only be called by worker thread. + */ +static int start_lingering_close_blocking(event_conn_state_t *cs, ap_equeue_t *eq) +{ + if (ap_start_lingering_close(cs->c)) { + apr_pool_clear(cs->p); + ap_push_pool(worker_queue_info, cs->p); + return 0; } - return 1; + return start_lingering_close(cs, eq); +} + +/* + * Close our side of the connection, NOT flushing data to the client. + * This should only be called if there has been an error or if we know + * that our send buffers are empty. + * Pre-condition: cs is not in any timeout queue and not in the pollset, + * timeout_mutex is not locked + * return: 0 if connection is fully closed, + * 1 if connection is lingering + * may be called by listener thread + */ +static int start_lingering_close_nonblocking(event_conn_state_t *cs, ap_equeue_t *eq) +{ + conn_rec *c = cs->c; + apr_socket_t *csd = cs->pfd.desc.s; + + if (c->aborted + || apr_socket_shutdown(csd, APR_SHUTDOWN_WRITE) != APR_SUCCESS) { + apr_socket_close(csd); + apr_pool_clear(cs->p); + ap_push_pool(worker_queue_info, cs->p); + return 0; + } + return start_lingering_close(cs, eq); } /* @@ -890,10 +955,8 @@ static int stop_lingering_close(event_conn_state_t *cs, ap_equeue_t *eq) /* * process one connection in the worker - * return: 1 if the connection has been completed, - * 0 if it is still open and waiting for some event */ -static int process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock, +static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock, event_conn_state_t * cs, ap_equeue_t *eq, int my_child_num, @@ -916,10 +979,11 @@ static int process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock apr_bucket_alloc_destroy(cs->bucket_alloc); apr_pool_clear(p); ap_push_pool(worker_queue_info, p); - return 1; + return; } apr_atomic_inc32(&connection_count); - apr_pool_cleanup_register(c->pool, NULL, decrement_connection_count, apr_pool_cleanup_null); + apr_pool_cleanup_register(c->pool, cs, decrement_connection_count, + apr_pool_cleanup_null); c->current_thread = thd; cs->c = c; c->cs = &(cs->pub); @@ -967,10 +1031,12 @@ static int process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock * like mod_ssl, lets just do the normal read from input filters, * like the Worker MPM does. */ + apr_atomic_inc32(&clogged_count); ap_run_process_connection(c); if (cs->pub.state != CONN_STATE_SUSPENDED) { cs->pub.state = CONN_STATE_LINGER; } + apr_atomic_dec32(&clogged_count); } read_request: @@ -1017,7 +1083,7 @@ read_request: ap_equeue_writer_onward(eq); apr_pollset_wakeup(event_pollset); - return 1; + return; } else if (c->keepalive != AP_CONN_KEEPALIVE || c->aborted || listener_may_exit) { @@ -1033,8 +1099,8 @@ read_request: } if (cs->pub.state == CONN_STATE_LINGER) { - if (!start_lingering_close(cs, eq)) { - return 0; + if (!start_lingering_close_blocking(cs, eq)) { + return; } } else if (cs->pub.state == CONN_STATE_CHECK_REQUEST_LINE_READABLE) { @@ -1060,7 +1126,9 @@ read_request: ap_equeue_writer_onward(eq); apr_pollset_wakeup(event_pollset); } - + else if (cs->pub.state == CONN_STATE_SUSPENDED) { + apr_atomic_inc32(&suspended_count); + } /* * Prevent this connection from writing to our connection state after it * is no longer associated with this thread. This would happen if the EOR @@ -1068,8 +1136,7 @@ read_request: * or timeout. */ c->sbh = NULL; - - return 1; + return; } /* conns_this_child has gone to zero or below. See if the admin coded @@ -1078,10 +1145,13 @@ read_request: static void check_infinite_requests(void) { if (ap_max_requests_per_child) { + ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf, + "Stopping process due to MaxConnectionsPerChild"); signal_threads(ST_GRACEFUL); } else { - conns_this_child = INT_MAX; /* keep going */ + /* keep going */ + conns_this_child = APR_INT32_MAX; } } @@ -1462,9 +1532,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) break; } - if (conns_this_child <= 0) { + if (conns_this_child <= 0) check_infinite_requests(); - } now = apr_time_now(); if (APLOGtrace6(ap_server_conf)) { @@ -1472,11 +1541,14 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) if (now - last_log > apr_time_from_msec(1000)) { last_log = now; ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf, - "connections: %d (write-completion: %d " - "keep-alive: %d lingering: %d)", - connection_count, write_completion_q.count, + "connections: %u (clogged: %u write-completion: %d " + "keep-alive: %d lingering: %d suspended: %u)", + apr_atomic_read32(&connection_count), + apr_atomic_read32(&clogged_count), + write_completion_q.count, keepalive_q.count, - linger_q.count + short_linger_q.count); + apr_atomic_read32(&lingering_count), + apr_atomic_read32(&suspended_count)); } } @@ -1539,7 +1611,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) /* one of the sockets is readable */ struct timeout_queue *remove_from_q = &write_completion_q; int blocking = 1; - cs = (event_conn_state_t *)pt->baton; + cs = (event_conn_state_t *) pt->baton; switch (cs->pub.state) { case CONN_STATE_CHECK_REQUEST_LINE_READABLE: cs->pub.state = CONN_STATE_READ_REQUEST_LINE; @@ -1562,7 +1634,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) { ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf, "pollset remove failed"); - start_lingering_close(cs, NULL); + start_lingering_close_nonblocking(cs, NULL); break; } @@ -1572,7 +1644,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) * re-connect to a different process. */ if (!have_idle_worker) { - start_lingering_close(cs, NULL); + start_lingering_close_nonblocking(cs, NULL); break; } rc = push2worker(out_pfd, event_pollset); @@ -1663,6 +1735,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) } if (csd != NULL) { + conns_this_child--; rc = ap_queue_push(worker_queue, csd, NULL, ptrans); if (rc != APR_SUCCESS) { /* trash the connection; we couldn't queue the connected @@ -1735,14 +1808,15 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) timeout_time + ap_server_conf->keep_alive_timeout + apr_time_from_sec(2), - start_lingering_close); + start_lingering_close_nonblocking); } else { process_timeout_queue(&keepalive_q, timeout_time, - start_lingering_close); + start_lingering_close_nonblocking); } /* Step 2: write completion timeouts */ - process_timeout_queue(&write_completion_q, timeout_time, start_lingering_close); + process_timeout_queue(&write_completion_q, timeout_time, + start_lingering_close_nonblocking); /* Step 3: (normal) lingering close completion timeouts */ process_timeout_queue(&linger_q, timeout_time, stop_lingering_close); /* Step 4: (short) lingering close completion timeouts */ @@ -1750,11 +1824,11 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy) ps = ap_get_scoreboard_process(process_slot); ps->write_completion = write_completion_q.count; - ps->lingering_close = linger_q.count + short_linger_q.count; ps->keep_alive = keepalive_q.count; ps->connections = apr_atomic_read32(&connection_count); - /* XXX: should count CONN_STATE_SUSPENDED and set ps->suspended */ + ps->suspended = apr_atomic_read32(&suspended_count); + ps->lingering_close = apr_atomic_read32(&lingering_count); } if (listeners_disabled && !workers_were_busy && (int)apr_atomic_read32(&connection_count) < @@ -1868,10 +1942,7 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy) else { is_idle = 0; worker_sockets[thread_slot] = csd; - rv = process_socket(thd, ptrans, csd, cs, eq, process_slot, thread_slot); - if (!rv) { - conns_this_child--; - } + process_socket(thd, ptrans, csd, cs, eq, process_slot, thread_slot); worker_sockets[thread_slot] = NULL; } } @@ -1995,7 +2066,6 @@ static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy) ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO() "start_threads: Using %s", apr_pollset_method_name(event_pollset)); - worker_sockets = apr_pcalloc(pchild, threads_per_child * sizeof(apr_socket_t *)); @@ -2211,7 +2281,7 @@ static void child_main(int child_num_arg) } else { /* coding a value of zero means infinity */ - conns_this_child = INT_MAX; + conns_this_child = APR_INT32_MAX; } /* Setup worker threads */ @@ -2436,6 +2506,7 @@ static void perform_idle_server_maintenance(void) int any_dying_threads = 0; int any_dead_threads = 0; int all_dead_threads = 1; + int child_threads_active = 0; if (i >= retained->max_daemons_limit && totally_free_length == retained->idle_spawn_rate) @@ -2470,10 +2541,11 @@ static void perform_idle_server_maintenance(void) ++idle_thread_count; } if (status >= SERVER_READY && status < SERVER_GRACEFUL) { - ++active_thread_count; + ++child_threads_active; } } } + active_thread_count += child_threads_active; if (any_dead_threads && totally_free_length < retained->idle_spawn_rate && free_length < MAX_SPAWN_RATE @@ -2497,6 +2569,9 @@ static void perform_idle_server_maintenance(void) } ++free_length; } + else if (child_threads_active == threads_per_child) { + had_healthy_child = 1; + } /* XXX if (!ps->quiescing) is probably more reliable GLA */ if (!any_dying_threads) { last_non_dead = i; @@ -2505,14 +2580,15 @@ static void perform_idle_server_maintenance(void) } if (retained->sick_child_detected) { - if (active_thread_count > 0) { - /* some child processes appear to be working. don't kill the - * whole server. + if (had_healthy_child) { + /* Assume this is a transient error, even though it may not be. Leave + * the server up in case it is able to serve some requests or the + * problem will be resolved. */ retained->sick_child_detected = 0; } else { - /* looks like a basket case. give up. + /* looks like a basket case, as no child ever fully initialized; give up. */ shutdown_pending = 1; child_fatal = 1; @@ -2652,6 +2728,11 @@ static void server_main_loop(int remaining_children_to_start) event_note_child_killed(-1, /* already out of the scoreboard */ pid.pid, old_gen); + if (processed_status == APEXIT_CHILDSICK + && old_gen == retained->my_generation) { + /* resource shortage, minimize the fork rate */ + retained->idle_spawn_rate = 1; + } #if APR_HAS_OTHER_CHILD } else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH, @@ -2979,6 +3060,7 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog, ap_daemons_limit = server_limit; threads_per_child = DEFAULT_THREADS_PER_CHILD; max_workers = ap_daemons_limit * threads_per_child; + had_healthy_child = 0; ap_extended_status = 0; return OK;