1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * This MPM tries to fix the 'keep alive problem' in HTTP.
20 * After a client completes the first request, the client can keep the
21 * connection open to send more requests with the same socket. This can save
22 * significant overhead in creating TCP connections. However, the major
23 * disadvantage is that Apache traditionally keeps an entire child
24 * process/thread waiting for data from the client. To solve this problem,
25 * this MPM has a dedicated thread for handling both the Listening sockets,
26 * and all sockets that are in a Keep Alive status.
28 * The MPM assumes the underlying apr_pollset implementation is somewhat
29 * threadsafe. This currently is only compatible with KQueue and EPoll. This
30 * enables the MPM to avoid extra high level locking or having to wake up the
31 * listener thread when a keep-alive socket needs to be sent to it.
33 * This MPM does not perform well on older platforms that do not have very good
34 * threading, like Linux with a 2.4 kernel, but this does not matter, since we
35 * require EPoll or KQueue.
37 * For FreeBSD, use 5.3. It is possible to run this MPM on FreeBSD 5.2.1, if
38 * you use libkse (see `man libmap.conf`).
40 * For NetBSD, use at least 2.0.
42 * For Linux, you should use a 2.6 kernel, and make sure your glibc has epoll
43 * support compiled in.
48 #include "apr_portable.h"
49 #include "apr_strings.h"
50 #include "apr_file_io.h"
51 #include "apr_thread_proc.h"
52 #include "apr_signal.h"
53 #include "apr_thread_mutex.h"
56 #include "apr_queue.h"
57 #include "apr_atomic.h"
58 #define APR_WANT_STRFUNC
60 #include "apr_version.h"
67 #if APR_HAVE_SYS_SOCKET_H
68 #include <sys/socket.h>
70 #if APR_HAVE_SYS_WAIT_H
73 #ifdef HAVE_SYS_PROCESSOR_H
74 #include <sys/processor.h> /* for bindprocessor() */
78 #error The Event MPM requires APR threads, but they are unavailable.
81 #include "ap_config.h"
83 #include "http_main.h"
85 #include "http_config.h" /* for read_config */
86 #include "http_core.h" /* for get_remote_host */
87 #include "http_connection.h"
88 #include "http_protocol.h"
90 #include "mpm_common.h"
91 #include "ap_listen.h"
92 #include "scoreboard.h"
94 #include "mpm_default.h"
95 #include "http_vhost.h"
97 #include "apr_skiplist.h"
100 #include <limits.h> /* for INT_MAX */
103 /* Limit on the total --- clients will be locked out if more servers than
104 * this are needed. It is intended solely to keep the server from crashing
105 * when things get out of hand.
107 * We keep a hard maximum number of servers, for two reasons --- first off,
108 * in case something goes seriously wrong, we want to stop the fork bomb
109 * short of actually crashing the machine we're running on by filling some
110 * kernel table. Secondly, it keeps the size of the scoreboard file small
111 * enough that we can read the whole thing without worrying too much about
114 #ifndef DEFAULT_SERVER_LIMIT
115 #define DEFAULT_SERVER_LIMIT 16
118 /* Admin can't tune ServerLimit beyond MAX_SERVER_LIMIT. We want
119 * some sort of compile-time limit to help catch typos.
121 #ifndef MAX_SERVER_LIMIT
122 #define MAX_SERVER_LIMIT 20000
125 /* Limit on the threads per process. Clients will be locked out if more than
128 * We keep this for one reason: it keeps the size of the scoreboard file small
129 * enough that we can read the whole thing without worrying too much about
132 #ifndef DEFAULT_THREAD_LIMIT
133 #define DEFAULT_THREAD_LIMIT 64
136 /* Admin can't tune ThreadLimit beyond MAX_THREAD_LIMIT. We want
137 * some sort of compile-time limit to help catch typos.
139 #ifndef MAX_THREAD_LIMIT
140 #define MAX_THREAD_LIMIT 100000
143 #define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
145 #if !APR_VERSION_AT_LEAST(1,4,0)
/* Fallback for APR releases older than 1.4.0: convert milliseconds to
 * microseconds.  The argument is parenthesized so that an expression such
 * as apr_time_from_msec(a + b) scales the whole sum, not just b. */
#define apr_time_from_msec(x) ((x) * 1000)
149 #ifndef MAX_SECS_TO_LINGER
150 #define MAX_SECS_TO_LINGER 30
152 #define SECONDS_TO_LINGER 2
155 * Actual definitions of config globals
158 #ifndef DEFAULT_WORKER_FACTOR
159 #define DEFAULT_WORKER_FACTOR 2
161 #define WORKER_FACTOR_SCALE 16 /* scale factor to allow fractional values */
/* File-scope state of this MPM.  The first group mirrors configuration
 * directives (named in the trailing comments).  The connection counters
 * (connection_count, lingering_count, suspended_count, clogged_count) are
 * read with apr_atomic_read32() and changed with apr_atomic_inc32() /
 * apr_atomic_dec32() elsewhere in this file. */
162 static unsigned int worker_factor = DEFAULT_WORKER_FACTOR * WORKER_FACTOR_SCALE;
163 /* AsyncRequestWorkerFactor * 16 */
165 static int threads_per_child = 0; /* ThreadsPerChild */
166 static int ap_daemons_to_start = 0; /* StartServers */
167 static int min_spare_threads = 0; /* MinSpareThreads */
168 static int max_spare_threads = 0; /* MaxSpareThreads */
169 static int active_daemons_limit = 0; /* MaxRequestWorkers / ThreadsPerChild */
170 static int active_daemons = 0; /* workers that still active, i.e. are
171 not shutting down gracefully */
172 static int max_workers = 0; /* MaxRequestWorkers */
173 static int server_limit = 0; /* ServerLimit */
174 static int thread_limit = 0; /* ThreadLimit */
175 static int had_healthy_child = 0;
176 static int dying = 0;
177 static int workers_may_exit = 0;
178 static int start_thread_may_exit = 0;
179 static int listener_may_exit = 0;
180 static int num_listensocks = 0;
181 static apr_int32_t conns_this_child; /* MaxConnectionsPerChild, only access
182 in listener thread */
183 static apr_uint32_t connection_count = 0; /* Number of open connections */
184 static apr_uint32_t lingering_count = 0; /* Number of connections in lingering close */
185 static apr_uint32_t suspended_count = 0; /* Number of suspended connections */
186 static apr_uint32_t clogged_count = 0; /* Number of threads processing ssl conns */
187 static apr_uint32_t threads_shutdown = 0; /* Number of threads that have shutdown
188 early during graceful termination */
189 static int resource_shortage = 0;
190 static fd_queue_t *worker_queue;
191 static fd_queue_info_t *worker_queue_info;
193 static apr_thread_mutex_t *timeout_mutex;
195 module AP_MODULE_DECLARE_DATA mpm_event_module;
197 /* forward declare */
198 struct event_srv_cfg_s;
199 typedef struct event_srv_cfg_s event_srv_cfg;
/* Per-connection state kept by this MPM.  It carries the ring linkage that
 * places the connection on a timeout queue (timeout_list), the time it was
 * queued, the bucket allocator, the poll descriptor (used as cs->pfd in this
 * file) and the public connection state (used as cs->pub).
 * NOTE(review): several member declarations are not visible in this view. */
201 struct event_conn_state_t {
202 /** APR_RING of expiration timeouts */
203 APR_RING_ENTRY(event_conn_state_t) timeout_list;
204 /** the time when the entry was queued */
205 apr_time_t queue_timestamp;
206 /** connection record this struct refers to */
208 /** request record (if any) this struct refers to */
210 /** server config this struct refers to */
212 /** is the current conn_rec suspended? (disassociated with
213 * a particular MPM thread; for suspend_/resume_connection
217 /** memory pool to allocate from */
219 /** bucket allocator */
220 apr_bucket_alloc_t *bucket_alloc;
221 /** poll file descriptor information */
223 /** public parts of the connection state */
/* A timeout queue: a ring (head) of event_conn_state_t entries that all use
 * the same timeout value.  Queues are chained through next. */
226 APR_RING_HEAD(timeout_head_t, event_conn_state_t);
228 struct timeout_queue {
229 struct timeout_head_t head;
231 apr_interval_time_t timeout;
232 struct timeout_queue *next;
235 * Several timeout queues that use different timeouts, so that we always can
236 * simply append to the end.
237 * write_completion_q uses vhost's TimeOut
238 * keepalive_q uses vhost's KeepAliveTimeOut
239 * linger_q uses MAX_SECS_TO_LINGER
240 * short_linger_q uses SECONDS_TO_LINGER
242 static struct timeout_queue *write_completion_q,
247 static apr_pollfd_t *listener_pollfd;
250 * Macros for accessing struct timeout_queue.
251 * For TO_QUEUE_APPEND and TO_QUEUE_REMOVE, timeout_mutex must be held.
/* TO_QUEUE_APPEND(q, el)    inserts el at the tail of q's ring.
 * TO_QUEUE_REMOVE(q, el)    unlinks el from the ring it is on.
 * TO_QUEUE_INIT(q, p, t, v) allocates q from pool p with timeout t; when v
 *                           is non-NULL the new queue shares v's total
 *                           counter, otherwise a zeroed one is allocated.
 * TO_QUEUE_ELEM_INIT(el)    initialises el's ring linkage.
 * NOTE(review): parts of these macro bodies are not visible in this view. */
253 #define TO_QUEUE_APPEND(q, el) \
255 APR_RING_INSERT_TAIL(&(q)->head, el, event_conn_state_t, \
261 #define TO_QUEUE_REMOVE(q, el) \
263 APR_RING_REMOVE(el, timeout_list); \
268 #define TO_QUEUE_INIT(q, p, t, v) \
270 struct timeout_queue *b = (v); \
271 (q) = apr_palloc((p), sizeof *(q)); \
272 APR_RING_INIT(&(q)->head, event_conn_state_t, timeout_list); \
273 (q)->total = (b) ? (b)->total : apr_pcalloc((p), sizeof *(q)->total); \
275 (q)->timeout = (t); \
279 #define TO_QUEUE_ELEM_INIT(el) APR_RING_ELEM_INIT(el, timeout_list)
282 * The pollset for sockets that are in any of the timeout queues. Currently
283 * we use the timeout_mutex to make sure that connections are added/removed
284 * atomically to/from both event_pollset and a timeout queue. Otherwise
285 * some confusion can happen under high load if timeout queues and pollset
287 * XXX: It should be possible to make the lock unnecessary in many or even all
290 static apr_pollset_t *event_pollset;
292 /* The structure used to pass unique initialization info to each thread */
295 int pslot; /* process slot */
296 int tslot; /* worker slot of the thread */
299 /* Structure used to pass information to the thread responsible for
300 * creating the rest of the threads.
304 apr_thread_t **threads;
305 apr_thread_t *listener;
307 apr_threadattr_t *threadattr;
320 } listener_poll_type;
322 /* data retained by event across load/unload of the module
323 * allocated on first call to pre-config hook; located on
324 * subsequent calls to pre-config hook
/* State reached through the file-scope pointer `retained`.  Its mpm member
 * holds the MPM state read elsewhere in this file (mpm_state,
 * my_generation); the remaining members are described inline. */
326 typedef struct event_retained_data {
327 ap_unixd_mpm_retained_data *mpm;
329 int first_server_limit;
330 int first_thread_limit;
331 int sick_child_detected;
332 int maxclients_reported;
334 * The max child slot ever assigned, preserved across restarts. Necessary
335 * to deal with MaxRequestWorkers changes across AP_SIG_GRACEFUL restarts.
336 * We use this value to optimize routines that have to scan the entire
339 int max_daemons_limit;
342 * All running workers, active and shutting down, including those that
343 * may be left from before a graceful restart.
344 * Not kept up-to-date when shutdown is pending.
349 * idle_spawn_rate is the number of children that will be spawned on the
350 * next maintenance cycle if there aren't enough idle servers. It is
351 * maintained per listeners bucket, doubled up to MAX_SPAWN_RATE, and
352 * reset only when a cycle goes by without the need to spawn.
354 int *idle_spawn_rate;
355 #ifndef MAX_SPAWN_RATE
356 #define MAX_SPAWN_RATE (32)
358 int hold_off_on_exponential_spawning;
359 } event_retained_data;
360 static event_retained_data *retained;
/* One listeners bucket.  my_bucket->listeners is the listener list that
 * init_pollset() polls and close_listeners() closes in this file. */
362 typedef struct event_child_bucket {
364 ap_listen_rec *listeners;
365 } event_child_bucket;
366 static event_child_bucket *all_buckets, /* All listeners buckets */
367 *my_bucket; /* Current child bucket */
/* Per-server configuration of this MPM: the timeout queues a connection is
 * placed on (wc_q here; process_socket() also uses cs->sc->ka_q).
 * NOTE(review): the rest of the member list is not visible in this view. */
369 struct event_srv_cfg_s {
370 struct timeout_queue *wc_q,
/* Connection id built from a child slot c and a thread slot t: child c owns
 * the ids [c * thread_limit, (c + 1) * thread_limit).  Both arguments are
 * parenthesized so that compound expressions expand correctly. */
#define ID_FROM_CHILD_THREAD(c, t) (((c) * thread_limit) + (t))
376 /* The event MPM respects a couple of runtime flags that can aid
377 * in debugging. Setting the -DNO_DETACH flag will prevent the root process
378 * from detaching from its controlling terminal. Additionally, setting
379 * the -DONE_PROCESS flag (which implies -DNO_DETACH) will get you the
380 * child_main loop running in the process which originally started up.
381 * This gives you a pretty nice debugging environment. (You'll get a SIGHUP
382 * early in standalone_main; just continue through. This is the server
383 * trying to kill off any child processes which it might have lying
384 * around --- Apache doesn't keep track of their pids, it just sends
385 * SIGHUP to the process group, ignoring it in the root process.
386 * Continue through and you'll be fine.).
389 static int one_process = 0;
392 int raise_sigstop_flags;
395 static apr_pool_t *pconf; /* Pool for config stuff */
396 static apr_pool_t *pchild; /* Pool for httpd child stuff */
398 static pid_t ap_my_pid; /* Linux getpid() doesn't work except in main
399 thread. Use this instead */
400 static pid_t parent_pid;
401 static apr_os_thread_t *listener_os_thread;
403 /* The LISTENER_SIGNAL signal will be sent from the main thread to the
404 * listener thread to wake it up for graceful termination (what a child
405 * process from an old generation does when the admin does "apachectl
406 * graceful"). This signal will be blocked in all threads of a child
407 * process except for the listener thread.
409 #define LISTENER_SIGNAL SIGHUP
411 /* An array of socket descriptors in use by each thread used to
412 * perform a non-graceful (forced) shutdown of the server.
414 static apr_socket_t **worker_sockets;
/* Stop polling the listening sockets: remove each of the num_listensocks
 * entries of listener_pollfd from event_pollset, then flag the scoreboard
 * slot of process_slot as not accepting.  The results of
 * apr_pollset_remove() are not checked. */
416 static void disable_listensocks(int process_slot)
419 for (i = 0; i < num_listensocks; i++) {
420 apr_pollset_remove(event_pollset, &listener_pollfd[i]);
422 ap_scoreboard_image->parent[process_slot].not_accepting = 1;
/* Resume polling the listening sockets: log the current connection counters
 * and idle-worker count at debug level, add every listener_pollfd entry
 * back to event_pollset, and clear the not_accepting flag of process_slot.
 * listener_may_exit is tested first.
 * NOTE(review): the body of that test is not visible in this view --
 * presumably an early return; confirm. */
425 static void enable_listensocks(int process_slot)
428 if (listener_may_exit) {
431 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00457)
432 "Accepting new connections again: "
433 "%u active conns (%u lingering/%u clogged/%u suspended), "
435 apr_atomic_read32(&connection_count),
436 apr_atomic_read32(&lingering_count),
437 apr_atomic_read32(&clogged_count),
438 apr_atomic_read32(&suspended_count),
439 ap_queue_info_get_idlers(worker_queue_info));
440 for (i = 0; i < num_listensocks; i++)
441 apr_pollset_add(event_pollset, &listener_pollfd[i]);
443 * XXX: This is not yet optimal. If many workers suddenly become available,
444 * XXX: the parent may kill some processes off too soon.
446 ap_scoreboard_image->parent[process_slot].not_accepting = 0;
/* Close every non-NULL socket in worker_sockets[0 .. threads_per_child) and
 * reset its slot to NULL.  signal_threads() calls this for ungraceful
 * termination. */
449 static void close_worker_sockets(void)
452 for (i = 0; i < threads_per_child; i++) {
453 if (worker_sockets[i]) {
454 apr_socket_close(worker_sockets[i]);
455 worker_sockets[i] = NULL;
/* Ask the listener thread to exit.  Sets listener_may_exit, terminates
 * worker_queue_info so a listener blocked waiting for a worker is released,
 * and sends LISTENER_SIGNAL to the listener: pthread_kill() on
 * *listener_os_thread when HAVE_PTHREAD_KILL is defined, kill() on
 * ap_my_pid otherwise.
 * NOTE(review): what happens when listener_os_thread is NULL is not
 * visible in this view. */
460 static void wakeup_listener(void)
462 listener_may_exit = 1;
463 if (!listener_os_thread) {
464 /* XXX there is an obscure path that this doesn't handle perfectly:
465 * right after listener thread is created but before
466 * listener_os_thread is set, the first worker thread hits an
467 * error and starts graceful termination
472 /* unblock the listener if it's waiting for a worker */
473 ap_queue_info_term(worker_queue_info);
476 * we should just be able to "kill(ap_my_pid, LISTENER_SIGNAL)" on all
477 * platforms and wake up the listener thread since it is the only thread
478 * with SIGHUP unblocked, but that doesn't work on Linux
480 #ifdef HAVE_PTHREAD_KILL
481 pthread_kill(*listener_os_thread, LISTENER_SIGNAL);
483 kill(ap_my_pid, LISTENER_SIGNAL);
/* Termination modes accepted by signal_threads().  terminate_mode holds the
 * mode most recently requested and starts out as ST_INIT. */
488 #define ST_GRACEFUL 1
489 #define ST_UNGRACEFUL 2
491 static int terminate_mode = ST_INIT;
/* Start shutting down the threads of this child in the given mode
 * (ST_GRACEFUL or ST_UNGRACEFUL).  Records the mode in terminate_mode and
 * moves the MPM state to AP_MPMQ_STOPPING.  For ST_UNGRACEFUL it also sets
 * workers_may_exit, interrupts every waiter on worker_queue and closes the
 * worker sockets.
 * NOTE(review): the body of the `terminate_mode == mode` test and the
 * statement following the "wake up the" comment are not visible in this
 * view -- presumably an early return and a wakeup_listener() call. */
493 static void signal_threads(int mode)
495 if (terminate_mode == mode) {
498 terminate_mode = mode;
499 retained->mpm->mpm_state = AP_MPMQ_STOPPING;
501 /* in case we weren't called from the listener thread, wake up the
506 /* for ungraceful termination, let the workers exit now;
507 * for graceful termination, the listener thread will notify the
508 * workers to exit once it has stopped accepting new connections
510 if (mode == ST_UNGRACEFUL) {
511 workers_may_exit = 1;
512 ap_queue_interrupt_all(worker_queue);
513 close_worker_sockets(); /* forcefully kill all current connections */
/* Answer an AP_MPMQ_* query: store the value for query_code in *result.
 * Limits and spare-thread settings come from the file-scope configuration
 * variables; state and generation come from retained->mpm.
 * NOTE(review): the assignments for AP_MPMQ_IS_ASYNC,
 * AP_MPMQ_MIN_SPARE_DAEMONS and AP_MPMQ_MAX_SPARE_DAEMONS, the default
 * case, the use of rv and the return value are not visible in this view. */
517 static int event_query(int query_code, int *result, apr_status_t *rv)
520 switch (query_code) {
521 case AP_MPMQ_MAX_DAEMON_USED:
522 *result = retained->max_daemons_limit;
524 case AP_MPMQ_IS_THREADED:
525 *result = AP_MPMQ_STATIC;
527 case AP_MPMQ_IS_FORKED:
528 *result = AP_MPMQ_DYNAMIC;
530 case AP_MPMQ_IS_ASYNC:
533 case AP_MPMQ_HARD_LIMIT_DAEMONS:
534 *result = server_limit;
536 case AP_MPMQ_HARD_LIMIT_THREADS:
537 *result = thread_limit;
539 case AP_MPMQ_MAX_THREADS:
540 *result = threads_per_child;
542 case AP_MPMQ_MIN_SPARE_DAEMONS:
545 case AP_MPMQ_MIN_SPARE_THREADS:
546 *result = min_spare_threads;
548 case AP_MPMQ_MAX_SPARE_DAEMONS:
551 case AP_MPMQ_MAX_SPARE_THREADS:
552 *result = max_spare_threads;
554 case AP_MPMQ_MAX_REQUESTS_DAEMON:
555 *result = ap_max_requests_per_child;
557 case AP_MPMQ_MAX_DAEMONS:
558 *result = active_daemons_limit;
560 case AP_MPMQ_MPM_STATE:
561 *result = retained->mpm->mpm_state;
563 case AP_MPMQ_GENERATION:
564 *result = retained->mpm->my_generation;
/* Report a child exit through ap_run_child_status(MPM_CHILD_EXITED).
 * When the child had a scoreboard slot (childnum != -1), pid and generation
 * are taken from that slot and the slot's pid is then cleared.  The other
 * visible call reports the caller-supplied pid and gen with slot -1
 * (presumably the else branch; its header is not visible in this view). */
573 static void event_note_child_killed(int childnum, pid_t pid, ap_generation_t gen)
575 if (childnum != -1) { /* child had a scoreboard slot? */
576 ap_run_child_status(ap_server_conf,
577 ap_scoreboard_image->parent[childnum].pid,
578 ap_scoreboard_image->parent[childnum].generation,
579 childnum, MPM_CHILD_EXITED);
580 ap_scoreboard_image->parent[childnum].pid = 0;
583 ap_run_child_status(ap_server_conf, pid, gen, -1, MPM_CHILD_EXITED);
/* Record pid in scoreboard slot `slot` and announce the new child through
 * ap_run_child_status(MPM_CHILD_STARTED), using the current generation
 * from retained->mpm. */
587 static void event_note_child_started(int slot, pid_t pid)
589 ap_scoreboard_image->parent[slot].pid = pid;
590 ap_run_child_status(ap_server_conf,
591 ap_scoreboard_image->parent[slot].pid,
592 retained->mpm->my_generation, slot, MPM_CHILD_STARTED);
/* NOTE(review): only the signature is visible in this view.  Presumably
 * returns this MPM's name as a constant string -- confirm against the body. */
595 static const char *event_get_name(void)
600 /* a clean exit from a child with proper cleanup */
/* Leave the child process; declared noreturn.  The visible steps set the
 * MPM state to AP_MPMQ_STOPPING, destroy the pchild pool and report the
 * exit through event_note_child_killed() for slot 0.
 * NOTE(review): the conditions guarding these steps and the final exit
 * call using `code` are not visible in this view. */
601 static void clean_child_exit(int code) __attribute__ ((noreturn));
602 static void clean_child_exit(int code)
604 retained->mpm->mpm_state = AP_MPMQ_STOPPING;
606 apr_pool_destroy(pchild);
610 event_note_child_killed(/* slot */ 0, 0, 0);
/* NOTE(review): only the signature is visible in this view.  The int sig
 * parameter suggests a signal handler that ends the child -- confirm
 * against the body. */
616 static void just_die(int sig)
621 /*****************************************************************
622 * Connection structures and accounting...
625 static int child_fatal;
/* Pool cleanup registered on the connection pool by process_socket().
 * cs_ is the connection's event_conn_state_t.  Depending on cs->pub.state
 * it decrements lingering_count (either linger state) or suspended_count
 * (suspended), and it decrements connection_count.  All updates are atomic.
 * NOTE(review): the break/default lines and the return value are not
 * visible in this view. */
627 static apr_status_t decrement_connection_count(void *cs_)
629 event_conn_state_t *cs = cs_;
630 switch (cs->pub.state) {
631 case CONN_STATE_LINGER_NORMAL:
632 case CONN_STATE_LINGER_SHORT:
633 apr_atomic_dec32(&lingering_count);
635 case CONN_STATE_SUSPENDED:
636 apr_atomic_dec32(&suspended_count);
641 apr_atomic_dec32(&connection_count);
/* Run the suspend_connection hooks for cs's connection and current request.
 * NOTE(review): other statements of this function are not visible here. */
645 static void notify_suspend(event_conn_state_t *cs)
647 ap_run_suspend_connection(cs->c, cs->r);
/* Run the resume_connection hooks for cs's connection and current request.
 * NOTE(review): how sbh is used is not visible in this view;
 * ptrans_pre_cleanup() passes NULL for it. */
652 static void notify_resume(event_conn_state_t *cs, ap_sb_handle_t *sbh)
656 ap_run_resume_connection(cs->c, cs->r);
/* Common tail of the blocking and non-blocking lingering-close starters.
 *
 * Sets the socket timeout to 0 and stamps cs->queue_timestamp.  The state
 * becomes CONN_STATE_LINGER_SHORT when the connection carries the
 * "short-lingering-close" note and CONN_STATE_LINGER_NORMAL otherwise, and
 * lingering_count is incremented.  Under timeout_mutex, cs is appended to
 * queue q and its descriptor is added to event_pollset, polling for
 * writability if cs->pub.sense is CONN_SENSE_WANT_WRITE and for readability
 * otherwise, plus HUP/ERR.
 *
 * If the pollset add fails with anything but EEXIST, the error is logged,
 * cs is removed from q, the socket is closed and the pool is handed back
 * with ap_push_pool().
 *
 * NOTE(review): the selection of q, the use of in_worker and the return
 * statements are not visible in this view.  The callers document the
 * result as 0 = fully closed, 1 = lingering. */
659 static int start_lingering_close_common(event_conn_state_t *cs, int in_worker)
662 struct timeout_queue *q;
663 apr_socket_t *csd = cs->pfd.desc.s;
666 rv = apr_socket_timeout_set(csd, 0);
667 AP_DEBUG_ASSERT(rv == APR_SUCCESS);
670 apr_socket_timeout_set(csd, 0);
672 cs->queue_timestamp = apr_time_now();
674 * If some module requested a shortened waiting period, only wait for
675 * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain
678 if (apr_table_get(cs->c->notes, "short-lingering-close")) {
680 cs->pub.state = CONN_STATE_LINGER_SHORT;
684 cs->pub.state = CONN_STATE_LINGER_NORMAL;
686 apr_atomic_inc32(&lingering_count);
693 apr_thread_mutex_lock(timeout_mutex);
694 TO_QUEUE_APPEND(q, cs);
695 cs->pfd.reqevents = (
696 cs->pub.sense == CONN_SENSE_WANT_WRITE ? APR_POLLOUT :
697 APR_POLLIN) | APR_POLLHUP | APR_POLLERR;
698 cs->pub.sense = CONN_SENSE_DEFAULT;
699 rv = apr_pollset_add(event_pollset, &cs->pfd);
700 apr_thread_mutex_unlock(timeout_mutex);
701 if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
702 ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03092)
703 "start_lingering_close: apr_pollset_add failure");
704 apr_thread_mutex_lock(timeout_mutex);
705 TO_QUEUE_REMOVE(q, cs);
706 apr_thread_mutex_unlock(timeout_mutex);
707 apr_socket_close(cs->pfd.desc.s);
708 ap_push_pool(worker_queue_info, cs->p);
715 * Close our side of the connection, flushing data to the client first.
716 * Pre-condition: cs is not in any timeout queue and not in the pollset,
717 * timeout_mutex is not locked
718 * return: 0 if connection is fully closed,
719 * 1 if connection is lingering
720 * May only be called by worker thread.
/* Worker-side start of a lingering close.  When ap_start_lingering_close()
 * returns non-zero, the connection's pool is handed back with
 * ap_push_pool(); otherwise the work continues in
 * start_lingering_close_common() with in_worker = 1.
 * NOTE(review): the return in the first branch is not visible in this
 * view. */
722 static int start_lingering_close_blocking(event_conn_state_t *cs)
724 if (ap_start_lingering_close(cs->c)) {
726 ap_push_pool(worker_queue_info, cs->p);
729 return start_lingering_close_common(cs, 1);
733 * Close our side of the connection, NOT flushing data to the client.
734 * This should only be called if there has been an error or if we know
735 * that our send buffers are empty.
736 * Pre-condition: cs is not in any timeout queue and not in the pollset,
737 * timeout_mutex is not locked
738 * return: 0 if connection is fully closed,
739 * 1 if connection is lingering
740 * may be called by listener thread
/* Start a lingering close without flushing.  If preparing the lingering
 * close, shutting down the connection, or shutting down the write side of
 * the socket fails, or the connection is already aborted, the socket is
 * closed, the pool is handed back with ap_push_pool() and one waiter on
 * worker_queue is interrupted.  Otherwise the work continues in
 * start_lingering_close_common() with in_worker = 0.
 * NOTE(review): part of the condition, the guard around
 * ap_queue_interrupt_one() and the failure-path return are not visible in
 * this view. */
742 static int start_lingering_close_nonblocking(event_conn_state_t *cs)
745 apr_socket_t *csd = cs->pfd.desc.s;
747 if (ap_prep_lingering_close(c)
749 || ap_shutdown_conn(c, 0) != APR_SUCCESS || c->aborted
750 || apr_socket_shutdown(csd, APR_SHUTDOWN_WRITE) != APR_SUCCESS) {
751 apr_socket_close(csd);
752 ap_push_pool(worker_queue_info, cs->p);
754 ap_queue_interrupt_one(worker_queue);
757 return start_lingering_close_common(cs, 0);
761 * forcibly close a lingering connection after the lingering period has
763 * Pre-condition: cs is not in any timeout queue and not in the pollset
764 * return: irrelevant (need same prototype as start_lingering_close)
/* End a lingering close whose time is up: log at TRACE4, close the
 * connection's socket (logging an error if the close fails), hand the pool
 * back with ap_push_pool() and interrupt one waiter on worker_queue.
 * NOTE(review): the guard around ap_queue_interrupt_one() and the return
 * statement are not visible in this view. */
766 static int stop_lingering_close(event_conn_state_t *cs)
769 apr_socket_t *csd = ap_get_conn_socket(cs->c);
770 ap_log_error(APLOG_MARK, APLOG_TRACE4, 0, ap_server_conf,
771 "socket reached timeout in lingering-close state");
772 rv = apr_socket_close(csd);
773 if (rv != APR_SUCCESS) {
774 ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(00468) "error closing socket");
777 ap_push_pool(worker_queue_info, cs->p);
779 ap_queue_interrupt_one(worker_queue);
784 * This runs before any non-MPM cleanup code on the connection;
785 * if the connection is currently suspended as far as modules
786 * know, provide notification of resumption.
/* Pre-cleanup registered on the transaction pool by process_socket(); dummy
 * is the connection's event_conn_state_t.  Calls notify_resume() with a
 * NULL scoreboard handle.
 * NOTE(review): the condition guarding that call and the return value are
 * not visible in this view. */
788 static apr_status_t ptrans_pre_cleanup(void *dummy)
790 event_conn_state_t *cs = dummy;
793 notify_resume(cs, NULL);
799 * event_pre_read_request() and event_request_cleanup() track the
800 * current r for a given connection.
/* Request-pool cleanup registered by event_pre_read_request().  Looks up
 * the connection's event_conn_state_t from its conn_config.
 * NOTE(review): the rest of the body is not visible in this view --
 * presumably it clears the tracked request pointer; confirm. */
802 static apr_status_t event_request_cleanup(void *dummy)
805 event_conn_state_t *cs = ap_get_module_config(c->conn_config,
/* Hook run before a request is read on connection c.  Looks up the
 * connection's event_conn_state_t, points cs->sc at the configuration of
 * ap_server_conf, and registers event_request_cleanup() on r->pool with c
 * as its argument.
 * NOTE(review): the statement that records r in cs is not visible in this
 * view. */
812 static void event_pre_read_request(request_rec *r, conn_rec *c)
814 event_conn_state_t *cs = ap_get_module_config(c->conn_config,
818 cs->sc = ap_get_module_config(ap_server_conf->module_config,
820 apr_pool_cleanup_register(r->pool, c, event_request_cleanup,
821 apr_pool_cleanup_null);
825 * event_post_read_request() tracks the current server config for a
/* Hook run after a request has been read.  Selects the server
 * configuration stored in cs->sc: the request's own server when it sets a
 * keep-alive timeout explicitly (keep_alive_timeout_set), the connection's
 * base server otherwise.
 * NOTE(review): the return value is not visible in this view. */
828 static int event_post_read_request(request_rec *r)
830 conn_rec *c = r->connection;
831 event_conn_state_t *cs = ap_get_module_config(c->conn_config,
834 /* To preserve legacy behaviour (consistent with other MPMs), use
835 * the keepalive timeout from the base server (first on this IP:port)
836 * when none is explicitly configured on this server.
838 if (r->server->keep_alive_timeout_set) {
839 cs->sc = ap_get_module_config(r->server->module_config,
843 cs->sc = ap_get_module_config(c->base_server->module_config,
850 * process one connection in the worker
/* Worker-side handling of one connection.
 *
 * When cs is NULL the socket is new: a connection record is created with
 * ap_run_create_connection(), an event_conn_state_t is allocated from p and
 * attached to it, connection_count is incremented (with
 * decrement_connection_count() registered as the matching pool cleanup),
 * the poll descriptor is prepared, the pre_connection hooks run, and the
 * state starts at CONN_STATE_READ_REQUEST_LINE.  Otherwise the connection
 * is being resumed: notify_resume() is called and c->current_thread is
 * updated.
 *
 * The connection is then driven according to cs->pub.state:
 *  - READ_REQUEST_LINE (or a clogging input filter):
 *    ap_run_process_connection()
 *  - WRITE_COMPLETION: the last output filter is invoked with a NULL
 *    brigade; if output is still pending, the connection is put on
 *    cs->sc->wc_q and added to event_pollset under timeout_mutex
 *  - LINGER: start_lingering_close_blocking()
 *  - CHECK_REQUEST_LINE_READABLE: queued on cs->sc->ka_q and polled for
 *    input; a pollset failure is logged
 *  - SUSPENDED: suspended_count is incremented
 *
 * NOTE(review): several statements (early returns, some branch headers,
 * part of the parameter list) are not visible in this view; this summary
 * covers only the visible ones. */
852 static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock,
853 event_conn_state_t * cs, int my_child_num,
857 long conn_id = ID_FROM_CHILD_THREAD(my_child_num, my_thread_num);
861 /* XXX: This will cause unbounded mem usage for long lasting connections */
862 ap_create_sb_handle(&sbh, p, my_child_num, my_thread_num);
864 if (cs == NULL) { /* This is a new connection */
865 listener_poll_type *pt = apr_pcalloc(p, sizeof(*pt));
866 cs = apr_pcalloc(p, sizeof(event_conn_state_t));
867 cs->bucket_alloc = apr_bucket_alloc_create(p);
868 c = ap_run_create_connection(p, ap_server_conf, sock,
869 conn_id, sbh, cs->bucket_alloc);
871 ap_push_pool(worker_queue_info, p);
874 apr_atomic_inc32(&connection_count);
875 apr_pool_cleanup_register(c->pool, cs, decrement_connection_count,
876 apr_pool_cleanup_null);
877 ap_set_module_config(c->conn_config, &mpm_event_module, cs);
878 c->current_thread = thd;
882 cs->sc = ap_get_module_config(ap_server_conf->module_config,
884 cs->pfd.desc_type = APR_POLL_SOCKET;
885 cs->pfd.reqevents = APR_POLLIN;
886 cs->pfd.desc.s = sock;
889 cs->pfd.client_data = pt;
890 apr_pool_pre_cleanup_register(p, cs, ptrans_pre_cleanup);
891 TO_QUEUE_ELEM_INIT(cs);
893 ap_update_vhost_given_ip(c);
895 rc = ap_run_pre_connection(c, sock);
896 if (rc != OK && rc != DONE) {
897 ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c, APLOGNO(00469)
898 "process_socket: connection aborted");
903 * XXX If the platform does not have a usable way of bundling
904 * accept() with a socket readability check, like Win32,
905 * and there are measurable delays before the
906 * socket is readable due to the first data packet arriving,
907 * it might be better to create the cs on the listener thread
908 * with the state set to CONN_STATE_CHECK_REQUEST_LINE_READABLE
910 * FreeBSD users will want to enable the HTTP accept filter
911 * module in their kernel for the highest performance
912 * When the accept filter is active, sockets are kept in the
913 * kernel until a HTTP request is received.
915 cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
917 cs->pub.sense = CONN_SENSE_DEFAULT;
921 notify_resume(cs, sbh);
922 c->current_thread = thd;
923 /* Subsequent request on a conn, and thread number is part of ID */
927 if (c->clogging_input_filters && !c->aborted) {
928 /* Since we have an input filter which 'clogs' the input stream,
929 * like mod_ssl used to, lets just do the normal read from input
930 * filters, like the Worker MPM does. Filters that need to write
931 * where they would otherwise read, or read where they would
932 * otherwise write, should set the sense appropriately.
934 apr_atomic_inc32(&clogged_count);
935 ap_run_process_connection(c);
936 if (cs->pub.state != CONN_STATE_SUSPENDED) {
937 cs->pub.state = CONN_STATE_LINGER;
939 apr_atomic_dec32(&clogged_count);
943 if (cs->pub.state == CONN_STATE_READ_REQUEST_LINE) {
945 ap_run_process_connection(c);
947 /* state will be updated upon return
948 * fall thru to either wait for readability/timeout or
953 cs->pub.state = CONN_STATE_LINGER;
957 if (cs->pub.state == CONN_STATE_WRITE_COMPLETION) {
958 ap_filter_t *output_filter = c->output_filters;
960 ap_update_child_status(sbh, SERVER_BUSY_WRITE, NULL);
961 while (output_filter->next != NULL) {
962 output_filter = output_filter->next;
964 rv = output_filter->frec->filter_func.out_func(output_filter, NULL);
965 if (rv != APR_SUCCESS) {
966 ap_log_cerror(APLOG_MARK, APLOG_DEBUG, rv, c, APLOGNO(00470)
967 "network write failure in core output filter");
968 cs->pub.state = CONN_STATE_LINGER;
970 else if (c->data_in_output_filters) {
971 /* Still in WRITE_COMPLETION_STATE:
972 * Set a write timeout for this connection, and let the
973 * event thread poll for writeability.
975 cs->queue_timestamp = apr_time_now();
977 apr_thread_mutex_lock(timeout_mutex);
978 TO_QUEUE_APPEND(cs->sc->wc_q, cs);
979 cs->pfd.reqevents = (
980 cs->pub.sense == CONN_SENSE_WANT_READ ? APR_POLLIN :
981 APR_POLLOUT) | APR_POLLHUP | APR_POLLERR;
982 cs->pub.sense = CONN_SENSE_DEFAULT;
983 rc = apr_pollset_add(event_pollset, &cs->pfd);
984 apr_thread_mutex_unlock(timeout_mutex);
987 else if (c->keepalive != AP_CONN_KEEPALIVE || c->aborted ||
989 cs->pub.state = CONN_STATE_LINGER;
991 else if (c->data_in_input_filters) {
992 cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
996 cs->pub.state = CONN_STATE_CHECK_REQUEST_LINE_READABLE;
1000 if (cs->pub.state == CONN_STATE_LINGER) {
1001 start_lingering_close_blocking(cs);
1003 else if (cs->pub.state == CONN_STATE_CHECK_REQUEST_LINE_READABLE) {
1004 /* It greatly simplifies the logic to use a single timeout value per q
1005 * because the new element can just be added to the end of the list and
1006 * it will stay sorted in expiration time sequence. If brand new
1007 * sockets are sent to the event thread for a readability check, this
1008 * will be a slight behavior change - they use the non-keepalive
1009 * timeout today. With a normal client, the socket will be readable in
1010 * a few milliseconds anyway.
1012 cs->queue_timestamp = apr_time_now();
1014 apr_thread_mutex_lock(timeout_mutex);
1015 TO_QUEUE_APPEND(cs->sc->ka_q, cs);
1017 /* Add work to pollset. */
1018 cs->pfd.reqevents = APR_POLLIN;
1019 rc = apr_pollset_add(event_pollset, &cs->pfd);
1020 apr_thread_mutex_unlock(timeout_mutex);
1022 if (rc != APR_SUCCESS) {
1023 ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf, APLOGNO(03093)
1024 "process_socket: apr_pollset_add failure");
1025 AP_DEBUG_ASSERT(rc == APR_SUCCESS);
1028 else if (cs->pub.state == CONN_STATE_SUSPENDED) {
1029 apr_atomic_inc32(&suspended_count);
1034 /* conns_this_child has gone to zero or below. See if the admin coded
1035 "MaxConnectionsPerChild 0", and keep going in that case. Doing it this way
1036 simplifies the hot path in worker_thread */
/* When a per-child connection limit is configured
 * (ap_max_requests_per_child is non-zero), log at TRACE1 and start a
 * graceful shutdown.  The other visible path resets conns_this_child to
 * APR_INT32_MAX (presumably the else branch; its header is not visible in
 * this view). */
1037 static void check_infinite_requests(void)
1039 if (ap_max_requests_per_child) {
1040 ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1041 "Stopping process due to MaxConnectionsPerChild");
1042 signal_threads(ST_GRACEFUL);
1046 conns_this_child = APR_INT32_MAX;
/* Stop accepting in this child: take the listeners out of the pollset,
 * close the sockets of my_bucket, mark the scoreboard slot as quiescing and
 * every thread slot as SERVER_GRACEFUL, signal the process with SIGTERM to
 * wake the main thread, then free idle pools and interrupt all waiters on
 * worker_queue.
 * NOTE(review): the use of *closed and any guard around these steps are
 * not visible in this view. */
1050 static void close_listeners(int process_slot, int *closed)
1054 disable_listensocks(process_slot);
1055 ap_close_listeners_ex(my_bucket->listeners);
1058 ap_scoreboard_image->parent[process_slot].quiescing = 1;
1059 for (i = 0; i < threads_per_child; ++i) {
1060 ap_update_child_status_from_indexes(process_slot, i,
1061 SERVER_GRACEFUL, NULL);
1063 /* wake up the main thread */
1064 kill(ap_my_pid, SIGTERM);
1066 ap_free_idle_pools(worker_queue_info);
1067 ap_queue_interrupt_all(worker_queue);
/* Unblock signal sig: build a one-signal mask and apply SIG_UNBLOCK with
 * sigprocmask() when SIGPROCMASK_SETS_THREAD_MASK is defined; the other
 * visible call uses pthread_sigmask() (presumably the #else branch).
 * Return values are not checked. */
1071 static void unblock_signal(int sig)
1075 sigemptyset(&sig_mask);
1076 sigaddset(&sig_mask, sig);
1077 #if defined(SIGPROCMASK_SETS_THREAD_MASK)
1078 sigprocmask(SIG_UNBLOCK, &sig_mask, NULL);
1080 pthread_sigmask(SIG_UNBLOCK, &sig_mask, NULL);
/* Signal handler with no visible statements; per the note below it exists
 * so that a signal can interrupt a blocking system call. */
1084 static void dummy_signal_handler(int sig)
1086 /* XXX If specifying SIG_IGN is guaranteed to unblock a syscall,
1087 * then we don't need this goofy function.
/* Register the listening sockets with event_pollset.  Allocates
 * listener_pollfd (num_listensocks entries) from p.  For each listener in
 * my_bucket it fills one entry for APR_POLLIN, attaches a listener_poll_type
 * of type PT_ACCEPT as client data, makes the socket non-blocking, adds it
 * to the pollset and installs ap_unixd_accept as the accept function.  The
 * results of apr_socket_opt_set() and apr_pollset_add() are not checked.
 * NOTE(review): the creation of event_pollset and the return value are not
 * visible in this view. */
1092 static apr_status_t init_pollset(apr_pool_t *p)
1095 listener_poll_type *pt;
1098 listener_pollfd = apr_palloc(p, sizeof(apr_pollfd_t) * num_listensocks);
1099 for (lr = my_bucket->listeners; lr != NULL; lr = lr->next, i++) {
1101 AP_DEBUG_ASSERT(i < num_listensocks);
1102 pfd = &listener_pollfd[i];
1103 pt = apr_pcalloc(p, sizeof(*pt));
1104 pfd->desc_type = APR_POLL_SOCKET;
1105 pfd->desc.s = lr->sd;
1106 pfd->reqevents = APR_POLLIN;
1108 pt->type = PT_ACCEPT;
1111 pfd->client_data = pt;
1113 apr_socket_opt_set(pfd->desc.s, APR_SO_NONBLOCK, 1);
1114 apr_pollset_add(event_pollset, pfd);
1116 lr->accept_func = ap_unixd_accept;
/* Hand timer event te to a worker by pushing it onto worker_queue; returns
 * the status of ap_queue_push_timer(). */
1122 static apr_status_t push_timer2worker(timer_event_t* te)
1124 return ap_queue_push_timer(worker_queue, te);
1128 * Pre-condition: pfd->cs is neither in pollset nor timeout queue
1129 * this function may only be called by the listener
/* Queue the connection behind pfd for a worker.  The connection state is
 * taken from the listener_poll_type stored in pfd->client_data.  If
 * ap_queue_push() fails, the connection is discarded: its bucket allocator
 * is destroyed, the socket is closed, the failure is logged at CRIT and the
 * pool is handed back with ap_push_pool().
 * NOTE(review): the use of the pollset argument and the return statement
 * are not visible in this view. */
1131 static apr_status_t push2worker(const apr_pollfd_t * pfd,
1132 apr_pollset_t * pollset)
1134 listener_poll_type *pt = (listener_poll_type *) pfd->client_data;
1135 event_conn_state_t *cs = (event_conn_state_t *) pt->baton;
1138 rc = ap_queue_push(worker_queue, cs->pfd.desc.s, cs, cs->p);
1139 if (rc != APR_SUCCESS) {
1140 /* trash the connection; we couldn't queue the connected
1141 * socket to a worker
1143 apr_bucket_alloc_destroy(cs->bucket_alloc);
1144 apr_socket_close(cs->pfd.desc.s);
1145 ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1146 ap_server_conf, APLOGNO(00471) "push2worker: ap_queue_push failed");
1147 ap_push_pool(worker_queue_info, cs->p);
1154 * If *have_idle_worker_p == 0, reserve a worker thread, and set
1155 * *have_idle_worker_p = 1.
1156 * If *have_idle_worker_p is already 1, will do nothing.
1157 * If blocking == 1, block if all workers are currently busy.
1158 * If no worker was available immediately, will set *all_busy to 1.
1159 * XXX: If there are no workers, we should not block immediately but
1160 * XXX: close all keep-alive connections first.
/* get_worker: make sure the caller holds a reserved idle worker.
 *
 * have_idle_worker_p - in/out flag; it is tested first, and it is set to 1
 *                      when the queue-info call yields APR_SUCCESS or an EOF
 *                      status.
 * blocking           - tested together with APR_EAGAIN below; presumably it
 *                      also selects between ap_queue_info_wait_for_idler()
 *                      and ap_queue_info_try_get_idler() (the selecting
 *                      lines are not visible here - confirm).
 * all_busy           - passed through to ap_queue_info_wait_for_idler().
 *
 * The final lines log an APLOG_ERR message and request a graceful shutdown
 * with signal_threads(ST_GRACEFUL).
 * NOTE(review): the body of the "!blocking && rc == APR_EAGAIN" branch is
 * not visible here.
 */
1162 static void get_worker(int *have_idle_worker_p, int blocking, int *all_busy)
1166 if (*have_idle_worker_p) {
1167 /* already reserved a worker thread - must have hit a
1168 * transient error on a previous pass
1174 rc = ap_queue_info_wait_for_idler(worker_queue_info, all_busy);
1176 rc = ap_queue_info_try_get_idler(worker_queue_info);
1178 if (rc == APR_SUCCESS || APR_STATUS_IS_EOF(rc)) {
1179 *have_idle_worker_p = 1;
1181 else if (!blocking && rc == APR_EAGAIN) {
1185 ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf, APLOGNO(00472)
1186 "ap_queue_info_wait_for_idler failed. "
1187 "Attempting to shutdown process gracefully");
1188 signal_threads(ST_GRACEFUL);
1192 /* Structures to reuse */
/* Ring of spare timer_event_t entries: event_register_timed_callback() takes
 * an entry from it when it is not empty, and worker_thread() appends entries
 * to its tail after running their callback.
 */
1193 static APR_RING_HEAD(timer_free_ring_t, timer_event_t) timer_free_ring;
/* Pending timers; entries are added with apr_skiplist_insert() and consumed
 * by listener_thread() through apr_skiplist_peek()/apr_skiplist_pop().
 */
1195 static apr_skiplist *timer_skiplist;
1197 /* The following compare function is used by apr_skiplist_insert() to keep the
1198 * elements (timers) sorted and provide O(log n) complexity (this is also true
1199 * for apr_skiplist_{find,remove}(), but those are not used in MPM event where
1200 * inserted timers are not searched nor removed, but with apr_skiplist_pop()
1201 * which does not use any compare function). It is meant to return 0 when a == b,
1202 * <0 when a < b, and >0 when a > b. However apr_skiplist_insert() will not
1203 * add duplicates (i.e. a == b), and apr_skiplist_add() is only available in
1204 * APR 1.6, yet multiple timers could possibly be created in the same micro-
1205 * second (duplicates with regard to apr_time_t); therefore we implement the
1206 * compare function to return +1 instead of 0 when compared timers are equal,
1207 * thus duplicates are still added after each other (in order of insertion).
/* timer_comp: ordering callback installed on timer_skiplist.
 *
 * a, b - timer_event_t pointers; only their 'when' fields are compared.
 *
 * Returns -1 when a's time is earlier than b's and 1 in every other case,
 * so two timers with the same time never compare as equal (0).  Both times
 * are checked for non-zero with AP_DEBUG_ASSERT.
 */
1209 static int timer_comp(void *a, void *b)
1211 apr_time_t t1 = (apr_time_t) ((timer_event_t *)a)->when;
1212 apr_time_t t2 = (apr_time_t) ((timer_event_t *)b)->when;
1213 AP_DEBUG_ASSERT(t1);
1214 AP_DEBUG_ASSERT(t2);
1215 return ((t1 < t2) ? -1 : 1);
/* Mutex held around accesses to timer_skiplist and timer_free_ring in
 * event_register_timed_callback(), listener_thread() and worker_thread();
 * it is created in child_main().
 */
1218 static apr_thread_mutex_t *g_timer_skiplist_mtx;
/* event_register_timed_callback: schedule a callback to run after a delay.
 *
 * t    - delay; the expiry time stored in the timer is t + apr_time_now().
 * cbfn - callback of type ap_mpm_callback_fn_t.
 * NOTE(review): the remaining parameter line(s), the assignments of the
 * callback fields and the return statement are not visible here.
 *
 * With g_timer_skiplist_mtx held, a timer_event_t is taken from
 * timer_free_ring when that ring is not empty; apr_skiplist_alloc() plus
 * APR_RING_ELEM_INIT() is the other source of an entry.  The entry's 'when'
 * is set and it is inserted into timer_skiplist before the mutex is
 * released.
 */
1220 static apr_status_t event_register_timed_callback(apr_time_t t,
1221 ap_mpm_callback_fn_t *cbfn,
1225 /* oh yeah, and make locking smarter/fine grained. */
1226 apr_thread_mutex_lock(g_timer_skiplist_mtx);
1228 if (!APR_RING_EMPTY(&timer_free_ring, timer_event_t, link)) {
1229 te = APR_RING_FIRST(&timer_free_ring);
1230 APR_RING_REMOVE(te, link);
1233 te = apr_skiplist_alloc(timer_skiplist, sizeof(timer_event_t));
1234 APR_RING_ELEM_INIT(te, link);
1239 /* XXXXX: optimize */
1240 te->when = t + apr_time_now();
1242 /* Okay, add sorted by when.. */
1243 apr_skiplist_insert(timer_skiplist, te);
1245 apr_thread_mutex_unlock(g_timer_skiplist_mtx);
1252 * Close socket and clean up if remote closed its end while we were in
1254 * Only to be called in the listener thread;
1255 * Pre-condition: cs is in one of the linger queues and in the pollset
/* process_lingering_close: drain a connection that is in lingering close and
 * finish closing it.
 *
 * cs  - connection state; pub.state selects short_linger_q
 *       (CONN_STATE_LINGER_SHORT) or linger_q as the queue it sits in.
 * pfd - poll descriptor that is removed from event_pollset.
 *
 * Incoming data is read into a 2048-byte scratch buffer for as long as
 * apr_socket_recv() returns APR_SUCCESS.  An EAGAIN status is tested right
 * after that loop (NOTE(review): the branch body is not visible here;
 * presumably the function returns to wait for more data - confirm).
 * Then, with timeout_mutex held, the descriptor is removed from the
 * pollset, the socket is closed and cs is taken off its linger queue; after
 * unlocking, the queue element is re-initialised and the pool is handed back
 * with ap_push_pool().  ap_queue_interrupt_one(worker_queue) is the last
 * call shown (NOTE(review): any condition guarding it is not visible here).
 */
1257 static void process_lingering_close(event_conn_state_t *cs, const apr_pollfd_t *pfd)
1259 apr_socket_t *csd = ap_get_conn_socket(cs->c);
1260 char dummybuf[2048];
1263 struct timeout_queue *q;
1264 q = (cs->pub.state == CONN_STATE_LINGER_SHORT) ? short_linger_q : linger_q;
1266 /* socket is already in non-blocking state */
1268 nbytes = sizeof(dummybuf);
1269 rv = apr_socket_recv(csd, dummybuf, &nbytes);
1270 } while (rv == APR_SUCCESS);
1272 if (APR_STATUS_IS_EAGAIN(rv)) {
1276 apr_thread_mutex_lock(timeout_mutex);
1277 rv = apr_pollset_remove(event_pollset, pfd);
1278 AP_DEBUG_ASSERT(rv == APR_SUCCESS);
1280 rv = apr_socket_close(csd);
1281 AP_DEBUG_ASSERT(rv == APR_SUCCESS);
1283 TO_QUEUE_REMOVE(q, cs);
1284 apr_thread_mutex_unlock(timeout_mutex);
1285 TO_QUEUE_ELEM_INIT(cs);
1287 ap_push_pool(worker_queue_info, cs->p);
1289 ap_queue_interrupt_one(worker_queue);
1292 /* call 'func' for all elements of 'q' with timeout less than 'timeout_time'.
1293 * Pre-condition: timeout_mutex must already be locked
1294 * Post-condition: timeout_mutex will be locked again
/* process_timeout_queue: expire entries from a chain of timeout queues.
 *
 * q            - first queue of the chain; further queues are reached
 *                through ->next.
 * timeout_time - reference time.  Per the condition shown, an entry is
 *                taken when queue_timestamp + timeout lies before it, or
 *                when queue_timestamp lies beyond timeout_time + timeout
 *                (the existing comment also mentions the "no timeout_time
 *                given" case, whose test is not visible here).
 * func         - callback taking an event_conn_state_t (NOTE(review): the
 *                line invoking it is not visible here).
 *
 * Each matching entry has its pfd removed from event_pollset; a failure
 * other than NOTFOUND is logged at APLOG_ERR.  The run of matching entries
 * is unspliced from its queue and appended to a local 'trash' ring.
 * timeout_mutex is released before the trash ring is walked and locked again
 * on the last line shown.
 */
1296 static void process_timeout_queue(struct timeout_queue *q,
1297 apr_time_t timeout_time,
1298 int (*func)(event_conn_state_t *))
1300 int total = 0, count;
1301 event_conn_state_t *first, *cs, *last;
1302 struct timeout_head_t trash;
1303 struct timeout_queue *qp;
1310 APR_RING_INIT(&trash, event_conn_state_t, timeout_list);
1311 for (qp = q; qp; qp = qp->next) {
1313 cs = first = last = APR_RING_FIRST(&qp->head);
1314 while (cs != APR_RING_SENTINEL(&qp->head, event_conn_state_t,
1316 /* Trash the entry if:
1317 * - no timeout_time was given (asked for all), or
1318 * - it expired (according to the queue timeout), or
1319 * - the system clock skewed in the past: no entry should be
1320 * registered above the given timeout_time (~now) + the queue
1321 * timeout, we won't keep any here (eg. for centuries).
1322 * Stop otherwise, no following entry will match thanks to the
1323 * single timeout per queue (entries are added to the end!).
1324 * This allows maintenance in O(1).
1327 || cs->queue_timestamp + qp->timeout < timeout_time
1328 || cs->queue_timestamp > timeout_time + qp->timeout)) {
1330 rv = apr_pollset_remove(event_pollset, &cs->pfd);
1331 if (rv != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rv)) {
1332 ap_log_cerror(APLOG_MARK, APLOG_ERR, rv, cs->c, APLOGNO(00473)
1333 "apr_pollset_remove failed");
1335 cs = APR_RING_NEXT(cs, timeout_list);
1341 APR_RING_UNSPLICE(first, last, timeout_list);
1342 APR_RING_SPLICE_TAIL(&trash, first, last, event_conn_state_t,
1350 AP_DEBUG_ASSERT(*q->total >= total);
1352 apr_thread_mutex_unlock(timeout_mutex);
1353 first = APR_RING_FIRST(&trash);
1355 cs = APR_RING_NEXT(first, timeout_list);
1356 TO_QUEUE_ELEM_INIT(first);
1360 apr_thread_mutex_lock(timeout_mutex);
/* listener_thread: main loop of the listener thread.
 *
 * thd   - this thread's handle; its pool (apr_thread_pool_get) is passed to
 *         init_pollset().
 * dummy - proc_info; its pslot field supplies the process slot.
 *
 * If init_pollset() fails, the error is logged and a graceful shutdown is
 * requested.  LISTENER_SIGNAL is unblocked and given a dummy handler.
 *
 * Phases of one loop iteration, as far as they are visible here:
 *  - when listener_may_exit is set, the listeners are closed and
 *    terminate_mode / connection_count are tested;
 *  - with trace6 logging enabled, connection counters are logged when more
 *    than one second has passed since last_log;
 *  - the poll timeout is derived from the earliest timer in timer_skiplist
 *    (te->when - now, or 1), with 100 ms used on another branch, and
 *    apr_pollset_poll() is called; EINTR and TIMEUP are tested separately,
 *    other failures are logged and trigger a graceful shutdown;
 *  - timers due before now + EVENT_FUDGE_FACTOR are popped and handed to
 *    workers via push_timer2worker();
 *  - PT_CSD results are dispatched on cs->pub.state: the connection is
 *    removed from its queue and the pollset and pushed to a worker (or put
 *    into lingering close when no worker is idle), or handled by
 *    process_lingering_close();
 *  - PT_ACCEPT results either disable the listening sockets (workers busy
 *    or too many open connections) or accept a connection into a
 *    transaction pool and queue it with ap_queue_push();
 *  - when now has passed timeout_time (or the clock went backwards), the
 *    keep-alive, write-completion and both linger queues are processed with
 *    process_timeout_queue(), the scoreboard process entry is refreshed and
 *    the listening sockets may be re-enabled.
 * After the loop the listeners are closed, the worker queue is terminated
 * and the thread exits with APR_SUCCESS.
 * NOTE(review): several declarations, loop headers, else/break lines and
 * closing braces are not visible here; verify control flow against the
 * full source before relying on the summary above.
 */
1363 static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
1368 proc_info *ti = dummy;
1369 int process_slot = ti->pslot;
1370 apr_pool_t *tpool = apr_thread_pool_get(thd);
1372 apr_pool_t *ptrans; /* Pool for per-transaction stuff */
1374 int have_idle_worker = 0;
1375 const apr_pollfd_t *out_pfd;
1376 apr_int32_t num = 0;
1377 apr_interval_time_t timeout_interval;
1378 apr_time_t timeout_time = 0, now, last_log;
1379 listener_poll_type *pt;
1380 int closed = 0, listeners_disabled = 0;
1382 last_log = apr_time_now();
1385 /* the following times out events that are really close in the future
1386 * to prevent extra poll calls
1388 * current value is .1 second
1390 #define TIMEOUT_FUDGE_FACTOR 100000
1391 #define EVENT_FUDGE_FACTOR 10000
1393 rc = init_pollset(tpool);
1394 if (rc != APR_SUCCESS) {
1395 ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1396 "failed to initialize pollset, "
1397 "attempting to shutdown process gracefully");
1398 signal_threads(ST_GRACEFUL);
1402 /* Unblock the signal used to wake this thread up, and set a handler for
1405 unblock_signal(LISTENER_SIGNAL);
1406 apr_signal(LISTENER_SIGNAL, dummy_signal_handler);
1409 int workers_were_busy = 0;
1410 if (listener_may_exit) {
1411 close_listeners(process_slot, &closed);
1412 if (terminate_mode == ST_UNGRACEFUL
1413 || apr_atomic_read32(&connection_count) == 0)
1417 if (conns_this_child <= 0)
1418 check_infinite_requests();
1420 now = apr_time_now();
1421 if (APLOGtrace6(ap_server_conf)) {
1422 /* trace log status every second */
1423 if (now - last_log > apr_time_from_msec(1000)) {
1425 apr_thread_mutex_lock(timeout_mutex);
1426 ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
1427 "connections: %u (clogged: %u write-completion: %d "
1428 "keep-alive: %d lingering: %d suspended: %u)",
1429 apr_atomic_read32(&connection_count),
1430 apr_atomic_read32(&clogged_count),
1431 *write_completion_q->total,
1432 *keepalive_q->total,
1433 apr_atomic_read32(&lingering_count),
1434 apr_atomic_read32(&suspended_count));
1436 ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
1437 "%u/%u workers shutdown",
1438 apr_atomic_read32(&threads_shutdown),
1441 apr_thread_mutex_unlock(timeout_mutex);
1445 apr_thread_mutex_lock(g_timer_skiplist_mtx);
1446 te = apr_skiplist_peek(timer_skiplist);
1448 if (te->when > now) {
1449 timeout_interval = te->when - now;
1452 timeout_interval = 1;
1456 timeout_interval = apr_time_from_msec(100);
1458 apr_thread_mutex_unlock(g_timer_skiplist_mtx);
1460 rc = apr_pollset_poll(event_pollset, timeout_interval, &num, &out_pfd);
1461 if (rc != APR_SUCCESS) {
1462 if (APR_STATUS_IS_EINTR(rc)) {
1465 if (!APR_STATUS_IS_TIMEUP(rc)) {
1466 ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf,
1467 "apr_pollset_poll failed. Attempting to "
1468 "shutdown process gracefully");
1469 signal_threads(ST_GRACEFUL);
1473 if (listener_may_exit) {
1474 close_listeners(process_slot, &closed);
1475 if (terminate_mode == ST_UNGRACEFUL
1476 || apr_atomic_read32(&connection_count) == 0)
1480 now = apr_time_now();
1481 apr_thread_mutex_lock(g_timer_skiplist_mtx);
1482 ep = apr_skiplist_peek(timer_skiplist);
1484 if (ep->when < now + EVENT_FUDGE_FACTOR) {
1485 apr_skiplist_pop(timer_skiplist, NULL);
1486 push_timer2worker(ep);
1491 ep = apr_skiplist_peek(timer_skiplist);
1493 apr_thread_mutex_unlock(g_timer_skiplist_mtx);
1496 pt = (listener_poll_type *) out_pfd->client_data;
1497 if (pt->type == PT_CSD) {
1498 /* one of the sockets is readable */
1499 event_conn_state_t *cs = (event_conn_state_t *) pt->baton;
1500 struct timeout_queue *remove_from_q = cs->sc->wc_q;
1503 switch (cs->pub.state) {
1504 case CONN_STATE_CHECK_REQUEST_LINE_READABLE:
1505 cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
1506 remove_from_q = cs->sc->ka_q;
1507 /* don't wait for a worker for a keepalive request */
1510 case CONN_STATE_WRITE_COMPLETION:
1511 get_worker(&have_idle_worker, blocking,
1512 &workers_were_busy);
1513 apr_thread_mutex_lock(timeout_mutex);
1514 TO_QUEUE_REMOVE(remove_from_q, cs);
1515 rc = apr_pollset_remove(event_pollset, &cs->pfd);
1516 apr_thread_mutex_unlock(timeout_mutex);
1519 * Some of the pollset backends, like KQueue or Epoll
1520 * automagically remove the FD if the socket is closed,
1521 * therefore, we can accept _SUCCESS or _NOTFOUND,
1522 * and we still want to keep going
1524 if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
1525 ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1526 APLOGNO(03094) "pollset remove failed");
1527 start_lingering_close_nonblocking(cs);
1531 TO_QUEUE_ELEM_INIT(cs);
1532 /* If we didn't get a worker immediately for a keep-alive
1533 * request, we close the connection, so that the client can
1534 * re-connect to a different process.
1536 if (!have_idle_worker) {
1537 start_lingering_close_nonblocking(cs);
1540 rc = push2worker(out_pfd, event_pollset);
1541 if (rc != APR_SUCCESS) {
1542 ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1543 ap_server_conf, APLOGNO(03095)
1544 "push2worker failed");
1547 have_idle_worker = 0;
1550 case CONN_STATE_LINGER_NORMAL:
1551 case CONN_STATE_LINGER_SHORT:
1552 process_lingering_close(cs, out_pfd);
1555 ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1556 ap_server_conf, APLOGNO(03096)
1557 "event_loop: unexpected state %d",
1562 else if (pt->type == PT_ACCEPT) {
1563 /* A Listener Socket is ready for an accept() */
1564 if (workers_were_busy) {
1565 if (!listeners_disabled)
1566 disable_listensocks(process_slot);
1567 listeners_disabled = 1;
1568 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1569 "All workers busy, not accepting new conns "
1572 else if ( (int)apr_atomic_read32(&connection_count)
1573 - (int)apr_atomic_read32(&lingering_count)
1575 + ap_queue_info_get_idlers(worker_queue_info) *
1576 worker_factor / WORKER_FACTOR_SCALE)
1578 if (!listeners_disabled)
1579 disable_listensocks(process_slot);
1580 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1581 "Too many open connections (%u), "
1582 "not accepting new conns in this process",
1583 apr_atomic_read32(&connection_count));
1584 ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1586 ap_queue_info_get_idlers(worker_queue_info));
1587 listeners_disabled = 1;
1589 else if (listeners_disabled) {
1590 listeners_disabled = 0;
1591 enable_listensocks(process_slot);
1593 if (!listeners_disabled) {
1594 lr = (ap_listen_rec *) pt->baton;
1595 ap_pop_pool(&ptrans, worker_queue_info);
1597 if (ptrans == NULL) {
1598 /* create a new transaction pool for each accepted socket */
1599 apr_allocator_t *allocator;
1601 apr_allocator_create(&allocator);
1602 apr_allocator_max_free_set(allocator,
1604 apr_pool_create_ex(&ptrans, pconf, NULL, allocator);
1605 apr_allocator_owner_set(allocator, ptrans);
1606 if (ptrans == NULL) {
1607 ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1608 ap_server_conf, APLOGNO(03097)
1609 "Failed to create transaction pool");
1610 signal_threads(ST_GRACEFUL);
1614 apr_pool_tag(ptrans, "transaction");
1616 get_worker(&have_idle_worker, 1, &workers_were_busy);
1617 rc = lr->accept_func(&csd, lr, ptrans);
1619 /* later we trash rv and rely on csd to indicate
1622 AP_DEBUG_ASSERT(rc == APR_SUCCESS || !csd);
1624 if (rc == APR_EGENERAL) {
1625 /* E[NM]FILE, ENOMEM, etc */
1626 resource_shortage = 1;
1627 signal_threads(ST_GRACEFUL);
1632 rc = ap_queue_push(worker_queue, csd, NULL, ptrans);
1633 if (rc != APR_SUCCESS) {
1634 /* trash the connection; we couldn't queue the connected
1635 * socket to a worker
1637 apr_socket_close(csd);
1638 ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1639 ap_server_conf, APLOGNO(03098)
1640 "ap_queue_push failed");
1641 ap_push_pool(worker_queue_info, ptrans);
1644 have_idle_worker = 0;
1648 ap_push_pool(worker_queue_info, ptrans);
1651 } /* if:else on pt->type */
1654 } /* while for processing poll */
1656 /* XXX possible optimization: stash the current time for use as
1657 * r->request_time for new requests
1659 now = apr_time_now();
1660 /* We only do this once per 0.1s (TIMEOUT_FUDGE_FACTOR), or on a clock
1661 * skew (if the system time is set back in the meantime, timeout_time
1662 * will exceed now + TIMEOUT_FUDGE_FACTOR, can't happen otherwise).
1664 if (now > timeout_time || now + TIMEOUT_FUDGE_FACTOR < timeout_time ) {
1665 struct process_score *ps;
1666 timeout_time = now + TIMEOUT_FUDGE_FACTOR;
1668 /* handle timed out sockets */
1669 apr_thread_mutex_lock(timeout_mutex);
1671 /* Step 1: keepalive timeouts */
1672 /* If all workers are busy, we kill older keep-alive connections so that they
1673 * may connect to another process.
1675 if ((workers_were_busy || dying) && *keepalive_q->total) {
1677 ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1678 "All workers are busy, will close %d keep-alive "
1680 *keepalive_q->total);
1681 process_timeout_queue(keepalive_q, 0,
1682 start_lingering_close_nonblocking);
1685 process_timeout_queue(keepalive_q, timeout_time,
1686 start_lingering_close_nonblocking);
1688 /* Step 2: write completion timeouts */
1689 process_timeout_queue(write_completion_q, timeout_time,
1690 start_lingering_close_nonblocking);
1691 /* Step 3: (normal) lingering close completion timeouts */
1692 process_timeout_queue(linger_q, timeout_time, stop_lingering_close);
1693 /* Step 4: (short) lingering close completion timeouts */
1694 process_timeout_queue(short_linger_q, timeout_time, stop_lingering_close);
1696 ps = ap_get_scoreboard_process(process_slot);
1697 ps->write_completion = *write_completion_q->total;
1698 ps->keep_alive = *keepalive_q->total;
1699 apr_thread_mutex_unlock(timeout_mutex);
1701 ps->connections = apr_atomic_read32(&connection_count);
1702 ps->suspended = apr_atomic_read32(&suspended_count);
1703 ps->lingering_close = apr_atomic_read32(&lingering_count);
1705 if (listeners_disabled && !workers_were_busy
1706 && (int)apr_atomic_read32(&connection_count)
1707 - (int)apr_atomic_read32(&lingering_count)
1708 < ((int)ap_queue_info_get_idlers(worker_queue_info) - 1)
1709 * worker_factor / WORKER_FACTOR_SCALE + threads_per_child)
1711 listeners_disabled = 0;
1712 enable_listensocks(process_slot);
1715 * XXX: do we need to set some timeout that re-enables the listensocks
1716 * XXX: in case no other event occurs?
1718 } /* listener main loop */
1720 close_listeners(process_slot, &closed);
1721 ap_queue_term(worker_queue);
1723 apr_thread_exit(thd, APR_SUCCESS);
1728 * During graceful shutdown, if there are more running worker threads than
1729 * open connections, exit one worker thread.
1731 * return 1 if thread should exit, 0 if it should continue running.
/* worker_thread_should_exit_early: decide whether the calling worker may
 * exit.
 *
 * Takes no arguments.  It reads connection_count and threads_shutdown
 * atomically, asserts dead <= threads_per_child, and first compares
 * conns >= threads_per_child - dead.  threads_shutdown is then updated with
 * apr_atomic_cas32() from 'dead' to 'newdead'; the exchange counts as
 * successful only when the value it returns is still 'dead'.
 * NOTE(review): the computation of 'newdead', any retry loop and the return
 * statements are not visible here; the 1 / 0 meaning of the result is taken
 * from the comment preceding the function.
 */
1733 static int worker_thread_should_exit_early(void)
1736 apr_uint32_t conns = apr_atomic_read32(&connection_count);
1737 apr_uint32_t dead = apr_atomic_read32(&threads_shutdown);
1738 apr_uint32_t newdead;
1740 AP_DEBUG_ASSERT(dead <= threads_per_child);
1741 if (conns >= threads_per_child - dead)
1745 if (apr_atomic_cas32(&threads_shutdown, newdead, dead) == dead) {
1747 * No other thread has exited in the mean time, safe to exit
1755 /* XXX For ungraceful termination/restart, we definitely don't want to
1756 * wait for active connections to finish but we may want to wait
1757 * for idle workers to get out of the queue code and release mutexes,
1758 * since those mutexes are cleaned up pretty soon and some systems
1759 * may not react favorably (i.e., segfault) if operations are attempted
1760 * on cleaned-up mutexes.
/* worker_thread: body of each worker thread.
 *
 * thd   - this thread's handle.
 * dummy - proc_info carrying the process slot (pslot) and the thread slot
 *         (tslot).
 *
 * The thread records its pid, tid and generation in its scoreboard slot and
 * marks itself SERVER_STARTING.  While workers_may_exit is not set it
 * announces itself idle with ap_queue_info_set_idle() (a failure is logged
 * at APLOG_EMERG and triggers a graceful shutdown), updates its status to
 * SERVER_GRACEFUL when dying or SERVER_READY otherwise, checks
 * worker_thread_should_exit_early() when dying, and pops work with
 * ap_queue_pop_something().  EOF and EINTR results are tested separately;
 * other errors are logged unless workers_may_exit is set.  A popped timer
 * event has its callback invoked with its baton and is then appended to
 * timer_free_ring under g_timer_skiplist_mtx.  A popped socket is stored in
 * worker_sockets[thread_slot] for the duration of process_socket().
 * The thread exits with APR_SUCCESS.
 * NOTE(review): the conditions selecting the timer path versus the socket
 * path, and the break/continue lines, are not visible here.
 */
1762 static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
1764 proc_info *ti = dummy;
1765 int process_slot = ti->pslot;
1766 int thread_slot = ti->tslot;
1767 apr_socket_t *csd = NULL;
1768 event_conn_state_t *cs;
1769 apr_pool_t *ptrans; /* Pool for per-transaction stuff */
1772 timer_event_t *te = NULL;
1776 ap_scoreboard_image->servers[process_slot][thread_slot].pid = ap_my_pid;
1777 ap_scoreboard_image->servers[process_slot][thread_slot].tid = apr_os_thread_current();
1778 ap_scoreboard_image->servers[process_slot][thread_slot].generation = retained->mpm->my_generation;
1779 ap_update_child_status_from_indexes(process_slot, thread_slot,
1780 SERVER_STARTING, NULL);
1782 while (!workers_may_exit) {
1784 rv = ap_queue_info_set_idle(worker_queue_info, NULL);
1785 if (rv != APR_SUCCESS) {
1786 ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
1787 "ap_queue_info_set_idle failed. Attempting to "
1788 "shutdown process gracefully.");
1789 signal_threads(ST_GRACEFUL);
1795 ap_update_child_status_from_indexes(process_slot, thread_slot,
1796 dying ? SERVER_GRACEFUL
1797 : SERVER_READY, NULL);
1799 if (workers_may_exit) {
1802 if (dying && worker_thread_should_exit_early()) {
1807 rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te);
1809 if (rv != APR_SUCCESS) {
1810 /* We get APR_EOF during a graceful shutdown once all the
1811 * connections accepted by this server process have been handled.
1813 if (APR_STATUS_IS_EOF(rv)) {
1816 /* We get APR_EINTR whenever ap_queue_pop() has been interrupted
1817 * from an explicit call to ap_queue_interrupt_all(). This allows
1818 * us to unblock threads stuck in ap_queue_pop() when a shutdown
1821 * If workers_may_exit is set and this is ungraceful termination/
1822 * restart, we are bound to get an error on some systems (e.g.,
1823 * AIX, which sanity-checks mutex operations) since the queue
1824 * may have already been cleaned up. Don't log the "error" if
1825 * workers_may_exit is set.
1827 else if (APR_STATUS_IS_EINTR(rv)) {
1830 /* We got some other error. */
1831 else if (!workers_may_exit) {
1832 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1833 APLOGNO(03099) "ap_queue_pop failed");
1838 te->cbfunc(te->baton);
1841 apr_thread_mutex_lock(g_timer_skiplist_mtx);
1842 APR_RING_INSERT_TAIL(&timer_free_ring, te, timer_event_t, link);
1843 apr_thread_mutex_unlock(g_timer_skiplist_mtx);
1848 worker_sockets[thread_slot] = csd;
1849 process_socket(thd, ptrans, csd, cs, process_slot, thread_slot);
1850 worker_sockets[thread_slot] = NULL;
1854 ap_update_child_status_from_indexes(process_slot, thread_slot,
1856 : SERVER_GRACEFUL, NULL);
1858 apr_thread_exit(thd, APR_SUCCESS);
/* check_signal: callback handed to apr_signal_thread() by child_main().
 *
 * signum - signal number being examined.
 *
 * NOTE(review): the body and the meaning of the int result are not visible
 * here - confirm against the full source.
 */
1862 static int check_signal(int signum)
/* create_listener_thread: start the listener thread of this child.
 *
 * ts - thread_starter supplying the child number (child_num_arg) and the
 *      thread attributes; ts->listener receives the new thread handle.
 *
 * A proc_info is allocated with ap_malloc() and filled with
 * pslot = child number and tslot = -1.  If apr_thread_create() fails, the
 * error is logged at APLOG_ALERT and clean_child_exit(APEXIT_CHILDSICK) is
 * called.  The listener's OS thread id is stored in listener_os_thread.
 */
1874 static void create_listener_thread(thread_starter * ts)
1876 int my_child_num = ts->child_num_arg;
1877 apr_threadattr_t *thread_attr = ts->threadattr;
1881 my_info = (proc_info *) ap_malloc(sizeof(proc_info));
1882 my_info->pslot = my_child_num;
1883 my_info->tslot = -1; /* listener thread doesn't have a thread slot */
1884 rv = apr_thread_create(&ts->listener, thread_attr, listener_thread,
1886 if (rv != APR_SUCCESS) {
1887 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(00474)
1888 "apr_thread_create: unable to create listener thread");
1889 /* let the parent decide how bad this really is */
1890 clean_child_exit(APEXIT_CHILDSICK);
1892 apr_os_thread_get(&listener_os_thread, ts->listener);
1895 /* XXX under some circumstances not understood, children can get stuck
1896 * in start_threads forever trying to take over slots which will
1897 * never be cleaned up; for now there is an APLOG_DEBUG message issued
1898 * every so often when this condition occurs
/* start_threads: thread that builds the shared per-child state and then
 * spawns the worker threads and the listener thread.
 *
 * thd   - this thread's handle.
 * dummy - thread_starter describing the child (threads array, thread
 *         attributes, child number).
 *
 * Set-up steps, each ending the child through clean_child_exit() on failure:
 *  - worker_queue (ap_queue_init, sized by threads_per_child);
 *  - worker_queue_info (ap_queue_info_create); max_recycled_pools stays -1
 *    unless ap_max_mem_free differs from APR_ALLOCATOR_MAX_FREE_UNLIMITED,
 *    in which case it becomes threads_per_child * 3 / 4;
 *  - timeout_mutex;
 *  - event_pollset, tried with each method in good_methods through
 *    apr_pollset_create_ex() and then with apr_pollset_create().
 * worker_sockets is allocated with one slot per worker.
 *
 * The function then walks the threads_per_child scoreboard slots, testing
 * each slot's status against SERVER_DEAD, and creates a worker_thread with
 * its own proc_info; the listener is created once at least one worker
 * exists.  The outer loop ends when start_thread_may_exit is set or all
 * workers were created; otherwise it sleeps one second per pass and, every
 * 120 loops, logs a debug message when no new thread was created.
 * The thread exits with APR_SUCCESS.
 * NOTE(review): loop headers, continue/break lines and some declarations
 * are not visible here.
 */
1900 static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
1902 thread_starter *ts = dummy;
1903 apr_thread_t **threads = ts->threads;
1904 apr_threadattr_t *thread_attr = ts->threadattr;
1905 int my_child_num = ts->child_num_arg;
1909 int threads_created = 0;
1910 int listener_started = 0;
1912 int prev_threads_created;
1913 int max_recycled_pools = -1;
1914 int good_methods[] = {APR_POLLSET_KQUEUE, APR_POLLSET_PORT, APR_POLLSET_EPOLL};
1916 /* We must create the fd queues before we start up the listener
1917 * and worker threads. */
1918 worker_queue = apr_pcalloc(pchild, sizeof(*worker_queue));
1919 rv = ap_queue_init(worker_queue, threads_per_child, pchild);
1920 if (rv != APR_SUCCESS) {
1921 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(03100)
1922 "ap_queue_init() failed");
1923 clean_child_exit(APEXIT_CHILDFATAL);
1926 if (ap_max_mem_free != APR_ALLOCATOR_MAX_FREE_UNLIMITED) {
1927 /* If we want to conserve memory, let's not keep an unlimited number of
1928 * pools & allocators.
1929 * XXX: This should probably be a separate config directive
1931 max_recycled_pools = threads_per_child * 3 / 4 ;
1933 rv = ap_queue_info_create(&worker_queue_info, pchild,
1934 threads_per_child, max_recycled_pools);
1935 if (rv != APR_SUCCESS) {
1936 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(03101)
1937 "ap_queue_info_create() failed");
1938 clean_child_exit(APEXIT_CHILDFATAL);
1941 /* Create the timeout mutex and main pollset before the listener
1944 rv = apr_thread_mutex_create(&timeout_mutex, APR_THREAD_MUTEX_DEFAULT,
1946 if (rv != APR_SUCCESS) {
1947 ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03102)
1948 "creation of the timeout mutex failed.");
1949 clean_child_exit(APEXIT_CHILDFATAL);
1952 /* Create the main pollset */
1953 for (i = 0; i < sizeof(good_methods) / sizeof(good_methods[0]); i++) {
1954 rv = apr_pollset_create_ex(&event_pollset,
1955 threads_per_child*2, /* XXX don't we need more, to handle
1956 * connections in K-A or lingering
1959 pchild, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY | APR_POLLSET_NODEFAULT,
1961 if (rv == APR_SUCCESS) {
1965 if (rv != APR_SUCCESS) {
1966 rv = apr_pollset_create(&event_pollset,
1967 threads_per_child*2, /* XXX don't we need more, to handle
1968 * connections in K-A or lingering
1971 pchild, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
1973 if (rv != APR_SUCCESS) {
1974 ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03103)
1975 "apr_pollset_create with Thread Safety failed.");
1976 clean_child_exit(APEXIT_CHILDFATAL);
1979 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(02471)
1980 "start_threads: Using %s", apr_pollset_method_name(event_pollset));
1981 worker_sockets = apr_pcalloc(pchild, threads_per_child
1982 * sizeof(apr_socket_t *));
1984 loops = prev_threads_created = 0;
1986 /* threads_per_child does not include the listener thread */
1987 for (i = 0; i < threads_per_child; i++) {
1989 ap_scoreboard_image->servers[my_child_num][i].status;
1991 if (status != SERVER_DEAD) {
1995 my_info = (proc_info *) ap_malloc(sizeof(proc_info));
1996 my_info->pslot = my_child_num;
1999 /* We are creating threads right now */
2000 ap_update_child_status_from_indexes(my_child_num, i,
2001 SERVER_STARTING, NULL);
2002 /* We let each thread update its own scoreboard entry. This is
2003 * done because it lets us deal with tid better.
2005 rv = apr_thread_create(&threads[i], thread_attr,
2006 worker_thread, my_info, pchild);
2007 if (rv != APR_SUCCESS) {
2008 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
2010 "apr_thread_create: unable to create worker thread");
2011 /* let the parent decide how bad this really is */
2012 clean_child_exit(APEXIT_CHILDSICK);
2017 /* Start the listener only when there are workers available */
2018 if (!listener_started && threads_created) {
2019 create_listener_thread(ts);
2020 listener_started = 1;
2024 if (start_thread_may_exit || threads_created == threads_per_child) {
2027 /* wait for previous generation to clean up an entry */
2028 apr_sleep(apr_time_from_sec(1));
2030 if (loops % 120 == 0) { /* every couple of minutes */
2031 if (prev_threads_created == threads_created) {
2032 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
2033 "child %" APR_PID_T_FMT " isn't taking over "
2034 "slots very quickly (%d of %d)",
2035 ap_my_pid, threads_created,
2038 prev_threads_created = threads_created;
2042 /* What state should this child_main process be listed as in the
2044 * ap_update_child_status_from_indexes(my_child_num, i, SERVER_STARTING,
2045 * (request_rec *) NULL);
2047 * This state should be listed separately in the scoreboard, in some kind
2048 * of process_status, not mixed in with the worker threads' status.
2049 * "life_status" is almost right, but it's in the worker's structure, and
2050 * the name could be clearer. gla
2052 apr_thread_exit(thd, APR_SUCCESS);
/* join_workers: wait for the listener thread and every worker thread to
 * finish.
 *
 * listener - listener thread handle (NOTE(review): any NULL guard around
 *            the listener handling is not visible here).
 * threads  - array of threads_per_child worker handles; entries that are
 *            NULL (thread never created) are skipped.
 *
 * Before joining the listener the function waits in 0.5 second steps, for
 * fewer than 10 iterations, while 'dying' is not set; a debug message
 * "the listener thread didn't stop accepting" follows (NOTE(review): the
 * condition guarding that message and the re-sent wake-up are not visible
 * here).  Failures of apr_thread_join() are logged at APLOG_CRIT.
 */
2056 static void join_workers(apr_thread_t * listener, apr_thread_t ** threads)
2059 apr_status_t rv, thread_rv;
2064 /* deal with a rare timing window which affects waking up the
2065 * listener thread... if the signal sent to the listener thread
2066 * is delivered between the time it verifies that the
2067 * listener_may_exit flag is clear and the time it enters a
2068 * blocking syscall, the signal didn't do any good... work around
2069 * that by sleeping briefly and sending it again
2073 while (iter < 10 && !dying) {
2074 /* listener has not stopped accepting yet */
2075 apr_sleep(apr_time_make(0, 500000));
2080 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00475)
2081 "the listener thread didn't stop accepting");
2084 rv = apr_thread_join(&thread_rv, listener);
2085 if (rv != APR_SUCCESS) {
2086 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00476)
2087 "apr_thread_join: unable to join listener thread");
2092 for (i = 0; i < threads_per_child; i++) {
2093 if (threads[i]) { /* if we ever created this thread */
2094 rv = apr_thread_join(&thread_rv, threads[i]);
2095 if (rv != APR_SUCCESS) {
2096 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00477)
2097 "apr_thread_join: unable to join worker "
/* join_start_thread: tell the start_threads() thread to stop and wait for
 * it.
 *
 * start_thread_id - handle of the thread to join.
 *
 * start_thread_may_exit is set to 1 first (start_threads() tests that flag
 * in its loop), then the thread is joined; a join failure is logged at
 * APLOG_CRIT.
 */
2104 static void join_start_thread(apr_thread_t * start_thread_id)
2106 apr_status_t rv, thread_rv;
2108 start_thread_may_exit = 1; /* tell it to give up in case it is still
2109 * trying to take over slots from a
2110 * previous generation
2112 rv = apr_thread_join(&thread_rv, start_thread_id);
2113 if (rv != APR_SUCCESS) {
2114 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00478)
2115 "apr_thread_join: unable to join the start " "thread");
/* child_main: entry point of a child process.
 *
 * child_num_arg - child slot number; stored in the thread_starter handed to
 *                 start_threads().
 * child_bucket  - index of this child's listener bucket; the listeners and
 *                 pods of every other bucket are closed.
 *
 * Initialisation: mpm_state is set to AP_MPMQ_STARTING, pchild is created
 * under pconf, the scoreboard is reopened and privileges are dropped (a
 * failure ends the child with APEXIT_CHILDFATAL).  The timer mutex, free
 * ring and skiplist (compared with timer_comp) are created before the
 * child_init hooks run.  apr_setup_signal_thread() is then called.
 * conns_this_child is ap_max_requests_per_child when that is non-zero and
 * APR_INT32_MAX otherwise.  The threads array is zero-allocated, thread
 * attributes are prepared (joinable, optional stack size) and a start
 * thread running start_threads() is created; mpm_state becomes
 * AP_MPMQ_RUNNING.
 *
 * Two shutdown paths are visible:
 *  - one that blocks in apr_signal_thread(check_signal), then joins the
 *    start thread, calls signal_threads(ST_UNGRACEFUL) and join_workers();
 *  - the "!one_process" path, which unblocks SIGTERM, polls the pod with
 *    ap_mpm_podx_check(), maps terminate_mode onto a graceful or restart
 *    request, joins the start thread, signals the threads accordingly and
 *    calls join_workers().
 * The child finally exits through clean_child_exit(), with
 * APEXIT_CHILDSICK when resource_shortage is set and 0 otherwise.
 * NOTE(review): the condition selecting the first path and the loop around
 * the pod check are not visible here.
 */
2119 static void child_main(int child_num_arg, int child_bucket)
2121 apr_thread_t **threads;
2124 apr_threadattr_t *thread_attr;
2125 apr_thread_t *start_thread_id;
2128 /* for benefit of any hooks that run as this child initializes */
2129 retained->mpm->mpm_state = AP_MPMQ_STARTING;
2131 ap_my_pid = getpid();
2132 ap_fatal_signal_child_setup(ap_server_conf);
2133 apr_pool_create(&pchild, pconf);
2135 /* close unused listeners and pods */
2136 for (i = 0; i < retained->mpm->num_buckets; i++) {
2137 if (i != child_bucket) {
2138 ap_close_listeners_ex(all_buckets[i].listeners);
2139 ap_mpm_podx_close(all_buckets[i].pod);
2143 /*stuff to do before we switch id's, so we have permissions. */
2144 ap_reopen_scoreboard(pchild, NULL, 0);
2146 if (ap_run_drop_privileges(pchild, ap_server_conf)) {
2147 clean_child_exit(APEXIT_CHILDFATAL);
2150 apr_thread_mutex_create(&g_timer_skiplist_mtx, APR_THREAD_MUTEX_DEFAULT, pchild);
2151 APR_RING_INIT(&timer_free_ring, timer_event_t, link);
2152 apr_skiplist_init(&timer_skiplist, pchild);
2153 apr_skiplist_set_compare(timer_skiplist, timer_comp, timer_comp);
2154 ap_run_child_init(pchild, ap_server_conf);
2156 /* done with init critical section */
2158 /* Just use the standard apr_setup_signal_thread to block all signals
2159 * from being received. The child processes no longer use signals for
2160 * any communication with the parent process.
2162 rv = apr_setup_signal_thread();
2163 if (rv != APR_SUCCESS) {
2164 ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO(00479)
2165 "Couldn't initialize signal thread");
2166 clean_child_exit(APEXIT_CHILDFATAL);
2169 if (ap_max_requests_per_child) {
2170 conns_this_child = ap_max_requests_per_child;
2173 /* coding a value of zero means infinity */
2174 conns_this_child = APR_INT32_MAX;
2177 /* Setup worker threads */
2179 /* clear the storage; we may not create all our threads immediately,
2180 * and we want a 0 entry to indicate a thread which was not created
2182 threads = ap_calloc(threads_per_child, sizeof(apr_thread_t *));
2183 ts = apr_palloc(pchild, sizeof(*ts));
2185 apr_threadattr_create(&thread_attr, pchild);
2186 /* 0 means PTHREAD_CREATE_JOINABLE */
2187 apr_threadattr_detach_set(thread_attr, 0);
2189 if (ap_thread_stacksize != 0) {
2190 rv = apr_threadattr_stacksize_set(thread_attr, ap_thread_stacksize);
2191 if (rv != APR_SUCCESS && rv != APR_ENOTIMPL) {
2192 ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(02436)
2193 "WARNING: ThreadStackSize of %" APR_SIZE_T_FMT " is "
2194 "inappropriate, using default",
2195 ap_thread_stacksize);
2199 ts->threads = threads;
2200 ts->listener = NULL;
2201 ts->child_num_arg = child_num_arg;
2202 ts->threadattr = thread_attr;
2204 rv = apr_thread_create(&start_thread_id, thread_attr, start_threads,
2206 if (rv != APR_SUCCESS) {
2207 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(00480)
2208 "apr_thread_create: unable to create worker thread");
2209 /* let the parent decide how bad this really is */
2210 clean_child_exit(APEXIT_CHILDSICK);
2213 retained->mpm->mpm_state = AP_MPMQ_RUNNING;
2215 /* If we are only running in one_process mode, we will want to
2216 * still handle signals. */
2218 /* Block until we get a terminating signal. */
2219 apr_signal_thread(check_signal);
2220 /* make sure the start thread has finished; signal_threads()
2221 * and join_workers() depend on that
2223 /* XXX join_start_thread() won't be awakened if one of our
2224 * threads encounters a critical error and attempts to
2225 * shutdown this child
2227 join_start_thread(start_thread_id);
2229 /* helps us terminate a little more quickly than the dispatch of the
2230 * signal thread; beats the Pipe of Death and the browsers
2232 signal_threads(ST_UNGRACEFUL);
2234 /* A terminating signal was received. Now join each of the
2235 * workers to clean them up.
2236 * If the worker already exited, then the join frees
2237 * their resources and returns.
2238 * If the worker hasn't exited, then this blocks until
2239 * they have (then cleans up).
2241 join_workers(ts->listener, threads);
2243 else { /* !one_process */
2244 /* remove SIGTERM from the set of blocked signals... if one of
2245 * the other threads in the process needs to take us down
2246 * (e.g., for MaxConnectionsPerChild) it will send us SIGTERM
2248 unblock_signal(SIGTERM);
2249 apr_signal(SIGTERM, dummy_signal_handler);
2250 /* Watch for any messages from the parent over the POD */
2252 rv = ap_mpm_podx_check(my_bucket->pod);
2253 if (rv == AP_MPM_PODX_NORESTART) {
2254 /* see if termination was triggered while we slept */
2255 switch (terminate_mode) {
2257 rv = AP_MPM_PODX_GRACEFUL;
2260 rv = AP_MPM_PODX_RESTART;
2264 if (rv == AP_MPM_PODX_GRACEFUL || rv == AP_MPM_PODX_RESTART) {
2265 /* make sure the start thread has finished;
2266 * signal_threads() and join_workers depend on that
2268 join_start_thread(start_thread_id);
2269 signal_threads(rv ==
2270 AP_MPM_PODX_GRACEFUL ? ST_GRACEFUL : ST_UNGRACEFUL);
2275 /* A terminating signal was received. Now join each of the
2276 * workers to clean them up.
2277 * If the worker already exited, then the join frees
2278 * their resources and returns.
2279 * If the worker hasn't exited, then this blocks until
2280 * they have (then cleans up).
2282 join_workers(ts->listener, threads);
2287 clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
/* Create one child process for scoreboard slot `slot`, attached to listener
 * bucket `bucket`; `s` is only used here as the log target when fork() fails.
 *
 * Visible steps:
 *  - grow retained->max_daemons_limit to slot + 1 when the slot lies beyond it;
 *  - log APLOGNO(03455) when the scoreboard still records a pid for the slot;
 *  - one path runs the child in the current process on bucket 0
 *    (event_note_child_started() with getpid(), then child_main(slot, 0));
 *    NOTE(review): the guarding test is not visible in this listing,
 *    presumably one_process mode - confirm;
 *  - otherwise fork(): a failure is logged and followed by a 10 second sleep;
 *  - the code from the my_bucket assignment through child_main(slot, bucket)
 *    selects the bucket, optionally unbinds the processor
 *    (HAVE_BINDPROCESSOR), installs just_die for SIGTERM and runs the child;
 *    presumably this is the pid == 0 side - confirm;
 *  - the remaining statements reset the quiescing and not_accepting flags of
 *    the slot, record its bucket, note the new pid and bump
 *    retained->total_daemons.
 *
 * NOTE(review): the return statements are not visible in this listing;
 * startup_children() tests the result for < 0.
 */
2290 static int make_child(server_rec * s, int slot, int bucket)
2294 if (slot + 1 > retained->max_daemons_limit) {
2295 retained->max_daemons_limit = slot + 1;
2298 if (ap_scoreboard_image->parent[slot].pid != 0) {
2299 /* XXX replace with assert or remove ? */
2300 ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(03455)
2301 "BUG: Scoreboard slot %d should be empty but is "
2302 "in use by pid %" APR_PID_T_FMT,
2303 slot, ap_scoreboard_image->parent[slot].pid);
2308 my_bucket = &all_buckets[0];
2310 event_note_child_started(slot, getpid());
2311 child_main(slot, 0);
2317 if ((pid = fork()) == -1) {
2318 ap_log_error(APLOG_MARK, APLOG_ERR, errno, s, APLOGNO(00481)
2319 "fork: Unable to fork new process");
2321 /* fork didn't succeed. There's no need to touch the scoreboard;
2322 * if we were trying to replace a failed child process, then
2323 * server_main_loop() marked its workers SERVER_DEAD, and if
2324 * we were trying to replace a child process that exited normally,
2325 * its worker_thread()s left SERVER_DEAD or SERVER_GRACEFUL behind.
2328 /* In case system resources are maxed out, we don't want
2329 Apache running away with the CPU trying to fork over and
2330 over and over again. */
2331 apr_sleep(apr_time_from_sec(10));
2337 my_bucket = &all_buckets[bucket];
2339 #ifdef HAVE_BINDPROCESSOR
2340 /* By default, AIX binds to a single processor. This bit unbinds
2341 * children which will then bind to another CPU.
2343 int status = bindprocessor(BINDPROCESS, (int) getpid(),
2344 PROCESSOR_CLASS_ANY);
2346 ap_log_error(APLOG_MARK, APLOG_DEBUG, errno,
2347 ap_server_conf, APLOGNO(00482)
2348 "processor unbind failed");
2350 RAISE_SIGSTOP(MAKE_CHILD);
2352 apr_signal(SIGTERM, just_die);
2353 child_main(slot, bucket);
2359 ap_scoreboard_image->parent[slot].quiescing = 0;
2360 ap_scoreboard_image->parent[slot].not_accepting = 0;
2361 ap_scoreboard_image->parent[slot].bucket = bucket;
2362 event_note_child_started(slot, pid);
2364 retained->total_daemons++;
2368 /* Start up a bunch of children.
 *
 * Walks the scoreboard slots from 0 for as long as number_to_start is
 * non-zero and the slot index stays below server_limit. Each slot is tested
 * for an already recorded pid, and make_child() is invoked with bucket
 * (slot index % num_buckets), which spreads new children round-robin over
 * the listener buckets; a negative result from make_child() is tested for.
 *
 * NOTE(review): the statements executed on those two tests, and the update
 * of number_to_start, are not visible in this listing - presumably busy
 * slots are skipped and the loop stops on failure; confirm.
 */
2369 static void startup_children(int number_to_start)
2373 for (i = 0; number_to_start && i < server_limit; ++i) {
2374 if (ap_scoreboard_image->parent[i].pid != 0) {
2377 if (make_child(ap_server_conf, i, i % retained->mpm->num_buckets) < 0) {
/* Periodic spare-capacity check for the children of one listener bucket.
 *
 * child_bucket  index of the bucket whose children are counted; also selects
 *               the pod that is signalled and the idle_spawn_rate entry used.
 * num_buckets   divisor applied to min_spare_threads, max_spare_threads and
 *               MAX_SPAWN_RATE so that each bucket gets an equal share.
 *
 * Scoreboard scan (slots below server_limit):
 *  - a thread counts as idle when its status is <= SERVER_READY and its
 *    process is not quiescing, is accepting, belongs to the current
 *    generation and to child_bucket;
 *  - a thread counts as active when SERVER_READY <= status < SERVER_GRACEFUL;
 *  - slots without a pid are collected in free_slots, at most
 *    idle_spawn_rate[child_bucket] of them;
 *  - a process whose threads are all active sets had_healthy_child.
 *
 * Decisions after the scan:
 *  - a pending sick_child_detected flag is cleared when a healthy child has
 *    been seen, otherwise shutdown_pending is raised and an ALERT is logged;
 *  - max_daemons_limit is set to last_non_dead + 1;
 *  - too many idle threads: one child is asked to exit gracefully through
 *    the bucket pod, unless the daemon totals checked below forbid it;
 *  - too few idle threads: either report that MaxRequestWorkers was reached
 *    (once), report a full scoreboard, or spawn children into the free slots
 *    and double the spawn rate while it is below MAX_SPAWN_RATE / num_buckets
 *    and hold_off_on_exponential_spawning has run down to zero;
 *  - the final assignment resets the spawn rate to 1.
 *
 * NOTE(review): several declarations, the last_non_dead update and some
 * branch keywords are not visible in this listing.
 */
2384 static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
2387 int idle_thread_count = 0;
2390 int free_length = 0;
2391 int free_slots[MAX_SPAWN_RATE];
2392 int last_non_dead = -1;
2393 int active_thread_count = 0;
2395 for (i = 0; i < server_limit; ++i) {
2396 /* Initialization to satisfy the compiler. It doesn't know
2397 * that threads_per_child is always > 0 */
2398 int status = SERVER_DEAD;
2399 int child_threads_active = 0;
2401 if (i >= retained->max_daemons_limit &&
2402 free_length == retained->idle_spawn_rate[child_bucket]) {
2403 /* short cut if all active processes have been examined and
2404 * enough empty scoreboard slots have been found
2409 ps = &ap_scoreboard_image->parent[i];
2411 for (j = 0; j < threads_per_child; j++) {
2412 ws = &ap_scoreboard_image->servers[i][j];
2413 status = ws->status;
2415 /* We consider a starting server as idle because we started it
2416 * at least a cycle ago, and if it still hasn't finished starting
2417 * then we're just going to swamp things worse by forking more.
2418 * So we hopefully won't need to fork more if we count it.
2419 * This depends on the ordering of SERVER_READY and SERVER_STARTING.
2421 if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting
2422 && ps->generation == retained->mpm->my_generation
2423 && ps->bucket == child_bucket)
2425 ++idle_thread_count;
2427 if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
2428 ++child_threads_active;
2433 active_thread_count += child_threads_active;
2434 if (!ps->pid && free_length < retained->idle_spawn_rate[child_bucket])
2435 free_slots[free_length++] = i;
2436 else if (child_threads_active == threads_per_child)
2437 had_healthy_child = 1;
2440 if (retained->sick_child_detected) {
2441 if (had_healthy_child) {
2442 /* Assume this is a transient error, even though it may not be. Leave
2443 * the server up in case it is able to serve some requests or the
2444 * problem will be resolved.
2446 retained->sick_child_detected = 0;
2449 /* looks like a basket case, as no child ever fully initialized; give up.
2451 retained->mpm->shutdown_pending = 1;
2453 ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
2454 ap_server_conf, APLOGNO(02324)
2455 "A resource shortage or other unrecoverable failure "
2456 "was encountered before any child process initialized "
2457 "successfully... httpd is exiting!");
2458 /* the child already logged the failure details */
2463 retained->max_daemons_limit = last_non_dead + 1;
2465 if (idle_thread_count > max_spare_threads / num_buckets)
2468 * Child processes that we ask to shut down won't die immediately
2469 * but may stay around for a long time when they finish their
2470 * requests. If the server load changes many times, many such
2471 * gracefully finishing processes may accumulate, filling up the
2472 * scoreboard. To avoid running out of scoreboard entries, we
2473 * don't shut down more processes when the total number of processes
2476 * XXX It would be nice if we could
2477 * XXX - kill processes without keepalive connections first
2478 * XXX - tell children to stop accepting new connections, and
2479 * XXX depending on server load, later be able to resurrect them
2482 if (retained->total_daemons <= active_daemons_limit &&
2483 retained->total_daemons < server_limit) {
2484 /* Kill off one child */
2485 ap_mpm_podx_signal(all_buckets[child_bucket].pod,
2486 AP_MPM_PODX_GRACEFUL);
2487 retained->idle_spawn_rate[child_bucket] = 1;
2490 ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
2491 "Not shutting down child: total daemons %d / "
2492 "active limit %d / ServerLimit %d",
2493 retained->total_daemons, active_daemons_limit,
2497 else if (idle_thread_count < min_spare_threads / num_buckets) {
2498 if (active_thread_count >= max_workers) {
2499 if (!retained->maxclients_reported) {
2500 /* only report this condition once */
2501 ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484)
2502 "server reached MaxRequestWorkers setting, "
2503 "consider raising the MaxRequestWorkers "
2505 retained->maxclients_reported = 1;
2507 retained->idle_spawn_rate[child_bucket] = 1;
2509 else if (free_length == 0) { /* scoreboard is full, can't fork */
2510 ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(03490)
2511 "scoreboard is full, not at MaxRequestWorkers. "
2512 "Increase ServerLimit.");
2513 retained->idle_spawn_rate[child_bucket] = 1;
2516 if (free_length > retained->idle_spawn_rate[child_bucket]) {
2517 free_length = retained->idle_spawn_rate[child_bucket];
2519 if (retained->idle_spawn_rate[child_bucket] >= 8) {
2520 ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00486)
2521 "server seems busy, (you may need "
2522 "to increase StartServers, ThreadsPerChild "
2523 "or Min/MaxSpareThreads), "
2524 "spawning %d children, there are around %d idle "
2525 "threads, %d active children, and %d children "
2526 "that are shutting down", free_length,
2527 idle_thread_count, active_daemons,
2528 retained->total_daemons);
2530 for (i = 0; i < free_length; ++i) {
2531 ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
2532 "Spawning new child: slot %d active / "
2533 "total daemons: %d/%d",
2534 free_slots[i], active_daemons,
2535 retained->total_daemons);
2536 make_child(ap_server_conf, free_slots[i], child_bucket);
2538 /* the next time around we want to spawn twice as many if this
2539 * wasn't good enough, but not if we've just done a graceful
2541 if (retained->hold_off_on_exponential_spawning) {
2542 --retained->hold_off_on_exponential_spawning;
2544 else if (retained->idle_spawn_rate[child_bucket]
2545 < MAX_SPAWN_RATE / num_buckets) {
2546 retained->idle_spawn_rate[child_bucket] *= 2;
2551 retained->idle_spawn_rate[child_bucket] = 1;
/* Parent supervision loop; runs until restart_pending or shutdown_pending
 * is set in retained->mpm.
 *
 * remaining_children_to_start  children still owed from startup; used for
 *                              1-for-1 replacement of reaped children and
 *                              started in bulk on the first timeout.
 * num_buckets                  number of listener buckets to maintain.
 *
 * Each pass waits in ap_wait_or_timeout().
 *  - When a pid was reaped (pid.pid != -1): the exit status is classified by
 *    ap_process_child_status() and the slot looked up by pid.
 *    APEXIT_CHILDFATAL either raises shutdown_pending or, for a child of a
 *    previous generation, is logged and recorded as sick_child_detected;
 *    APEXIT_CHILDSICK sets sick_child_detected. For a known slot the thread
 *    entries are updated, event_note_child_killed() is called and
 *    total_daemons is decremented; a sick exit resets the spawn rate of the
 *    bucket to 1, otherwise a replacement child is made while
 *    remaining_children_to_start is non-zero. Unknown pids are offered to
 *    apr_proc_other_child_alert() (APR_HAS_OTHER_CHILD) or logged as a
 *    long lost child after a graceful restart.
 *  - On a timeout with children still owed: startup_children() is called and
 *    the counter cleared.
 *  - perform_idle_server_maintenance() is then run for every bucket.
 *    NOTE(review): the statement that skips this maintenance in the two
 *    cases above is not visible in this listing - confirm.
 */
2555 static void server_main_loop(int remaining_children_to_start, int num_buckets)
2558 apr_exit_why_e exitwhy;
2559 int status, processed_status;
2563 while (!retained->mpm->restart_pending && !retained->mpm->shutdown_pending) {
2564 ap_wait_or_timeout(&exitwhy, &status, &pid, pconf, ap_server_conf);
2566 if (pid.pid != -1) {
2567 processed_status = ap_process_child_status(&pid, exitwhy, status);
2568 child_slot = ap_find_child_by_pid(&pid);
2569 if (processed_status == APEXIT_CHILDFATAL) {
2570 /* fix race condition found in PR 39311
2571 * A child created at the same time as a graceful happens
2572 * can find the lock missing and create a fatal error.
2573 * It is not fatal for the last generation to be in this state.
2576 || ap_get_scoreboard_process(child_slot)->generation
2577 == retained->mpm->my_generation) {
2578 retained->mpm->shutdown_pending = 1;
2581 * total_daemons counting will be off now, but as we
2582 * are shutting down, that is not an issue anymore.
2587 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, ap_server_conf, APLOGNO(00487)
2588 "Ignoring fatal error in child of previous "
2589 "generation (pid %ld).",
2591 retained->sick_child_detected = 1;
2594 else if (processed_status == APEXIT_CHILDSICK) {
2595 /* tell perform_idle_server_maintenance to check into this
2596 * on the next timer pop
2598 retained->sick_child_detected = 1;
2600 /* non-fatal death... note that it's gone in the scoreboard. */
2601 if (child_slot >= 0) {
2604 for (i = 0; i < threads_per_child; i++)
2605 ap_update_child_status_from_indexes(child_slot, i,
2608 event_note_child_killed(child_slot, 0, 0);
2609 ps = &ap_scoreboard_image->parent[child_slot];
2613 /* NOTE: We don't dec in the (child_slot < 0) case! */
2614 retained->total_daemons--;
2615 if (processed_status == APEXIT_CHILDSICK) {
2616 /* resource shortage, minimize the fork rate */
2617 retained->idle_spawn_rate[ps->bucket] = 1;
2619 else if (remaining_children_to_start) {
2620 /* we're still doing a 1-for-1 replacement of dead
2621 * children with new children
2623 make_child(ap_server_conf, child_slot, ps->bucket);
2624 --remaining_children_to_start;
2627 #if APR_HAS_OTHER_CHILD
2628 else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
2633 else if (retained->mpm->was_graceful) {
2634 /* Great, we've probably just lost a slot in the
2635 * scoreboard. Somehow we don't know about this child.
2637 ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
2638 ap_server_conf, APLOGNO(00488)
2639 "long lost child came home! (pid %ld)",
2642 /* Don't perform idle maintenance when a child dies,
2643 * only do it when there's a timeout. Remember only a
2644 * finite number of children can die, and it's pretty
2645 * pathological for a lot to die suddenly.
2649 else if (remaining_children_to_start) {
2650 /* we hit a 1 second timeout in which none of the previous
2651 * generation of children needed to be reaped... so assume
2652 * they're all done, and pick up the slack if any is left.
2654 startup_children(remaining_children_to_start);
2655 remaining_children_to_start = 0;
2656 /* In any event we really shouldn't do the code below because
2657 * few of the servers we just started are in the IDLE state
2658 * yet, so we'd mistakenly create an extra server.
2663 for (i = 0; i < num_buckets; i++) {
2664 perform_idle_server_maintenance(i, num_buckets);
/* MPM run hook: parent-process main routine for one server generation.
 *
 * Visible sequence:
 *  - call ap_log_pid() for ap_pid_fname; on a non-graceful start run
 *    ap_run_pre_mpm() and store my_generation as the scoreboard
 *    running_generation;
 *  - install the fatal-signal and MPM signal handlers;
 *  - raise active_daemons_limit and ap_daemons_to_start to at least
 *    num_buckets, and raise min_spare_threads / max_spare_threads to the
 *    bucket-derived minimums computed below;
 *  - cap the number of children to start at active_daemons_limit and start
 *    them right away unless this is a graceful restart;
 *    hold_off_on_exponential_spawning is set to 10 (NOTE(review): the else
 *    keyword tying this to the graceful case is not visible in this
 *    listing - confirm);
 *  - run server_main_loop() between the AP_MPMQ_RUNNING and AP_MPMQ_STOPPING
 *    state changes;
 *  - afterwards handle, in this order, ungraceful shutdown, graceful
 *    shutdown (bounded by ap_graceful_shutdown_timeout when non-zero) and
 *    restart, by signalling every bucket pod and reclaiming or relieving the
 *    children.
 *
 * _pconf is not referenced in the visible lines; plog and s are used for
 * logging and for ap_run_pre_mpm().
 * NOTE(review): no return statement is visible in this listing, so the
 * returned values are not documented here.
 */
2669 static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
2671 int num_buckets = retained->mpm->num_buckets;
2672 int remaining_children_to_start;
2675 ap_log_pid(pconf, ap_pid_fname);
2677 if (!retained->mpm->was_graceful) {
2678 if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
2679 retained->mpm->mpm_state = AP_MPMQ_STOPPING;
2682 /* fix the generation number in the global score; we just got a new,
2683 * cleared scoreboard
2685 ap_scoreboard_image->global->running_generation = retained->mpm->my_generation;
2689 ap_fatal_signal_setup(ap_server_conf, pconf);
2691 ap_unixd_mpm_set_signals(pconf, one_process);
2693 /* Don't thrash since num_buckets depends on the
2694 * system and the number of online CPU cores...
2696 if (active_daemons_limit < num_buckets)
2697 active_daemons_limit = num_buckets;
2698 if (ap_daemons_to_start < num_buckets)
2699 ap_daemons_to_start = num_buckets;
2700 /* We want to create as many children at a time as the number of buckets,
2701 * so to optimally accept connections (evenly distributed across buckets).
2702 * Thus min_spare_threads should at least maintain num_buckets children,
2703 * and max_spare_threads allow num_buckets more children w/o triggering
2704 * immediately (e.g. num_buckets idle threads margin, one per bucket).
2706 if (min_spare_threads < threads_per_child * (num_buckets - 1) + num_buckets)
2707 min_spare_threads = threads_per_child * (num_buckets - 1) + num_buckets;
2708 if (max_spare_threads < min_spare_threads + (threads_per_child + 1) * num_buckets)
2709 max_spare_threads = min_spare_threads + (threads_per_child + 1) * num_buckets;
2711 /* If we're doing a graceful_restart then we're going to see a lot
2712 * of children exiting immediately when we get into the main loop
2713 * below (because we just sent them AP_SIG_GRACEFUL). This happens pretty
2714 * rapidly... and for each one that exits we may start a new one, until
2715 * there are at least min_spare_threads idle threads, counting across
2716 * all children. But we may be permitted to start more children than
2717 * that, so we'll just keep track of how many we're
2718 * supposed to start up without the 1 second penalty between each fork.
2720 remaining_children_to_start = ap_daemons_to_start;
2721 if (remaining_children_to_start > active_daemons_limit) {
2722 remaining_children_to_start = active_daemons_limit;
2724 if (!retained->mpm->was_graceful) {
2725 startup_children(remaining_children_to_start);
2726 remaining_children_to_start = 0;
2729 /* give the system some time to recover before kicking into
2730 * exponential mode */
2731 retained->hold_off_on_exponential_spawning = 10;
2734 ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00489)
2735 "%s configured -- resuming normal operations",
2736 ap_get_server_description());
2737 ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00490)
2738 "Server built: %s", ap_get_server_built());
2739 ap_log_command_line(plog, s);
2740 ap_log_mpm_common(s);
2742 retained->mpm->mpm_state = AP_MPMQ_RUNNING;
2744 server_main_loop(remaining_children_to_start, num_buckets);
2745 retained->mpm->mpm_state = AP_MPMQ_STOPPING;
2747 if (retained->mpm->shutdown_pending && retained->mpm->is_ungraceful) {
2748 /* Time to shut down:
2749 * Kill child processes, tell them to call child_exit, etc...
2751 for (i = 0; i < num_buckets; i++) {
2752 ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
2753 AP_MPM_PODX_RESTART);
2755 ap_reclaim_child_processes(1, /* Start with SIGTERM */
2756 event_note_child_killed);
2759 /* cleanup pid file on normal shutdown */
2760 ap_remove_pid(pconf, ap_pid_fname);
2761 ap_log_error(APLOG_MARK, APLOG_NOTICE, 0,
2762 ap_server_conf, APLOGNO(00491) "caught SIGTERM, shutting down");
2768 if (retained->mpm->shutdown_pending) {
2769 /* Time to gracefully shut down:
2770 * Kill child processes, tell them to call child_exit, etc...
2772 int active_children;
2774 apr_time_t cutoff = 0;
2776 /* Close our listeners, and then ask our children to do same */
2777 ap_close_listeners();
2778 for (i = 0; i < num_buckets; i++) {
2779 ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
2780 AP_MPM_PODX_GRACEFUL);
2782 ap_relieve_child_processes(event_note_child_killed);
2785 /* cleanup pid file on normal shutdown */
2786 ap_remove_pid(pconf, ap_pid_fname);
2787 ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00492)
2788 "caught " AP_SIG_GRACEFUL_STOP_STRING
2789 ", shutting down gracefully");
2792 if (ap_graceful_shutdown_timeout) {
2793 cutoff = apr_time_now() +
2794 apr_time_from_sec(ap_graceful_shutdown_timeout);
2797 /* Don't really exit until each child has finished */
2798 retained->mpm->shutdown_pending = 0;
2800 /* Pause for a second */
2801 apr_sleep(apr_time_from_sec(1));
2803 /* Relieve any children which have now exited */
2804 ap_relieve_child_processes(event_note_child_killed);
2806 active_children = 0;
2807 for (index = 0; index < retained->max_daemons_limit; ++index) {
2808 if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
2809 active_children = 1;
2810 /* Having just one child is enough to stay around */
2814 } while (!retained->mpm->shutdown_pending && active_children &&
2815 (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));
2817 /* We might be here because we received SIGTERM, either
2818 * way, try and make sure that all of our processes are
2821 for (i = 0; i < num_buckets; i++) {
2822 ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
2823 AP_MPM_PODX_RESTART);
2825 ap_reclaim_child_processes(1, event_note_child_killed);
2830 /* we've been told to restart */
2832 /* not worth thinking about */
2836 /* advance to the next generation */
2837 /* XXX: we really need to make sure this new generation number isn't in
2838 * use by any of the children.
2840 ++retained->mpm->my_generation;
2841 ap_scoreboard_image->global->running_generation = retained->mpm->my_generation;
2843 if (!retained->mpm->is_ungraceful) {
2844 ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00493)
2845 AP_SIG_GRACEFUL_STRING
2846 " received. Doing graceful restart");
2847 /* wake up the children...time to die. But we'll have more soon */
2848 for (i = 0; i < num_buckets; i++) {
2849 ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
2850 AP_MPM_PODX_GRACEFUL);
2853 /* This is mostly for debugging... so that we know what is still
2854 * gracefully dealing with existing request.
2859 /* Kill 'em all. Since the child acts the same on the parent's SIGTERM
2860 * and a SIGHUP, we may as well use the same signal, because some user
2861 * pthreads are stealing signals from us left and right.
2863 for (i = 0; i < num_buckets; i++) {
2864 ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
2865 AP_MPM_PODX_RESTART);
2868 ap_reclaim_child_processes(1, /* Start with SIGTERM */
2869 event_note_child_killed);
2870 ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00494)
2871 "SIGHUP received. Attempting to restart");
/* Build the event MPM connection state for a secondary connection `c`.
 *
 * Fetches the state of the master connection (mcs) from
 * c->master->conn_config, allocates a zeroed event_conn_state_t from
 * c->pool, points it at the bucket allocator of `c`, starts it in
 * CONN_STATE_READ_REQUEST_LINE with CONN_SENSE_DEFAULT, and registers it as
 * the mpm_event module config of `c`.
 *
 * NOTE(review): the lines that copy fields from mcs are not visible in this
 * listing, and `csd` is not referenced in the visible lines.
 */
2879 static void setup_slave_conn(conn_rec *c, void *csd)
2881 event_conn_state_t *mcs;
2882 event_conn_state_t *cs;
2884 mcs = ap_get_module_config(c->master->conn_config, &mpm_event_module);
2886 cs = apr_pcalloc(c->pool, sizeof(*cs));
2892 cs->bucket_alloc = c->bucket_alloc;
2895 cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
2896 cs->pub.sense = CONN_SENSE_DEFAULT;
2899 ap_set_module_config(c->conn_config, &mpm_event_module, cs);
/* Pre-connection hook: a connection that has a master and either no
 * connection state of its own or the very state object of its master gets a
 * fresh state from setup_slave_conn().
 * NOTE(review): the return statement is not visible in this listing.
 */
2902 static int event_pre_connection(conn_rec *c, void *csd)
2904 if (c->master && (!c->cs || c->cs == c->master->cs)) {
2905 setup_slave_conn(c, csd);
/* Protocol-switch hook: stores the mpm_event server config of `s` in the
 * connection state of `c` (cs->sc).
 * NOTE(review): the condition guarding this update and the return statement
 * are not visible in this listing; `r` and `protocol` are not referenced in
 * the visible lines.
 */
2910 static int event_protocol_switch(conn_rec *c, request_rec *r, server_rec *s,
2911 const char *protocol)
2914 /* connection based switching of protocol, set the correct server
2915 * configuration, so that timeouts, keepalives and such are used
2916 * for the server that the connection was switched on.
2917 * Normally, we set this on post_read_request, but on a protocol
2918 * other than http/1.1, this might never happen.
2920 event_conn_state_t *cs;
2922 cs = ap_get_module_config(c->conn_config, &mpm_event_module);
2923 cs->sc = ap_get_module_config(s->module_config, &mpm_event_module);
2928 /* This really should be a post_config hook, but the error log is already
2929 * redirected by that point, so we need to do this in the open_logs phase.
 *
 * Visible steps:
 *  - on the first module load, add APLOG_STARTUP to the log level flags;
 *  - set up the listeners and log an ALERT when fewer than one is available;
 *  - on a graceful restart keep the previous number of buckets, then
 *    duplicate the listeners into listen_buckets / num_buckets;
 *  - allocate all_buckets from pconf, open one pod per bucket (skipped in
 *    one_process mode) and attach the listeners of each bucket;
 *  - grow retained->idle_spawn_rate in ap_pglobal when max_buckets is too
 *    small (doubling, or num_buckets if that is larger), copying the rates
 *    recorded so far;
 *  - give newly added buckets the highest spawn rate seen so far;
 *  - record num_buckets and seed rand() from the current time.
 *
 * p, plog and ptemp are not referenced in the visible lines; s is the log
 * target once startup logging is over.
 * NOTE(review): return statements are not visible in this listing.
2931 static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
2932 apr_pool_t * ptemp, server_rec * s)
2935 int level_flags = 0;
2936 int num_buckets = 0;
2937 ap_listen_rec **listen_buckets;
2943 /* the reverse of pre_config, we want this only the first time around */
2944 if (retained->mpm->module_loads == 1) {
2946 level_flags |= APLOG_STARTUP;
2949 if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
2950 ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
2951 (startup ? NULL : s),
2952 "no listening sockets available, shutting down");
2959 else if (retained->mpm->was_graceful) {
2960 /* Preserve the number of buckets on graceful restarts. */
2961 num_buckets = retained->mpm->num_buckets;
2963 if ((rv = ap_duplicate_listeners(pconf, ap_server_conf,
2964 &listen_buckets, &num_buckets))) {
2965 ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
2966 (startup ? NULL : s),
2967 "could not duplicate listeners");
2971 all_buckets = apr_pcalloc(pconf, num_buckets * sizeof(*all_buckets));
2972 for (i = 0; i < num_buckets; i++) {
2973 if (!one_process && /* no POD in one_process mode */
2974 (rv = ap_mpm_podx_open(pconf, &all_buckets[i].pod))) {
2975 ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
2976 (startup ? NULL : s),
2977 "could not open pipe-of-death");
2980 all_buckets[i].listeners = listen_buckets[i];
2983 if (retained->mpm->max_buckets < num_buckets) {
2984 int new_max, *new_ptr;
2985 new_max = retained->mpm->max_buckets * 2;
2986 if (new_max < num_buckets) {
2987 new_max = num_buckets;
2989 new_ptr = (int *)apr_palloc(ap_pglobal, new_max * sizeof(int));
/* memcpy() must not be given a null source pointer, even for a zero-byte
 * copy. With no buckets recorded yet there is nothing to copy, and
 * idle_spawn_rate is only ever assigned a few lines below (presumably still
 * NULL on the first pass - confirm), so skip the call in that case.
 */
if (retained->mpm->num_buckets)
2990 memcpy(new_ptr, retained->idle_spawn_rate,
2991 retained->mpm->num_buckets * sizeof(int));
2992 retained->idle_spawn_rate = new_ptr;
2993 retained->mpm->max_buckets = new_max;
2995 if (retained->mpm->num_buckets < num_buckets) {
2997 /* If new buckets are added, set their idle spawn rate to
2998 * the highest so far, so that they get filled as quickly
2999 * as the existing ones.
3001 for (i = 0; i < retained->mpm->num_buckets; i++) {
3002 if (rate_max < retained->idle_spawn_rate[i]) {
3003 rate_max = retained->idle_spawn_rate[i];
3006 for (/* up to date i */; i < num_buckets; i++) {
3007 retained->idle_spawn_rate[i] = rate_max;
3010 retained->mpm->num_buckets = num_buckets;
3013 srand((unsigned int)apr_time_now());
/* Pre-config hook: establishes run mode, retained state and directive
 * defaults before the configuration is read.
 *
 * Visible steps:
 *  - derive debug, one_process, no_detach and foreground from the DEBUG,
 *    ONE_PROCESS, NO_DETACH and FOREGROUND config defines (one path forces
 *    foreground and one_process to 1; NOTE(review): its condition is not
 *    visible in this listing, presumably the debug case - confirm);
 *  - fetch the retained data for key mpm_event_module, creating it with
 *    max_daemons_limit = -1 when needed, and count the module load;
 *  - set mpm_state to AP_MPMQ_STARTING and clear was_graceful when the
 *    retained baton belongs to another MPM instance;
 *  - on the second load (or when test_atomics is set) check that
 *    apr_atomic_add32() with a negative operand behaves as expected;
 *  - on the second load create and destroy a thread-safe pollset as a
 *    platform check, and detach from the terminal unless running
 *    one_process or foreground;
 *  - record the parent pid and reset the tunables to their compile-time
 *    defaults.
 *
 * Returns HTTP_INTERNAL_SERVER_ERROR when the atomics check, the pollset
 * creation or apr_proc_detach() fails.
 * NOTE(review): the remaining parameter and the success return are not
 * visible in this listing.
 */
3017 static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
3020 int no_detach, debug, foreground;
3022 const char *userdata_key = "mpm_event_module";
3023 int test_atomics = 0;
3025 debug = ap_exists_config_define("DEBUG");
3028 foreground = one_process = 1;
3032 one_process = ap_exists_config_define("ONE_PROCESS");
3033 no_detach = ap_exists_config_define("NO_DETACH");
3034 foreground = ap_exists_config_define("FOREGROUND");
3037 retained = ap_retained_data_get(userdata_key);
3039 retained = ap_retained_data_create(userdata_key, sizeof(*retained));
3040 retained->mpm = ap_unixd_mpm_get_retained_data();
3041 retained->max_daemons_limit = -1;
3042 if (retained->mpm->module_loads) {
3046 retained->mpm->mpm_state = AP_MPMQ_STARTING;
3047 if (retained->mpm->baton != retained) {
3048 retained->mpm->was_graceful = 0;
3049 retained->mpm->baton = retained;
3051 ++retained->mpm->module_loads;
3053 /* test once for correct operation of fdqueue */
3054 if (test_atomics || retained->mpm->module_loads == 2) {
3055 static apr_uint32_t foo1, foo2;
3057 apr_atomic_set32(&foo1, 100);
3058 foo2 = apr_atomic_add32(&foo1, -10);
3059 if (foo2 != 100 || foo1 != 90) {
3060 ap_log_error(APLOG_MARK, APLOG_CRIT, 0, NULL, APLOGNO(02405)
3061 "atomics not working as expected - add32 of negative number");
3062 return HTTP_INTERNAL_SERVER_ERROR;
3066 /* sigh, want this only the second time around */
3067 if (retained->mpm->module_loads == 2) {
3068 rv = apr_pollset_create(&event_pollset, 1, plog,
3069 APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
3070 if (rv != APR_SUCCESS) {
3071 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, APLOGNO(00495)
3072 "Couldn't create a Thread Safe Pollset. "
3073 "Is it supported on your platform? "
3074 "Also check system or user limits!");
3075 return HTTP_INTERNAL_SERVER_ERROR;
3077 apr_pollset_destroy(event_pollset);
3079 if (!one_process && !foreground) {
3080 /* before we detach, setup crash handlers to log to errorlog */
3081 ap_fatal_signal_setup(ap_server_conf, pconf);
3082 rv = apr_proc_detach(no_detach ? APR_PROC_DETACH_FOREGROUND
3083 : APR_PROC_DETACH_DAEMONIZE);
3084 if (rv != APR_SUCCESS) {
3085 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, APLOGNO(00496)
3086 "apr_proc_detach failed");
3087 return HTTP_INTERNAL_SERVER_ERROR;
3092 parent_pid = ap_my_pid = getpid();
3094 ap_listen_pre_config();
3095 ap_daemons_to_start = DEFAULT_START_DAEMON;
3096 min_spare_threads = DEFAULT_MIN_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
3097 max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
3098 server_limit = DEFAULT_SERVER_LIMIT;
3099 thread_limit = DEFAULT_THREAD_LIMIT;
3100 active_daemons_limit = server_limit;
3101 threads_per_child = DEFAULT_THREADS_PER_CHILD;
3102 max_workers = active_daemons_limit * threads_per_child;
3103 had_healthy_child = 0;
3104 ap_extended_status = 0;
/* Post-config hook: builds the timeout queues and the per-server MPM config.
 *
 * Visible steps:
 *  - the hook tests for the AP_SQ_MS_CREATE_PRE_CONFIG stage, where this
 *    work is not needed (NOTE(review): the statement taken on that test is
 *    not visible in this listing);
 *  - two hashes allocated from ptemp map a timeout value to its queue, one
 *    for write completion (wc) and one for keep-alive (ka);
 *  - linger_q and short_linger_q are initialised from MAX_SECS_TO_LINGER and
 *    SECONDS_TO_LINGER;
 *  - every server in the list starting at `s` gets a zeroed event_srv_cfg
 *    from pconf registered as its module config;
 *  - one path creates the global write_completion_q and keepalive_q from the
 *    timeouts of the server at hand (the main server, per the comment
 *    below); the other path looks the timeout up in the hash and, when a new
 *    queue is initialised, appends it to the tail of the chain.
 *
 * NOTE(review): the assignments into the server config, the tests around
 * the queue creation and the return statements are not visible in this
 * listing.
 */
3109 static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
3110 apr_pool_t *ptemp, server_rec *s)
3113 struct timeout_queue *tail, *q;
3117 /* Not needed in pre_config stage */
3118 if (ap_state_query(AP_SQ_MAIN_STATE) == AP_SQ_MS_CREATE_PRE_CONFIG) {
3122 wc.tail = ka.tail = NULL;
3123 wc.hash = apr_hash_make(ptemp);
3124 ka.hash = apr_hash_make(ptemp);
3126 TO_QUEUE_INIT(linger_q, pconf,
3127 apr_time_from_sec(MAX_SECS_TO_LINGER), NULL);
3128 TO_QUEUE_INIT(short_linger_q, pconf,
3129 apr_time_from_sec(SECONDS_TO_LINGER), NULL);
3131 for (; s; s = s->next) {
3132 event_srv_cfg *sc = apr_pcalloc(pconf, sizeof *sc);
3134 ap_set_module_config(s->module_config, &mpm_event_module, sc);
3136 /* The main server uses the global queues */
3137 TO_QUEUE_INIT(wc.q, pconf, s->timeout, NULL);
3138 apr_hash_set(wc.hash, &s->timeout, sizeof s->timeout, wc.q);
3139 wc.tail = write_completion_q = wc.q;
3141 TO_QUEUE_INIT(ka.q, pconf, s->keep_alive_timeout, NULL);
3142 apr_hash_set(ka.hash, &s->keep_alive_timeout,
3143 sizeof s->keep_alive_timeout, ka.q);
3144 ka.tail = keepalive_q = ka.q;
3147 /* The vhosts use any existing queue with the same timeout,
3148 * or their own queue(s) if there isn't */
3149 wc.q = apr_hash_get(wc.hash, &s->timeout, sizeof s->timeout);
3151 TO_QUEUE_INIT(wc.q, pconf, s->timeout, wc.tail);
3152 apr_hash_set(wc.hash, &s->timeout, sizeof s->timeout, wc.q);
3153 wc.tail = wc.tail->next = wc.q;
3156 ka.q = apr_hash_get(ka.hash, &s->keep_alive_timeout,
3157 sizeof s->keep_alive_timeout);
3159 TO_QUEUE_INIT(ka.q, pconf, s->keep_alive_timeout, ka.tail);
3160 apr_hash_set(ka.hash, &s->keep_alive_timeout,
3161 sizeof s->keep_alive_timeout, ka.q);
3162 ka.tail = ka.tail->next = ka.q;
3172 static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
3173 apr_pool_t *ptemp, server_rec *s)
3177 /* the reverse of pre_config, we want this only the first time around */
3178 if (retained->mpm->module_loads == 1) {
3182 if (server_limit > MAX_SERVER_LIMIT) {
3184 ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00497)
3185 "WARNING: ServerLimit of %d exceeds compile-time "
3186 "limit of %d servers, decreasing to %d.",
3187 server_limit, MAX_SERVER_LIMIT, MAX_SERVER_LIMIT);
3189 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00498)
3190 "ServerLimit of %d exceeds compile-time limit "
3191 "of %d, decreasing to match",
3192 server_limit, MAX_SERVER_LIMIT);
3194 server_limit = MAX_SERVER_LIMIT;
3196 else if (server_limit < 1) {
3198 ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00499)
3199 "WARNING: ServerLimit of %d not allowed, "
3200 "increasing to 1.", server_limit);
3202 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00500)
3203 "ServerLimit of %d not allowed, increasing to 1",
3209 /* you cannot change ServerLimit across a restart; ignore
3212 if (!retained->first_server_limit) {
3213 retained->first_server_limit = server_limit;
3215 else if (server_limit != retained->first_server_limit) {
3216 /* don't need a startup console version here */
3217 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00501)
3218 "changing ServerLimit to %d from original value of %d "
3219 "not allowed during restart",
3220 server_limit, retained->first_server_limit);
3221 server_limit = retained->first_server_limit;
3224 if (thread_limit > MAX_THREAD_LIMIT) {
3226 ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00502)
3227 "WARNING: ThreadLimit of %d exceeds compile-time "
3228 "limit of %d threads, decreasing to %d.",
3229 thread_limit, MAX_THREAD_LIMIT, MAX_THREAD_LIMIT);
3231 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00503)
3232 "ThreadLimit of %d exceeds compile-time limit "
3233 "of %d, decreasing to match",
3234 thread_limit, MAX_THREAD_LIMIT);
3236 thread_limit = MAX_THREAD_LIMIT;
3238 else if (thread_limit < 1) {
3240 ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00504)
3241 "WARNING: ThreadLimit of %d not allowed, "
3242 "increasing to 1.", thread_limit);
3244 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00505)
3245 "ThreadLimit of %d not allowed, increasing to 1",
3251 /* you cannot change ThreadLimit across a restart; ignore
3254 if (!retained->first_thread_limit) {
3255 retained->first_thread_limit = thread_limit;
3257 else if (thread_limit != retained->first_thread_limit) {
3258 /* don't need a startup console version here */
3259 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00506)
3260 "changing ThreadLimit to %d from original value of %d "
3261 "not allowed during restart",
3262 thread_limit, retained->first_thread_limit);
3263 thread_limit = retained->first_thread_limit;
3266 if (threads_per_child > thread_limit) {
3268 ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00507)
3269 "WARNING: ThreadsPerChild of %d exceeds ThreadLimit "
3270 "of %d threads, decreasing to %d. "
3271 "To increase, please see the ThreadLimit directive.",
3272 threads_per_child, thread_limit, thread_limit);
3274 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00508)
3275 "ThreadsPerChild of %d exceeds ThreadLimit "
3276 "of %d, decreasing to match",
3277 threads_per_child, thread_limit);
3279 threads_per_child = thread_limit;
3281 else if (threads_per_child < 1) {
3283 ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00509)
3284 "WARNING: ThreadsPerChild of %d not allowed, "
3285 "increasing to 1.", threads_per_child);
3287 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00510)
3288 "ThreadsPerChild of %d not allowed, increasing to 1",
3291 threads_per_child = 1;
3294 if (max_workers < threads_per_child) {
3296 ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00511)
3297 "WARNING: MaxRequestWorkers of %d is less than "
3298 "ThreadsPerChild of %d, increasing to %d. "
3299 "MaxRequestWorkers must be at least as large "
3300 "as the number of threads in a single server.",
3301 max_workers, threads_per_child, threads_per_child);
3303 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00512)
3304 "MaxRequestWorkers of %d is less than ThreadsPerChild "
3305 "of %d, increasing to match",
3306 max_workers, threads_per_child);
3308 max_workers = threads_per_child;
3311 active_daemons_limit = max_workers / threads_per_child;
3313 if (max_workers % threads_per_child) {
3314 int tmp_max_workers = active_daemons_limit * threads_per_child;
3317 ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00513)
3318 "WARNING: MaxRequestWorkers of %d is not an integer "
3319 "multiple of ThreadsPerChild of %d, decreasing to nearest "
3320 "multiple %d, for a maximum of %d servers.",
3321 max_workers, threads_per_child, tmp_max_workers,
3322 active_daemons_limit);
3324 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00514)
3325 "MaxRequestWorkers of %d is not an integer multiple "
3326 "of ThreadsPerChild of %d, decreasing to nearest "
3327 "multiple %d", max_workers, threads_per_child,
3330 max_workers = tmp_max_workers;
3333 if (active_daemons_limit > server_limit) {
3335 ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00515)
3336 "WARNING: MaxRequestWorkers of %d would require %d servers "
3337 "and would exceed ServerLimit of %d, decreasing to %d. "
3338 "To increase, please see the ServerLimit directive.",
3339 max_workers, active_daemons_limit, server_limit,
3340 server_limit * threads_per_child);
3342 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00516)
3343 "MaxRequestWorkers of %d would require %d servers and "
3344 "exceed ServerLimit of %d, decreasing to %d",
3345 max_workers, active_daemons_limit, server_limit,
3346 server_limit * threads_per_child);
3348 active_daemons_limit = server_limit;
3351 /* ap_daemons_to_start > active_daemons_limit checked in ap_mpm_run() */
3352 if (ap_daemons_to_start < 1) {
3354 ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00517)
3355 "WARNING: StartServers of %d not allowed, "
3356 "increasing to 1.", ap_daemons_to_start);
3358 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00518)
3359 "StartServers of %d not allowed, increasing to 1",
3360 ap_daemons_to_start);
3362 ap_daemons_to_start = 1;
3365 if (min_spare_threads < 1) {
3367 ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00519)
3368 "WARNING: MinSpareThreads of %d not allowed, "
3369 "increasing to 1 to avoid almost certain server "
3370 "failure. Please read the documentation.",
3373 ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00520)
3374 "MinSpareThreads of %d not allowed, increasing to 1",
3377 min_spare_threads = 1;
3380 /* max_spare_threads < min_spare_threads + threads_per_child
3381 * checked in ap_mpm_run()
3387 static void event_hooks(apr_pool_t * p)
3389 /* Our open_logs hook function must run before the core's, or stderr
3390 * will be redirected to a file, and the messages won't print to the
3393 static const char *const aszSucc[] = { "core.c", NULL };
3396 ap_hook_open_logs(event_open_logs, NULL, aszSucc, APR_HOOK_REALLY_FIRST);
3397 /* we need to set the MPM state before other pre-config hooks use MPM query
3398 * to retrieve it, so register as REALLY_FIRST
3400 ap_hook_pre_config(event_pre_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
3401 ap_hook_post_config(event_post_config, NULL, NULL, APR_HOOK_MIDDLE);
3402 ap_hook_check_config(event_check_config, NULL, NULL, APR_HOOK_MIDDLE);
3403 ap_hook_mpm(event_run, NULL, NULL, APR_HOOK_MIDDLE);
3404 ap_hook_mpm_query(event_query, NULL, NULL, APR_HOOK_MIDDLE);
3405 ap_hook_mpm_register_timed_callback(event_register_timed_callback, NULL, NULL,
3407 ap_hook_pre_read_request(event_pre_read_request, NULL, NULL, APR_HOOK_MIDDLE);
3408 ap_hook_post_read_request(event_post_read_request, NULL, NULL, APR_HOOK_MIDDLE);
3409 ap_hook_mpm_get_name(event_get_name, NULL, NULL, APR_HOOK_MIDDLE);
3411 ap_hook_pre_connection(event_pre_connection, NULL, NULL, APR_HOOK_REALLY_FIRST);
3412 ap_hook_protocol_switch(event_protocol_switch, NULL, NULL, APR_HOOK_REALLY_FIRST);
3415 static const char *set_daemons_to_start(cmd_parms *cmd, void *dummy,
3418 const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3423 ap_daemons_to_start = atoi(arg);
3427 static const char *set_min_spare_threads(cmd_parms * cmd, void *dummy,
3430 const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3435 min_spare_threads = atoi(arg);
3439 static const char *set_max_spare_threads(cmd_parms * cmd, void *dummy,
3442 const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3447 max_spare_threads = atoi(arg);
3451 static const char *set_max_workers(cmd_parms * cmd, void *dummy,
3454 const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3458 if (!strcasecmp(cmd->cmd->name, "MaxClients")) {
3459 ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL, APLOGNO(00521)
3460 "MaxClients is deprecated, use MaxRequestWorkers "
3463 max_workers = atoi(arg);
3467 static const char *set_threads_per_child(cmd_parms * cmd, void *dummy,
3470 const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3475 threads_per_child = atoi(arg);
3478 static const char *set_server_limit (cmd_parms *cmd, void *dummy, const char *arg)
3480 const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3485 server_limit = atoi(arg);
3489 static const char *set_thread_limit(cmd_parms * cmd, void *dummy,
3492 const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3497 thread_limit = atoi(arg);
3501 static const char *set_worker_factor(cmd_parms * cmd, void *dummy,
3506 const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3511 val = strtod(arg, &endptr);
3513 return "error parsing value";
3516 return "AsyncRequestWorkerFactor argument must be a positive number";
3518 worker_factor = val * WORKER_FACTOR_SCALE;
3519 if (worker_factor == 0)
3525 static const command_rec event_cmds[] = {
3527 AP_INIT_TAKE1("StartServers", set_daemons_to_start, NULL, RSRC_CONF,
3528 "Number of child processes launched at server startup"),
3529 AP_INIT_TAKE1("ServerLimit", set_server_limit, NULL, RSRC_CONF,
3530 "Maximum number of child processes for this run of Apache"),
3531 AP_INIT_TAKE1("MinSpareThreads", set_min_spare_threads, NULL, RSRC_CONF,
3532 "Minimum number of idle threads, to handle request spikes"),
3533 AP_INIT_TAKE1("MaxSpareThreads", set_max_spare_threads, NULL, RSRC_CONF,
3534 "Maximum number of idle threads"),
3535 AP_INIT_TAKE1("MaxClients", set_max_workers, NULL, RSRC_CONF,
3536 "Deprecated name of MaxRequestWorkers"),
3537 AP_INIT_TAKE1("MaxRequestWorkers", set_max_workers, NULL, RSRC_CONF,
3538 "Maximum number of threads alive at the same time"),
3539 AP_INIT_TAKE1("ThreadsPerChild", set_threads_per_child, NULL, RSRC_CONF,
3540 "Number of threads each child creates"),
3541 AP_INIT_TAKE1("ThreadLimit", set_thread_limit, NULL, RSRC_CONF,
3542 "Maximum number of worker threads per child process for this "
3543 "run of Apache - Upper limit for ThreadsPerChild"),
3544 AP_INIT_TAKE1("AsyncRequestWorkerFactor", set_worker_factor, NULL, RSRC_CONF,
3545 "How many additional connects will be accepted per idle "
3547 AP_GRACEFUL_SHUTDOWN_TIMEOUT_COMMAND,
3551 AP_DECLARE_MODULE(mpm_event) = {
3553 NULL, /* hook to run before apache parses args */
3554 NULL, /* create per-directory config structure */
3555 NULL, /* merge per-directory config structures */
3556 NULL, /* create per-server config structure */
3557 NULL, /* merge per-server config structures */
3558 event_cmds, /* command apr_table_t */
3559 event_hooks /* register_hooks */