granicus.if.org Git - apache/blob - server/mpm/event/event.c

   1 /* Licensed to the Apache Software Foundation (ASF) under one or more
   2  * contributor license agreements.  See the NOTICE file distributed with
   3  * this work for additional information regarding copyright ownership.
   4  * The ASF licenses this file to You under the Apache License, Version 2.0
   5  * (the "License"); you may not use this file except in compliance with
   6  * the License.  You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * This MPM tries to fix the 'keep alive problem' in HTTP.
  19  *
  20  * After a client completes the first request, the client can keep the
  21  * connection open to send more requests with the same socket.  This can save
  22  * significant overhead in creating TCP connections.  However, the major
  23  * disadvantage is that Apache traditionally keeps an entire child
  24  * process/thread waiting for data from the client.  To solve this problem,
  25  * this MPM has a dedicated thread for handling both the Listening sockets,
  26  * and all sockets that are in a Keep Alive status.
  27  *
  28  * The MPM assumes the underlying apr_pollset implementation is somewhat
  29  * threadsafe.  This currently is only compatible with KQueue and EPoll.  This
  30  * enables the MPM to avoid extra high level locking or having to wake up the
  31  * listener thread when a keep-alive socket needs to be sent to it.
  32  *
  33  * This MPM does not perform well on older platforms that do not have very good
  34  * threading, like Linux with a 2.4 kernel, but this does not matter, since we
  35  * require EPoll or KQueue.
  36  *
  37  * For FreeBSD, use 5.3.  It is possible to run this MPM on FreeBSD 5.2.1, if
  38  * you use libkse (see `man libmap.conf`).
  39  *
  40  * For NetBSD, use at least 2.0.
  41  *
  42  * For Linux, you should use a 2.6 kernel, and make sure your glibc has epoll
  43  * support compiled in.
  44  *
  45  */
  46
  47 #include "apr.h"
  48 #include "apr_portable.h"
  49 #include "apr_strings.h"
  50 #include "apr_file_io.h"
  51 #include "apr_thread_proc.h"
  52 #include "apr_signal.h"
  53 #include "apr_thread_mutex.h"
  54 #include "apr_poll.h"
  55 #include "apr_ring.h"
  56 #include "apr_queue.h"
  57 #include "apr_atomic.h"
  58 #define APR_WANT_STRFUNC
  59 #include "apr_want.h"
  60 #include "apr_version.h"
  61
  62 #include <stdlib.h>
  63
  64 #if APR_HAVE_UNISTD_H
  65 #include <unistd.h>
  66 #endif
  67 #if APR_HAVE_SYS_SOCKET_H
  68 #include <sys/socket.h>
  69 #endif
  70 #if APR_HAVE_SYS_WAIT_H
  71 #include <sys/wait.h>
  72 #endif
  73 #ifdef HAVE_SYS_PROCESSOR_H
  74 #include <sys/processor.h>      /* for bindprocessor() */
  75 #endif
  76
  77 #if !APR_HAS_THREADS
  78 #error The Event MPM requires APR threads, but they are unavailable.
  79 #endif
  80
  81 #include "ap_config.h"
  82 #include "httpd.h"
  83 #include "http_main.h"
  84 #include "http_log.h"
  85 #include "http_config.h"        /* for read_config */
  86 #include "http_core.h"          /* for get_remote_host */
  87 #include "http_connection.h"
  88 #include "http_protocol.h"
  89 #include "ap_mpm.h"
  90 #include "mpm_common.h"
  91 #include "ap_listen.h"
  92 #include "scoreboard.h"
  93 #include "fdqueue.h"
  94 #include "mpm_default.h"
  95 #include "http_vhost.h"
  96 #include "unixd.h"
  97 #include "apr_skiplist.h"
  98
  99 #include <signal.h>
 100 #include <limits.h>             /* for INT_MAX */
 101
 102
 103 /* Limit on the total --- clients will be locked out if more servers than
 104  * this are needed.  It is intended solely to keep the server from crashing
 105  * when things get out of hand.
 106  *
 107  * We keep a hard maximum number of servers, for two reasons --- first off,
 108  * in case something goes seriously wrong, we want to stop the fork bomb
 109  * short of actually crashing the machine we're running on by filling some
 110  * kernel table.  Secondly, it keeps the size of the scoreboard file small
 111  * enough that we can read the whole thing without worrying too much about
 112  * the overhead.
 113  */
 114 #ifndef DEFAULT_SERVER_LIMIT
 115 #define DEFAULT_SERVER_LIMIT 16
 116 #endif
 117
 118 /* Admin can't tune ServerLimit beyond MAX_SERVER_LIMIT.  We want
 119  * some sort of compile-time limit to help catch typos.
 120  */
 121 #ifndef MAX_SERVER_LIMIT
 122 #define MAX_SERVER_LIMIT 20000
 123 #endif
 124
 125 /* Limit on the threads per process.  Clients will be locked out if more than
 126  * this are needed.
 127  *
  128  * We keep this for one reason: it keeps the size of the scoreboard file small
 129  * enough that we can read the whole thing without worrying too much about
 130  * the overhead.
 131  */
 132 #ifndef DEFAULT_THREAD_LIMIT
 133 #define DEFAULT_THREAD_LIMIT 64
 134 #endif
 135
 136 /* Admin can't tune ThreadLimit beyond MAX_THREAD_LIMIT.  We want
 137  * some sort of compile-time limit to help catch typos.
 138  */
 139 #ifndef MAX_THREAD_LIMIT
 140 #define MAX_THREAD_LIMIT 100000
 141 #endif
 142
 143 #define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
 144
 145 #if !APR_VERSION_AT_LEAST(1,4,0)
 146 #define apr_time_from_msec(x) (x * 1000)
 147 #endif
 148
 149 #ifndef MAX_SECS_TO_LINGER
 150 #define MAX_SECS_TO_LINGER 30
 151 #endif
 152 #define SECONDS_TO_LINGER  2
 153
 154 /*
 155  * Actual definitions of config globals
 156  */
 157
 158 #ifndef DEFAULT_WORKER_FACTOR
 159 #define DEFAULT_WORKER_FACTOR 2
 160 #endif
 161 #define WORKER_FACTOR_SCALE   16  /* scale factor to allow fractional values */
 162 static unsigned int worker_factor = DEFAULT_WORKER_FACTOR * WORKER_FACTOR_SCALE;
 163     /* AsyncRequestWorkerFactor * 16 */
 164
 165 static int threads_per_child = 0;           /* ThreadsPerChild */
 166 static int ap_daemons_to_start = 0;         /* StartServers */
 167 static int min_spare_threads = 0;           /* MinSpareThreads */
 168 static int max_spare_threads = 0;           /* MaxSpareThreads */
 169 static int active_daemons_limit = 0;        /* MaxRequestWorkers / ThreadsPerChild */
 170 static int active_daemons = 0;              /* workers that still active, i.e. are
 171                                                not shutting down gracefully */
 172 static int max_workers = 0;                 /* MaxRequestWorkers */
 173 static int server_limit = 0;                /* ServerLimit */
 174 static int thread_limit = 0;                /* ThreadLimit */
 175 static int had_healthy_child = 0;
 176 static int dying = 0;
 177 static int workers_may_exit = 0;
 178 static int start_thread_may_exit = 0;
 179 static int listener_may_exit = 0;
 180 static int listener_is_wakeable = 0;        /* Pollset supports APR_POLLSET_WAKEABLE */
 181 static int num_listensocks = 0;
 182 static apr_int32_t conns_this_child;        /* MaxConnectionsPerChild, only access
 183                                                in listener thread */
 184 static apr_uint32_t connection_count = 0;   /* Number of open connections */
 185 static apr_uint32_t lingering_count = 0;    /* Number of connections in lingering close */
 186 static apr_uint32_t suspended_count = 0;    /* Number of suspended connections */
 187 static apr_uint32_t clogged_count = 0;      /* Number of threads processing ssl conns */
 188 static apr_uint32_t threads_shutdown = 0;   /* Number of threads that have shutdown
 189                                                early during graceful termination */
 190 static int resource_shortage = 0;
 191 static fd_queue_t *worker_queue;
 192 static fd_queue_info_t *worker_queue_info;
 193
 194 static apr_thread_mutex_t *timeout_mutex;
 195
 196 module AP_MODULE_DECLARE_DATA mpm_event_module;
 197
 198 /* forward declare */
 199 struct event_srv_cfg_s;
 200 typedef struct event_srv_cfg_s event_srv_cfg;
 201
 202 static apr_pollfd_t *listener_pollfd;
 203
 204 /*
 205  * The pollset for sockets that are in any of the timeout queues. Currently
 206  * we use the timeout_mutex to make sure that connections are added/removed
 207  * atomically to/from both event_pollset and a timeout queue. Otherwise
 208  * some confusion can happen under high load if timeout queues and pollset
 209  * get out of sync.
 210  * XXX: It should be possible to make the lock unnecessary in many or even all
 211  * XXX: cases.
 212  */
 213 static apr_pollset_t *event_pollset;
 214
 215 /*
 216  * The chain of connections to be shutdown by a worker thread (deferred),
 217  * linked list updated atomically.
 218  */
 219 static event_conn_state_t *volatile defer_linger_chain;
 220
 221 struct event_conn_state_t {
 222     /** APR_RING of expiration timeouts */
 223     APR_RING_ENTRY(event_conn_state_t) timeout_list;
 224     /** the time when the entry was queued */
 225     apr_time_t queue_timestamp;
 226     /** connection record this struct refers to */
 227     conn_rec *c;
 228     /** request record (if any) this struct refers to */
 229     request_rec *r;
 230     /** server config this struct refers to */
 231     event_srv_cfg *sc;
 232     /** is the current conn_rec suspended?  (disassociated with
 233      * a particular MPM thread; for suspend_/resume_connection
 234      * hooks)
 235      */
 236     int suspended;
 237     /** memory pool to allocate from */
 238     apr_pool_t *p;
 239     /** bucket allocator */
 240     apr_bucket_alloc_t *bucket_alloc;
 241     /** poll file descriptor information */
 242     apr_pollfd_t pfd;
 243     /** public parts of the connection state */
 244     conn_state_t pub;
 245     /** chaining in defer_linger_chain */
 246     struct event_conn_state_t *chain;
 247 };
 248
 249 APR_RING_HEAD(timeout_head_t, event_conn_state_t);
 250
 251 struct timeout_queue {
 252     struct timeout_head_t head;
 253     apr_interval_time_t timeout;
 254     apr_uint32_t count;         /* for this queue */
 255     apr_uint32_t *total;        /* for all chained/related queues */
 256     struct timeout_queue *next; /* chaining */
 257 };
 258 /*
 259  * Several timeout queues that use different timeouts, so that we always can
 260  * simply append to the end.
 261  *   write_completion_q uses vhost's TimeOut
 262  *   keepalive_q        uses vhost's KeepAliveTimeOut
 263  *   linger_q           uses MAX_SECS_TO_LINGER
 264  *   short_linger_q     uses SECONDS_TO_LINGER
 265  */
 266 static struct timeout_queue *write_completion_q,
 267                             *keepalive_q,
 268                             *linger_q,
 269                             *short_linger_q;
 270 static volatile apr_time_t  queues_next_expiry;
 271
 272 /* Prevent extra poll/wakeup calls for timeouts close in the future (queues
 273  * have the granularity of a second anyway).
 274  * XXX: Wouldn't 0.5s (instead of 0.1s) be "enough"?
 275  */
 276 #define TIMEOUT_FUDGE_FACTOR apr_time_from_msec(100)
 277
 278 /*
 279  * Macros for accessing struct timeout_queue.
 280  * For TO_QUEUE_APPEND and TO_QUEUE_REMOVE, timeout_mutex must be held.
 281  */
 282 static void TO_QUEUE_APPEND(struct timeout_queue *q, event_conn_state_t *el)
 283 {
 284     apr_time_t q_expiry;
 285     apr_time_t next_expiry;
 286
 287     APR_RING_INSERT_TAIL(&q->head, el, event_conn_state_t, timeout_list);
 288     apr_atomic_inc32(q->total);
 289     ++q->count;
 290
 291     /* Cheaply update the overall queues' next expiry according to the
 292      * first entry of this queue (oldest), if necessary.
 293      */
 294     el = APR_RING_FIRST(&q->head);
 295     q_expiry = el->queue_timestamp + q->timeout;
 296     next_expiry = queues_next_expiry;
 297     if (!next_expiry || next_expiry > q_expiry + TIMEOUT_FUDGE_FACTOR) {
 298         queues_next_expiry = q_expiry;
 299         /* Unblock the poll()ing listener for it to update its timeout. */
 300         if (listener_is_wakeable) {
 301             apr_pollset_wakeup(event_pollset);
 302         }
 303     }
 304 }
 305
 306 static void TO_QUEUE_REMOVE(struct timeout_queue *q, event_conn_state_t *el)
 307 {
 308     APR_RING_REMOVE(el, timeout_list);
 309     APR_RING_ELEM_INIT(el, timeout_list);
 310     apr_atomic_dec32(q->total);
 311     --q->count;
 312 }
 313
 314 static struct timeout_queue *TO_QUEUE_MAKE(apr_pool_t *p, apr_time_t t,
 315                                            struct timeout_queue *ref)
 316 {
 317     struct timeout_queue *q;
 318
 319     q = apr_pcalloc(p, sizeof *q);
 320     APR_RING_INIT(&q->head, event_conn_state_t, timeout_list);
 321     q->total = (ref) ? ref->total : apr_pcalloc(p, sizeof *q->total);
 322     q->timeout = t;
 323
 324     return q;
 325 }
 326
 327 #define TO_QUEUE_ELEM_INIT(el) \
 328     APR_RING_ELEM_INIT((el), timeout_list)
 329
 330 /* The structure used to pass unique initialization info to each thread */
 331 typedef struct
 332 {
 333     int pslot;  /* process slot */
 334     int tslot;  /* worker slot of the thread */
 335 } proc_info;
 336
 337 /* Structure used to pass information to the thread responsible for
 338  * creating the rest of the threads.
 339  */
 340 typedef struct
 341 {
 342     apr_thread_t **threads;
 343     apr_thread_t *listener;
 344     int child_num_arg;
 345     apr_threadattr_t *threadattr;
 346 } thread_starter;
 347
 348 typedef enum
 349 {
 350     PT_CSD,
 351     PT_ACCEPT
 352 } poll_type_e;
 353
 354 typedef struct
 355 {
 356     poll_type_e type;
 357     void *baton;
 358 } listener_poll_type;
 359
 360 /* data retained by event across load/unload of the module
 361  * allocated on first call to pre-config hook; located on
 362  * subsequent calls to pre-config hook
 363  */
 364 typedef struct event_retained_data {
 365     ap_unixd_mpm_retained_data *mpm;
 366
 367     int first_server_limit;
 368     int first_thread_limit;
 369     int sick_child_detected;
 370     int maxclients_reported;
 371     /*
 372      * The max child slot ever assigned, preserved across restarts.  Necessary
 373      * to deal with MaxRequestWorkers changes across AP_SIG_GRACEFUL restarts.
 374      * We use this value to optimize routines that have to scan the entire
 375      * scoreboard.
 376      */
 377     int max_daemons_limit;
 378
 379     /*
 380      * All running workers, active and shutting down, including those that
 381      * may be left from before a graceful restart.
 382      * Not kept up-to-date when shutdown is pending.
 383      */
 384     int total_daemons;
 385
 386     /*
 387      * idle_spawn_rate is the number of children that will be spawned on the
 388      * next maintenance cycle if there aren't enough idle servers.  It is
 389      * maintained per listeners bucket, doubled up to MAX_SPAWN_RATE, and
 390      * reset only when a cycle goes by without the need to spawn.
 391      */
 392     int *idle_spawn_rate;
 393 #ifndef MAX_SPAWN_RATE
 394 #define MAX_SPAWN_RATE        (32)
 395 #endif
 396     int hold_off_on_exponential_spawning;
 397 } event_retained_data;
 398 static event_retained_data *retained;
 399
 400 typedef struct event_child_bucket {
 401     ap_pod_t *pod;
 402     ap_listen_rec *listeners;
 403 } event_child_bucket;
 404 static event_child_bucket *all_buckets, /* All listeners buckets */
 405                           *my_bucket;   /* Current child bucket */
 406
 407 struct event_srv_cfg_s {
 408     struct timeout_queue *wc_q,
 409                          *ka_q;
 410 };
 411
 412 #define ID_FROM_CHILD_THREAD(c, t)    ((c * thread_limit) + t)
 413
 414 /* The event MPM respects a couple of runtime flags that can aid
 415  * in debugging. Setting the -DNO_DETACH flag will prevent the root process
 416  * from detaching from its controlling terminal. Additionally, setting
 417  * the -DONE_PROCESS flag (which implies -DNO_DETACH) will get you the
 418  * child_main loop running in the process which originally started up.
 419  * This gives you a pretty nice debugging environment.  (You'll get a SIGHUP
 420  * early in standalone_main; just continue through.  This is the server
 421  * trying to kill off any child processes which it might have lying
 422  * around --- Apache doesn't keep track of their pids, it just sends
 423  * SIGHUP to the process group, ignoring it in the root process.
 424  * Continue through and you'll be fine.).
 425  */
 426
 427 static int one_process = 0;
 428
 429 #ifdef DEBUG_SIGSTOP
 430 int raise_sigstop_flags;
 431 #endif
 432
 433 static apr_pool_t *pconf;       /* Pool for config stuff */
 434 static apr_pool_t *pchild;      /* Pool for httpd child stuff */
 435
 436 static pid_t ap_my_pid;         /* Linux getpid() doesn't work except in main
 437                                    thread. Use this instead */
 438 static pid_t parent_pid;
 439 static apr_os_thread_t *listener_os_thread;
 440
 441 /* The LISTENER_SIGNAL signal will be sent from the main thread to the
 442  * listener thread to wake it up for graceful termination (what a child
 443  * process from an old generation does when the admin does "apachectl
 444  * graceful").  This signal will be blocked in all threads of a child
 445  * process except for the listener thread.
 446  */
 447 #define LISTENER_SIGNAL     SIGHUP
 448
 449 /* An array of socket descriptors in use by each thread used to
 450  * perform a non-graceful (forced) shutdown of the server.
 451  */
 452 static apr_socket_t **worker_sockets;
 453
 454 static void disable_listensocks(int process_slot)
 455 {
 456     int i;
 457     for (i = 0; i < num_listensocks; i++) {
 458         apr_pollset_remove(event_pollset, &listener_pollfd[i]);
 459     }
 460     ap_scoreboard_image->parent[process_slot].not_accepting = 1;
 461 }
 462
 463 static void enable_listensocks(int process_slot)
 464 {
 465     int i;
 466     if (listener_may_exit) {
 467         return;
 468     }
 469     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00457)
 470                  "Accepting new connections again: "
 471                  "%u active conns (%u lingering/%u clogged/%u suspended), "
 472                  "%u idle workers",
 473                  apr_atomic_read32(&connection_count),
 474                  apr_atomic_read32(&lingering_count),
 475                  apr_atomic_read32(&clogged_count),
 476                  apr_atomic_read32(&suspended_count),
 477                  ap_queue_info_get_idlers(worker_queue_info));
 478     for (i = 0; i < num_listensocks; i++)
 479         apr_pollset_add(event_pollset, &listener_pollfd[i]);
 480     /*
 481      * XXX: This is not yet optimal. If many workers suddenly become available,
 482      * XXX: the parent may kill some processes off too soon.
 483      */
 484     ap_scoreboard_image->parent[process_slot].not_accepting = 0;
 485 }
 486
 487 static void abort_socket_nonblocking(apr_socket_t *csd)
 488 {
 489     apr_status_t rv;
 490     apr_socket_timeout_set(csd, 0);
 491 #if defined(SOL_SOCKET) && defined(SO_LINGER)
 492     /* This socket is over now, and we don't want to block nor linger
 493      * anymore, so reset it. A normal close could still linger in the
 494      * system, while RST is fast, nonblocking, and what the peer will
 495      * get if it sends us further data anyway.
 496      */
 497     {
 498         apr_os_sock_t osd = -1;
 499         struct linger opt;
 500         opt.l_onoff = 1;
 501         opt.l_linger = 0; /* zero timeout is RST */
 502         apr_os_sock_get(&osd, csd);
 503         setsockopt(osd, SOL_SOCKET, SO_LINGER, (void *)&opt, sizeof opt);
 504     }
 505 #endif
 506     rv = apr_socket_close(csd);
 507     if (rv != APR_SUCCESS) {
 508         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(00468)
 509                      "error closing socket");
 510         AP_DEBUG_ASSERT(0);
 511     }
 512 }
 513
 514 static void close_worker_sockets(void)
 515 {
 516     int i;
 517     for (i = 0; i < threads_per_child; i++) {
 518         apr_socket_t *csd = worker_sockets[i];
 519         if (csd) {
 520             worker_sockets[i] = NULL;
 521             abort_socket_nonblocking(csd);
 522         }
 523     }
 524     for (;;) {
 525         event_conn_state_t *cs = defer_linger_chain;
 526         if (!cs) {
 527             break;
 528         }
 529         if (apr_atomic_casptr((void *)&defer_linger_chain, cs->chain,
 530                               cs) != cs) {
 531             /* Race lost, try again */
 532             continue;
 533         }
 534         cs->chain = NULL;
 535         abort_socket_nonblocking(cs->pfd.desc.s);
 536     }
 537 }
 538
 539 static void wakeup_listener(void)
 540 {
 541     listener_may_exit = 1;
 542     if (!listener_os_thread) {
 543         /* XXX there is an obscure path that this doesn't handle perfectly:
 544          *     right after listener thread is created but before
 545          *     listener_os_thread is set, the first worker thread hits an
 546          *     error and starts graceful termination
 547          */
 548         return;
 549     }
 550
 551     /* Unblock the listener if it's poll()ing */
 552     if (listener_is_wakeable) {
 553         apr_pollset_wakeup(event_pollset);
 554     }
 555
 556     /* unblock the listener if it's waiting for a worker */
 557     ap_queue_info_term(worker_queue_info);
 558
 559     /*
 560      * we should just be able to "kill(ap_my_pid, LISTENER_SIGNAL)" on all
 561      * platforms and wake up the listener thread since it is the only thread
 562      * with SIGHUP unblocked, but that doesn't work on Linux
 563      */
 564 #ifdef HAVE_PTHREAD_KILL
 565     pthread_kill(*listener_os_thread, LISTENER_SIGNAL);
 566 #else
 567     kill(ap_my_pid, LISTENER_SIGNAL);
 568 #endif
 569 }
 570
 571 #define ST_INIT              0
 572 #define ST_GRACEFUL          1
 573 #define ST_UNGRACEFUL        2
 574
 575 static int terminate_mode = ST_INIT;
 576
 577 static void signal_threads(int mode)
 578 {
 579     if (terminate_mode == mode) {
 580         return;
 581     }
 582     terminate_mode = mode;
 583     retained->mpm->mpm_state = AP_MPMQ_STOPPING;
 584
 585     /* in case we weren't called from the listener thread, wake up the
 586      * listener thread
 587      */
 588     wakeup_listener();
 589
 590     /* for ungraceful termination, let the workers exit now;
 591      * for graceful termination, the listener thread will notify the
 592      * workers to exit once it has stopped accepting new connections
 593      */
 594     if (mode == ST_UNGRACEFUL) {
 595         workers_may_exit = 1;
 596         ap_queue_interrupt_all(worker_queue);
 597         close_worker_sockets(); /* forcefully kill all current connections */
 598     }
 599 }
 600
 601 static int event_query(int query_code, int *result, apr_status_t *rv)
 602 {
 603     *rv = APR_SUCCESS;
 604     switch (query_code) {
 605     case AP_MPMQ_MAX_DAEMON_USED:
 606         *result = retained->max_daemons_limit;
 607         break;
 608     case AP_MPMQ_IS_THREADED:
 609         *result = AP_MPMQ_STATIC;
 610         break;
 611     case AP_MPMQ_IS_FORKED:
 612         *result = AP_MPMQ_DYNAMIC;
 613         break;
 614     case AP_MPMQ_IS_ASYNC:
 615         *result = 1;
 616         break;
 617     case AP_MPMQ_HARD_LIMIT_DAEMONS:
 618         *result = server_limit;
 619         break;
 620     case AP_MPMQ_HARD_LIMIT_THREADS:
 621         *result = thread_limit;
 622         break;
 623     case AP_MPMQ_MAX_THREADS:
 624         *result = threads_per_child;
 625         break;
 626     case AP_MPMQ_MIN_SPARE_DAEMONS:
 627         *result = 0;
 628         break;
 629     case AP_MPMQ_MIN_SPARE_THREADS:
 630         *result = min_spare_threads;
 631         break;
 632     case AP_MPMQ_MAX_SPARE_DAEMONS:
 633         *result = 0;
 634         break;
 635     case AP_MPMQ_MAX_SPARE_THREADS:
 636         *result = max_spare_threads;
 637         break;
 638     case AP_MPMQ_MAX_REQUESTS_DAEMON:
 639         *result = ap_max_requests_per_child;
 640         break;
 641     case AP_MPMQ_MAX_DAEMONS:
 642         *result = active_daemons_limit;
 643         break;
 644     case AP_MPMQ_MPM_STATE:
 645         *result = retained->mpm->mpm_state;
 646         break;
 647     case AP_MPMQ_GENERATION:
 648         *result = retained->mpm->my_generation;
 649         break;
 650     default:
 651         *rv = APR_ENOTIMPL;
 652         break;
 653     }
 654     return OK;
 655 }
 656
 657 static void event_note_child_killed(int childnum, pid_t pid, ap_generation_t gen)
 658 {
 659     if (childnum != -1) { /* child had a scoreboard slot? */
 660         ap_run_child_status(ap_server_conf,
 661                             ap_scoreboard_image->parent[childnum].pid,
 662                             ap_scoreboard_image->parent[childnum].generation,
 663                             childnum, MPM_CHILD_EXITED);
 664         ap_scoreboard_image->parent[childnum].pid = 0;
 665     }
 666     else {
 667         ap_run_child_status(ap_server_conf, pid, gen, -1, MPM_CHILD_EXITED);
 668     }
 669 }
 670
 671 static void event_note_child_started(int slot, pid_t pid)
 672 {
 673     ap_scoreboard_image->parent[slot].pid = pid;
 674     ap_run_child_status(ap_server_conf,
 675                         ap_scoreboard_image->parent[slot].pid,
 676                         retained->mpm->my_generation, slot, MPM_CHILD_STARTED);
 677 }
 678
 679 static const char *event_get_name(void)
 680 {
 681     return "event";
 682 }
 683
 684 /* a clean exit from a child with proper cleanup */
 685 static void clean_child_exit(int code) __attribute__ ((noreturn));
 686 static void clean_child_exit(int code)
 687 {
 688     retained->mpm->mpm_state = AP_MPMQ_STOPPING;
 689     if (pchild) {
 690         apr_pool_destroy(pchild);
 691     }
 692
 693     if (one_process) {
 694         event_note_child_killed(/* slot */ 0, 0, 0);
 695     }
 696
 697     exit(code);
 698 }
 699
 700 static void just_die(int sig)
 701 {
 702     clean_child_exit(0);
 703 }
 704
 705 /*****************************************************************
 706  * Connection structures and accounting...
 707  */
 708
 709 static int child_fatal;
 710
 711 static apr_status_t decrement_connection_count(void *cs_)
 712 {
 713     event_conn_state_t *cs = cs_;
 714     switch (cs->pub.state) {
 715         case CONN_STATE_LINGER_NORMAL:
 716         case CONN_STATE_LINGER_SHORT:
 717             apr_atomic_dec32(&lingering_count);
 718             break;
 719         case CONN_STATE_SUSPENDED:
 720             apr_atomic_dec32(&suspended_count);
 721             break;
 722         default:
 723             break;
 724     }
 725     /* Unblock the listener if it's waiting for connection_count = 0 */
 726     if (!apr_atomic_dec32(&connection_count)
 727              && listener_is_wakeable && listener_may_exit) {
 728         apr_pollset_wakeup(event_pollset);
 729     }
 730     return APR_SUCCESS;
 731 }
 732
 733 static void notify_suspend(event_conn_state_t *cs)
 734 {
 735     ap_run_suspend_connection(cs->c, cs->r);
 736     cs->suspended = 1;
 737     cs->c->sbh = NULL;
 738 }
 739
 740 static void notify_resume(event_conn_state_t *cs, ap_sb_handle_t *sbh)
 741 {
 742     cs->c->sbh = sbh;
 743     cs->suspended = 0;
 744     ap_run_resume_connection(cs->c, cs->r);
 745 }
 746
 747 /*
 748  * Close our side of the connection, flushing data to the client first.
 749  * Pre-condition: cs is not in any timeout queue and not in the pollset,
 750  *                timeout_mutex is not locked
 751  * return: 0 if connection is fully closed,
 752  *         1 if connection is lingering
 753  * May only be called by worker thread.
 754  */
 755 static int start_lingering_close_blocking(event_conn_state_t *cs)
 756 {
 757     apr_status_t rv;
 758     struct timeout_queue *q;
 759     apr_socket_t *csd = cs->pfd.desc.s;
 760
 761     if (ap_start_lingering_close(cs->c)) {
 762         notify_suspend(cs);
 763         apr_socket_close(csd);
 764         ap_push_pool(worker_queue_info, cs->p);
 765         return 0;
 766     }
 767
 768 #ifdef AP_DEBUG
 769     {
 770         rv = apr_socket_timeout_set(csd, 0);
 771         AP_DEBUG_ASSERT(rv == APR_SUCCESS);
 772     }
 773 #else
 774     apr_socket_timeout_set(csd, 0);
 775 #endif
 776
 777     cs->queue_timestamp = apr_time_now();
 778     /*
 779      * If some module requested a shortened waiting period, only wait for
 780      * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain
 781      * DoS attacks.
 782      */
 783     if (apr_table_get(cs->c->notes, "short-lingering-close")) {
 784         q = short_linger_q;
 785         cs->pub.state = CONN_STATE_LINGER_SHORT;
 786     }
 787     else {
 788         q = linger_q;
 789         cs->pub.state = CONN_STATE_LINGER_NORMAL;
 790     }
 791     apr_atomic_inc32(&lingering_count);
 792     notify_suspend(cs);
 793
 794     cs->pfd.reqevents = (
 795             cs->pub.sense == CONN_SENSE_WANT_WRITE ? APR_POLLOUT :
 796                     APR_POLLIN) | APR_POLLHUP | APR_POLLERR;
 797     cs->pub.sense = CONN_SENSE_DEFAULT;
 798     apr_thread_mutex_lock(timeout_mutex);
 799     TO_QUEUE_APPEND(q, cs);
 800     rv = apr_pollset_add(event_pollset, &cs->pfd);
 801     if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
 802         TO_QUEUE_REMOVE(q, cs);
 803         apr_thread_mutex_unlock(timeout_mutex);
 804         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03092)
 805                      "start_lingering_close: apr_pollset_add failure");
 806         apr_socket_close(cs->pfd.desc.s);
 807         ap_push_pool(worker_queue_info, cs->p);
 808         return 0;
 809     }
 810     apr_thread_mutex_unlock(timeout_mutex);
 811     return 1;
 812 }
 813
 814 /*
 815  * Defer flush and close of the connection by adding it to defer_linger_chain,
 816  * for a worker to grab it and do the job (should that be blocking).
 817  * Pre-condition: cs is not in any timeout queue and not in the pollset,
 818  *                timeout_mutex is not locked
 819  * return: 1 connection is alive (but aside and about to linger)
 820  * May be called by listener thread.
 821  */
 822 static int start_lingering_close_nonblocking(event_conn_state_t *cs)
 823 {
 824     event_conn_state_t *chain;
 825     for (;;) {
 826         cs->chain = chain = defer_linger_chain;
 827         if (apr_atomic_casptr((void *)&defer_linger_chain, cs,
 828                               chain) != chain) {
 829             /* Race lost, try again */
 830             continue;
 831         }
 832         return 1;
 833     }
 834 }
 835
 836 /*
 837  * forcibly close a lingering connection after the lingering period has
 838  * expired
 839  * Pre-condition: cs is not in any timeout queue and not in the pollset
 840  * return: irrelevant (need same prototype as start_lingering_close)
 841  */
 842 static int stop_lingering_close(event_conn_state_t *cs)
 843 {
 844     apr_socket_t *csd = ap_get_conn_socket(cs->c);
 845     ap_log_error(APLOG_MARK, APLOG_TRACE4, 0, ap_server_conf,
 846                  "socket reached timeout in lingering-close state");
 847     abort_socket_nonblocking(csd);
 848     ap_push_pool(worker_queue_info, cs->p);
 849     if (dying)
 850         ap_queue_interrupt_one(worker_queue);
 851     return 0;
 852 }
 853
 854 /*
 855  * This runs before any non-MPM cleanup code on the connection;
 856  * if the connection is currently suspended as far as modules
 857  * know, provide notification of resumption.
 858  */
 859 static apr_status_t ptrans_pre_cleanup(void *dummy)
 860 {
 861     event_conn_state_t *cs = dummy;
 862
 863     if (cs->suspended) {
 864         notify_resume(cs, NULL);
 865     }
 866     return APR_SUCCESS;
 867 }
 868
 869 /*
 870  * event_pre_read_request() and event_request_cleanup() track the
 871  * current r for a given connection.
 872  */
 873 static apr_status_t event_request_cleanup(void *dummy)
 874 {
 875     conn_rec *c = dummy;
 876     event_conn_state_t *cs = ap_get_module_config(c->conn_config,
 877                                                   &mpm_event_module);
 878
 879     cs->r = NULL;
 880     return APR_SUCCESS;
 881 }
 882
 883 static void event_pre_read_request(request_rec *r, conn_rec *c)
 884 {
 885     event_conn_state_t *cs = ap_get_module_config(c->conn_config,
 886                                                   &mpm_event_module);
 887
 888     cs->r = r;
 889     cs->sc = ap_get_module_config(ap_server_conf->module_config,
 890                                   &mpm_event_module);
 891     apr_pool_cleanup_register(r->pool, c, event_request_cleanup,
 892                               apr_pool_cleanup_null);
 893 }
 894
 895 /*
 896  * event_post_read_request() tracks the current server config for a
 897  * given request.
 898  */
 899 static int event_post_read_request(request_rec *r)
 900 {
 901     conn_rec *c = r->connection;
 902     event_conn_state_t *cs = ap_get_module_config(c->conn_config,
 903                                                   &mpm_event_module);
 904
 905     /* To preserve legacy behaviour (consistent with other MPMs), use
 906      * the keepalive timeout from the base server (first on this IP:port)
 907      * when none is explicitly configured on this server.
 908      */
 909     if (r->server->keep_alive_timeout_set) {
 910         cs->sc = ap_get_module_config(r->server->module_config,
 911                                       &mpm_event_module);
 912     }
 913     else {
 914         cs->sc = ap_get_module_config(c->base_server->module_config,
 915                                       &mpm_event_module);
 916     }
 917     return OK;
 918 }
 919
 920 /*
 921  * process one connection in the worker
 922  */
 923 static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock,
 924                           event_conn_state_t * cs, int my_child_num,
 925                           int my_thread_num)
 926 {
 927     conn_rec *c;
 928     long conn_id = ID_FROM_CHILD_THREAD(my_child_num, my_thread_num);
 929     int rc;
 930     ap_sb_handle_t *sbh;
 931
 932     /* XXX: This will cause unbounded mem usage for long lasting connections */
 933     ap_create_sb_handle(&sbh, p, my_child_num, my_thread_num);
 934
 935     if (cs == NULL) {           /* This is a new connection */
 936         listener_poll_type *pt = apr_pcalloc(p, sizeof(*pt));
 937         cs = apr_pcalloc(p, sizeof(event_conn_state_t));
 938         cs->bucket_alloc = apr_bucket_alloc_create(p);
 939         c = ap_run_create_connection(p, ap_server_conf, sock,
 940                                      conn_id, sbh, cs->bucket_alloc);
 941         if (!c) {
 942             ap_push_pool(worker_queue_info, p);
 943             return;
 944         }
 945         apr_atomic_inc32(&connection_count);
 946         apr_pool_cleanup_register(c->pool, cs, decrement_connection_count,
 947                                   apr_pool_cleanup_null);
 948         ap_set_module_config(c->conn_config, &mpm_event_module, cs);
 949         c->current_thread = thd;
 950         cs->c = c;
 951         c->cs = &(cs->pub);
 952         cs->p = p;
 953         cs->sc = ap_get_module_config(ap_server_conf->module_config,
 954                                       &mpm_event_module);
 955         cs->pfd.desc_type = APR_POLL_SOCKET;
 956         cs->pfd.reqevents = APR_POLLIN;
 957         cs->pfd.desc.s = sock;
 958         pt->type = PT_CSD;
 959         pt->baton = cs;
 960         cs->pfd.client_data = pt;
 961         apr_pool_pre_cleanup_register(p, cs, ptrans_pre_cleanup);
 962         TO_QUEUE_ELEM_INIT(cs);
 963
 964         ap_update_vhost_given_ip(c);
 965
 966         rc = ap_run_pre_connection(c, sock);
 967         if (rc != OK && rc != DONE) {
 968             ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c, APLOGNO(00469)
 969                           "process_socket: connection aborted");
 970             c->aborted = 1;
 971         }
 972
 973         /**
 974          * XXX If the platform does not have a usable way of bundling
 975          * accept() with a socket readability check, like Win32,
 976          * and there are measurable delays before the
 977          * socket is readable due to the first data packet arriving,
 978          * it might be better to create the cs on the listener thread
 979          * with the state set to CONN_STATE_CHECK_REQUEST_LINE_READABLE
 980          *
 981          * FreeBSD users will want to enable the HTTP accept filter
 982          * module in their kernel for the highest performance
 983          * When the accept filter is active, sockets are kept in the
 984          * kernel until a HTTP request is received.
 985          */
 986         cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
 987
 988         cs->pub.sense = CONN_SENSE_DEFAULT;
 989     }
 990     else {
 991         c = cs->c;
 992         notify_resume(cs, sbh);
 993         c->current_thread = thd;
 994         /* Subsequent request on a conn, and thread number is part of ID */
 995         c->id = conn_id;
 996
 997         if (c->aborted) {
 998             cs->pub.state = CONN_STATE_LINGER;
 999         }
1000     }
1001
1002     if (cs->pub.state == CONN_STATE_LINGER) {
1003         /* do lingering close below */
1004     }
1005     else if (c->clogging_input_filters) {
1006         /* Since we have an input filter which 'clogs' the input stream,
1007          * like mod_ssl used to, lets just do the normal read from input
1008          * filters, like the Worker MPM does. Filters that need to write
1009          * where they would otherwise read, or read where they would
1010          * otherwise write, should set the sense appropriately.
1011          */
1012         apr_atomic_inc32(&clogged_count);
1013         ap_run_process_connection(c);
1014         if (cs->pub.state != CONN_STATE_SUSPENDED) {
1015             cs->pub.state = CONN_STATE_LINGER;
1016         }
1017         apr_atomic_dec32(&clogged_count);
1018     }
1019     else if (cs->pub.state == CONN_STATE_READ_REQUEST_LINE) {
1020 read_request:
1021         ap_run_process_connection(c);
1022
1023         /* state will be updated upon return
1024          * fall thru to either wait for readability/timeout or
1025          * do lingering close
1026          */
1027     }
1028
1029     if (cs->pub.state == CONN_STATE_WRITE_COMPLETION) {
1030         ap_filter_t *output_filter = c->output_filters;
1031         apr_status_t rv;
1032         ap_update_child_status(sbh, SERVER_BUSY_WRITE, NULL);
1033         while (output_filter->next != NULL) {
1034             output_filter = output_filter->next;
1035         }
1036         rv = output_filter->frec->filter_func.out_func(output_filter, NULL);
1037         if (rv != APR_SUCCESS) {
1038             ap_log_cerror(APLOG_MARK, APLOG_DEBUG, rv, c, APLOGNO(00470)
1039                           "network write failure in core output filter");
1040             cs->pub.state = CONN_STATE_LINGER;
1041         }
1042         else if (c->data_in_output_filters) {
1043             /* Still in WRITE_COMPLETION_STATE:
1044              * Set a write timeout for this connection, and let the
1045              * event thread poll for writeability.
1046              */
1047             cs->queue_timestamp = apr_time_now();
1048             notify_suspend(cs);
1049             cs->pfd.reqevents = (
1050                     cs->pub.sense == CONN_SENSE_WANT_READ ? APR_POLLIN :
1051                             APR_POLLOUT) | APR_POLLHUP | APR_POLLERR;
1052             cs->pub.sense = CONN_SENSE_DEFAULT;
1053             apr_thread_mutex_lock(timeout_mutex);
1054             TO_QUEUE_APPEND(cs->sc->wc_q, cs);
1055             rc = apr_pollset_add(event_pollset, &cs->pfd);
1056             if (rc != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rc)) {
1057                 TO_QUEUE_REMOVE(cs->sc->wc_q, cs);
1058                 apr_thread_mutex_unlock(timeout_mutex);
1059                 ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf, APLOGNO(03465)
1060                              "process_socket: apr_pollset_add failure for "
1061                              "write completion");
1062                 apr_socket_close(cs->pfd.desc.s);
1063                 ap_push_pool(worker_queue_info, cs->p);
1064             }
1065             else {
1066                 apr_thread_mutex_unlock(timeout_mutex);
1067             }
1068             return;
1069         }
1070         else if (c->keepalive != AP_CONN_KEEPALIVE || c->aborted ||
1071                  listener_may_exit) {
1072             cs->pub.state = CONN_STATE_LINGER;
1073         }
1074         else if (c->data_in_input_filters) {
1075             cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
1076             goto read_request;
1077         }
1078         else {
1079             cs->pub.state = CONN_STATE_CHECK_REQUEST_LINE_READABLE;
1080         }
1081     }
1082
1083     if (cs->pub.state == CONN_STATE_LINGER) {
1084         start_lingering_close_blocking(cs);
1085     }
1086     else if (cs->pub.state == CONN_STATE_CHECK_REQUEST_LINE_READABLE) {
1087         /* It greatly simplifies the logic to use a single timeout value per q
1088          * because the new element can just be added to the end of the list and
1089          * it will stay sorted in expiration time sequence.  If brand new
1090          * sockets are sent to the event thread for a readability check, this
1091          * will be a slight behavior change - they use the non-keepalive
1092          * timeout today.  With a normal client, the socket will be readable in
1093          * a few milliseconds anyway.
1094          */
1095         cs->queue_timestamp = apr_time_now();
1096         notify_suspend(cs);
1097
1098         /* Add work to pollset. */
1099         cs->pfd.reqevents = APR_POLLIN;
1100         apr_thread_mutex_lock(timeout_mutex);
1101         TO_QUEUE_APPEND(cs->sc->ka_q, cs);
1102         rc = apr_pollset_add(event_pollset, &cs->pfd);
1103         if (rc != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rc)) {
1104             TO_QUEUE_REMOVE(cs->sc->ka_q, cs);
1105             apr_thread_mutex_unlock(timeout_mutex);
1106             ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf, APLOGNO(03093)
1107                          "process_socket: apr_pollset_add failure for "
1108                          "keep alive");
1109             apr_socket_close(cs->pfd.desc.s);
1110             ap_push_pool(worker_queue_info, cs->p);
1111             return;
1112         }
1113         apr_thread_mutex_unlock(timeout_mutex);
1114     }
1115     else if (cs->pub.state == CONN_STATE_SUSPENDED) {
1116         apr_atomic_inc32(&suspended_count);
1117         notify_suspend(cs);
1118     }
1119 }
1120
1121 /* conns_this_child has gone to zero or below.  See if the admin coded
1122    "MaxConnectionsPerChild 0", and keep going in that case.  Doing it this way
1123    simplifies the hot path in worker_thread */
1124 static void check_infinite_requests(void)
1125 {
1126     if (ap_max_requests_per_child) {
1127         ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1128                      "Stopping process due to MaxConnectionsPerChild");
1129         signal_threads(ST_GRACEFUL);
1130     }
1131     else {
1132         /* keep going */
1133         conns_this_child = APR_INT32_MAX;
1134     }
1135 }
1136
1137 static void close_listeners(int process_slot, int *closed)
1138 {
1139     if (!*closed) {
1140         int i;
1141         disable_listensocks(process_slot);
1142         ap_close_listeners_ex(my_bucket->listeners);
1143         *closed = 1;
1144         dying = 1;
1145         ap_scoreboard_image->parent[process_slot].quiescing = 1;
1146         for (i = 0; i < threads_per_child; ++i) {
1147             ap_update_child_status_from_indexes(process_slot, i,
1148                                                 SERVER_GRACEFUL, NULL);
1149         }
1150         /* wake up the main thread */
1151         kill(ap_my_pid, SIGTERM);
1152
1153         ap_free_idle_pools(worker_queue_info);
1154         ap_queue_interrupt_all(worker_queue);
1155     }
1156 }
1157
1158 static void unblock_signal(int sig)
1159 {
1160     sigset_t sig_mask;
1161
1162     sigemptyset(&sig_mask);
1163     sigaddset(&sig_mask, sig);
1164 #if defined(SIGPROCMASK_SETS_THREAD_MASK)
1165     sigprocmask(SIG_UNBLOCK, &sig_mask, NULL);
1166 #else
1167     pthread_sigmask(SIG_UNBLOCK, &sig_mask, NULL);
1168 #endif
1169 }
1170
/*
 * Signal handler that deliberately does nothing.
 */
static void dummy_signal_handler(int sig)
{
    /* XXX If specifying SIG_IGN is guaranteed to unblock a syscall,
     *     then we don't need this goofy function.
     */
    (void)sig;
}
1177
1178
1179 static apr_status_t init_pollset(apr_pool_t *p)
1180 {
1181     ap_listen_rec *lr;
1182     listener_poll_type *pt;
1183     int i = 0;
1184
1185     listener_pollfd = apr_palloc(p, sizeof(apr_pollfd_t) * num_listensocks);
1186     for (lr = my_bucket->listeners; lr != NULL; lr = lr->next, i++) {
1187         apr_pollfd_t *pfd;
1188         AP_DEBUG_ASSERT(i < num_listensocks);
1189         pfd = &listener_pollfd[i];
1190         pt = apr_pcalloc(p, sizeof(*pt));
1191         pfd->desc_type = APR_POLL_SOCKET;
1192         pfd->desc.s = lr->sd;
1193         pfd->reqevents = APR_POLLIN;
1194
1195         pt->type = PT_ACCEPT;
1196         pt->baton = lr;
1197
1198         pfd->client_data = pt;
1199
1200         apr_socket_opt_set(pfd->desc.s, APR_SO_NONBLOCK, 1);
1201         apr_pollset_add(event_pollset, pfd);
1202
1203         lr->accept_func = ap_unixd_accept;
1204     }
1205
1206     return APR_SUCCESS;
1207 }
1208
1209 static apr_status_t push_timer2worker(timer_event_t* te)
1210 {
1211     return ap_queue_push_timer(worker_queue, te);
1212 }
1213
1214 /*
1215  * Pre-condition: cs is neither in event_pollset nor a timeout queue
1216  * this function may only be called by the listener
1217  */
1218 static apr_status_t push2worker(event_conn_state_t *cs, apr_socket_t *csd,
1219                                 apr_pool_t *ptrans)
1220 {
1221     apr_status_t rc;
1222
1223     if (cs) {
1224         csd = cs->pfd.desc.s;
1225         ptrans = cs->p;
1226     }
1227     rc = ap_queue_push(worker_queue, csd, cs, ptrans);
1228     if (rc != APR_SUCCESS) {
1229         ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf, APLOGNO(00471)
1230                      "push2worker: ap_queue_push failed");
1231         /* trash the connection; we couldn't queue the connected
1232          * socket to a worker
1233          */
1234         if (csd) {
1235             abort_socket_nonblocking(csd);
1236         }
1237         if (ptrans) {
1238             ap_push_pool(worker_queue_info, ptrans);
1239         }
1240         signal_threads(ST_GRACEFUL);
1241     }
1242
1243     return rc;
1244 }
1245
1246 /* get_worker:
1247  *     If *have_idle_worker_p == 0, reserve a worker thread, and set
1248  *     *have_idle_worker_p = 1.
1249  *     If *have_idle_worker_p is already 1, will do nothing.
1250  *     If blocking == 1, block if all workers are currently busy.
1251  *     If no worker was available immediately, will set *all_busy to 1.
1252  *     XXX: If there are no workers, we should not block immediately but
1253  *     XXX: close all keep-alive connections first.
1254  */
1255 static void get_worker(int *have_idle_worker_p, int blocking, int *all_busy)
1256 {
1257     apr_status_t rc;
1258
1259     if (*have_idle_worker_p) {
1260         /* already reserved a worker thread - must have hit a
1261          * transient error on a previous pass
1262          */
1263         return;
1264     }
1265
1266     if (blocking)
1267         rc = ap_queue_info_wait_for_idler(worker_queue_info, all_busy);
1268     else
1269         rc = ap_queue_info_try_get_idler(worker_queue_info);
1270
1271     if (rc == APR_SUCCESS || APR_STATUS_IS_EOF(rc)) {
1272         *have_idle_worker_p = 1;
1273     }
1274     else if (!blocking && rc == APR_EAGAIN) {
1275         *all_busy = 1;
1276     }
1277     else {
1278         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf, APLOGNO(00472)
1279                      "ap_queue_info_wait_for_idler failed.  "
1280                      "Attempting to shutdown process gracefully");
1281         signal_threads(ST_GRACEFUL);
1282     }
1283 }
1284
1285 /* Structures to reuse */
1286 static APR_RING_HEAD(timer_free_ring_t, timer_event_t) timer_free_ring;
1287
1288 static apr_skiplist *timer_skiplist;
1289 static volatile apr_time_t timers_next_expiry;
1290
1291 /* Same goal as for TIMEOUT_FUDGE_FACTOR (avoid extra poll calls), but applied
1292  * to timers. Since their timeouts are custom (user defined), we can't be too
1293  * approximative here (hence using 0.01s).
1294  */
1295 #define EVENT_FUDGE_FACTOR apr_time_from_msec(10)
1296
1297 /* The following compare function is used by apr_skiplist_insert() to keep the
1298  * elements (timers) sorted and provide O(log n) complexity (this is also true
1299  * for apr_skiplist_{find,remove}(), but those are not used in MPM event where
1300  * inserted timers are not searched nor removed, but with apr_skiplist_pop()
1301  * which does use any compare function).  It is meant to return 0 when a == b,
1302  * <0 when a < b, and >0 when a > b.  However apr_skiplist_insert() will not
1303  * add duplicates (i.e. a == b), and apr_skiplist_add() is only available in
1304  * APR 1.6, yet multiple timers could possibly be created in the same micro-
1305  * second (duplicates with regard to apr_time_t); therefore we implement the
1306  * compare function to return +1 instead of 0 when compared timers are equal,
1307  * thus duplicates are still added after each other (in order of insertion).
1308  */
1309 static int timer_comp(void *a, void *b)
1310 {
1311     apr_time_t t1 = (apr_time_t) ((timer_event_t *)a)->when;
1312     apr_time_t t2 = (apr_time_t) ((timer_event_t *)b)->when;
1313     AP_DEBUG_ASSERT(t1);
1314     AP_DEBUG_ASSERT(t2);
1315     return ((t1 < t2) ? -1 : 1);
1316 }
1317
1318 static apr_thread_mutex_t *g_timer_skiplist_mtx;
1319
1320 static apr_status_t event_register_timed_callback(apr_time_t t,
1321                                                   ap_mpm_callback_fn_t *cbfn,
1322                                                   void *baton)
1323 {
1324     timer_event_t *te;
1325     /* oh yeah, and make locking smarter/fine grained. */
1326     apr_thread_mutex_lock(g_timer_skiplist_mtx);
1327
1328     if (!APR_RING_EMPTY(&timer_free_ring, timer_event_t, link)) {
1329         te = APR_RING_FIRST(&timer_free_ring);
1330         APR_RING_REMOVE(te, link);
1331     }
1332     else {
1333         te = apr_skiplist_alloc(timer_skiplist, sizeof(timer_event_t));
1334         APR_RING_ELEM_INIT(te, link);
1335     }
1336
1337     te->cbfunc = cbfn;
1338     te->baton = baton;
1339     /* XXXXX: optimize */
1340     te->when = t + apr_time_now();
1341
1342     {
1343         apr_time_t next_expiry;
1344
1345         /* Okay, add sorted by when.. */
1346         apr_skiplist_insert(timer_skiplist, te);
1347
1348         /* Cheaply update the overall timers' next expiry according to
1349          * this event, if necessary.
1350          */
1351         next_expiry = timers_next_expiry;
1352         if (!next_expiry || next_expiry > te->when + EVENT_FUDGE_FACTOR) {
1353             timers_next_expiry = te->when;
1354             /* Unblock the poll()ing listener for it to update its timeout. */
1355             if (listener_is_wakeable) {
1356                 apr_pollset_wakeup(event_pollset);
1357             }
1358         }
1359     }
1360
1361     apr_thread_mutex_unlock(g_timer_skiplist_mtx);
1362
1363     return APR_SUCCESS;
1364 }
1365
1366
1367 /*
1368  * Close socket and clean up if remote closed its end while we were in
1369  * lingering close.
1370  * Only to be called in the listener thread;
1371  * Pre-condition: cs is in one of the linger queues and in the pollset
1372  */
1373 static void process_lingering_close(event_conn_state_t *cs, const apr_pollfd_t *pfd)
1374 {
1375     apr_socket_t *csd = ap_get_conn_socket(cs->c);
1376     char dummybuf[2048];
1377     apr_size_t nbytes;
1378     apr_status_t rv;
1379     struct timeout_queue *q;
1380     q = (cs->pub.state == CONN_STATE_LINGER_SHORT) ? short_linger_q : linger_q;
1381
1382     /* socket is already in non-blocking state */
1383     do {
1384         nbytes = sizeof(dummybuf);
1385         rv = apr_socket_recv(csd, dummybuf, &nbytes);
1386     } while (rv == APR_SUCCESS);
1387
1388     if (APR_STATUS_IS_EAGAIN(rv)) {
1389         return;
1390     }
1391
1392     apr_thread_mutex_lock(timeout_mutex);
1393     TO_QUEUE_REMOVE(q, cs);
1394     rv = apr_pollset_remove(event_pollset, pfd);
1395     apr_thread_mutex_unlock(timeout_mutex);
1396     AP_DEBUG_ASSERT(rv == APR_SUCCESS ||  APR_STATUS_IS_NOTFOUND(rv));
1397
1398     rv = apr_socket_close(csd);
1399     AP_DEBUG_ASSERT(rv == APR_SUCCESS);
1400
1401     ap_push_pool(worker_queue_info, cs->p);
1402     if (dying)
1403         ap_queue_interrupt_one(worker_queue);
1404 }
1405
1406 /* call 'func' for all elements of 'q' with timeout less than 'timeout_time'.
1407  * Pre-condition: timeout_mutex must already be locked
1408  * Post-condition: timeout_mutex will be locked again
1409  */
1410 static void process_timeout_queue(struct timeout_queue *q,
1411                                   apr_time_t timeout_time,
1412                                   int (*func)(event_conn_state_t *))
1413 {
1414     apr_uint32_t total = 0, count;
1415     event_conn_state_t *first, *cs, *last;
1416     struct timeout_head_t trash;
1417     struct timeout_queue *qp;
1418     apr_status_t rv;
1419
1420     if (!apr_atomic_read32(q->total)) {
1421         return;
1422     }
1423
1424     APR_RING_INIT(&trash, event_conn_state_t, timeout_list);
1425     for (qp = q; qp; qp = qp->next) {
1426         count = 0;
1427         cs = first = last = APR_RING_FIRST(&qp->head);
1428         while (cs != APR_RING_SENTINEL(&qp->head, event_conn_state_t,
1429                                        timeout_list)) {
1430             /* Trash the entry if:
1431              * - no timeout_time was given (asked for all), or
1432              * - it expired (according to the queue timeout), or
1433              * - the system clock skewed in the past: no entry should be
1434              *   registered above the given timeout_time (~now) + the queue
1435              *   timeout, we won't keep any here (eg. for centuries).
1436              *
1437              * Otherwise stop, no following entry will match thanks to the
1438              * single timeout per queue (entries are added to the end!).
1439              * This allows maintenance in O(1).
1440              */
1441             if (timeout_time
1442                     && cs->queue_timestamp + qp->timeout > timeout_time
1443                     && cs->queue_timestamp < timeout_time + qp->timeout) {
1444                 /* Since this is the next expiring of this queue, update the
1445                  * overall queues' next expiry if it's later than this one.
1446                  */
1447                 apr_time_t q_expiry = cs->queue_timestamp + qp->timeout;
1448                 apr_time_t next_expiry = queues_next_expiry;
1449                 if (!next_expiry || next_expiry > q_expiry) {
1450                     queues_next_expiry = q_expiry;
1451                 }
1452                 break;
1453             }
1454
1455             last = cs;
1456             rv = apr_pollset_remove(event_pollset, &cs->pfd);
1457             if (rv != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rv)) {
1458                 ap_log_cerror(APLOG_MARK, APLOG_ERR, rv, cs->c, APLOGNO(00473)
1459                               "apr_pollset_remove failed");
1460             }
1461             cs = APR_RING_NEXT(cs, timeout_list);
1462             count++;
1463         }
1464         if (!count)
1465             continue;
1466
1467         APR_RING_UNSPLICE(first, last, timeout_list);
1468         APR_RING_SPLICE_TAIL(&trash, first, last, event_conn_state_t,
1469                              timeout_list);
1470         AP_DEBUG_ASSERT(apr_atomic_read32(q->total) >= count);
1471         apr_atomic_sub32(q->total, count);
1472         qp->count -= count;
1473         total += count;
1474     }
1475     if (!total)
1476         return;
1477
1478     apr_thread_mutex_unlock(timeout_mutex);
1479     first = APR_RING_FIRST(&trash);
1480     do {
1481         cs = APR_RING_NEXT(first, timeout_list);
1482         TO_QUEUE_ELEM_INIT(first);
1483         func(first);
1484         first = cs;
1485     } while (--total);
1486     apr_thread_mutex_lock(timeout_mutex);
1487 }
1488
1489 static void process_keepalive_queue(apr_time_t timeout_time)
1490 {
1491     /* If all workers are busy, we kill older keep-alive connections so
1492      * that they may connect to another process.
1493      */
1494     if (!timeout_time) {
1495         ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1496                      "All workers are busy or dying, will close %u "
1497                      "keep-alive connections",
1498                      apr_atomic_read32(keepalive_q->total));
1499     }
1500     process_timeout_queue(keepalive_q, timeout_time,
1501                           start_lingering_close_nonblocking);
1502 }
1503
1504 static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
1505 {
1506     timer_event_t *te;
1507     apr_status_t rc;
1508     proc_info *ti = dummy;
1509     int process_slot = ti->pslot;
1510     struct process_score *ps = ap_get_scoreboard_process(process_slot);
1511     apr_pool_t *tpool = apr_thread_pool_get(thd);
1512     void *csd = NULL;
1513     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1514     ap_listen_rec *lr;
1515     int have_idle_worker = 0;
1516     const apr_pollfd_t *out_pfd;
1517     apr_int32_t num = 0;
1518     apr_interval_time_t timeout_interval;
1519     apr_time_t timeout_time = 0, now, last_log;
1520     listener_poll_type *pt;
1521     int closed = 0, listeners_disabled = 0;
1522
1523     last_log = apr_time_now();
1524     free(ti);
1525
1526     rc = init_pollset(tpool);
1527     if (rc != APR_SUCCESS) {
1528         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1529                      "failed to initialize pollset, "
1530                      "attempting to shutdown process gracefully");
1531         signal_threads(ST_GRACEFUL);
1532         return NULL;
1533     }
1534
1535     /* Unblock the signal used to wake this thread up, and set a handler for
1536      * it.
1537      */
1538     unblock_signal(LISTENER_SIGNAL);
1539     apr_signal(LISTENER_SIGNAL, dummy_signal_handler);
1540
1541     for (;;) {
1542         int workers_were_busy = 0;
1543
1544         if (listener_may_exit) {
1545             close_listeners(process_slot, &closed);
1546             if (terminate_mode == ST_UNGRACEFUL
1547                 || apr_atomic_read32(&connection_count) == 0)
1548                 break;
1549         }
1550
1551         if (conns_this_child <= 0)
1552             check_infinite_requests();
1553
1554         now = apr_time_now();
1555         if (APLOGtrace6(ap_server_conf)) {
1556             /* trace log status every second */
1557             if (now - last_log > apr_time_from_sec(1)) {
1558                 last_log = now;
1559                 apr_thread_mutex_lock(timeout_mutex);
1560                 ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
1561                              "connections: %u (clogged: %u write-completion: %d "
1562                              "keep-alive: %d lingering: %d suspended: %u)",
1563                              apr_atomic_read32(&connection_count),
1564                              apr_atomic_read32(&clogged_count),
1565                              apr_atomic_read32(write_completion_q->total),
1566                              apr_atomic_read32(keepalive_q->total),
1567                              apr_atomic_read32(&lingering_count),
1568                              apr_atomic_read32(&suspended_count));
1569                 if (dying) {
1570                     ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
1571                                  "%u/%u workers shutdown",
1572                                  apr_atomic_read32(&threads_shutdown),
1573                                  threads_per_child);
1574                 }
1575                 apr_thread_mutex_unlock(timeout_mutex);
1576             }
1577         }
1578
1579         /* Start with an infinite poll() timeout and update it according to
1580          * the next expiring timer or queue entry. If there are none, either
1581          * the listener is wakeable and it can poll() indefinitely until a wake
1582          * up occurs, otherwise periodic checks (maintenance, shutdown, ...)
1583          * must be performed.
1584          */
1585         timeout_interval = -1;
1586
1587         /* Push expired timers to a worker, the first remaining one determines
1588          * the maximum time to poll() below, if any.
1589          */
1590         timeout_time = timers_next_expiry;
1591         if (timeout_time && timeout_time < now + EVENT_FUDGE_FACTOR) {
1592             apr_thread_mutex_lock(g_timer_skiplist_mtx);
1593             while ((te = apr_skiplist_peek(timer_skiplist))) {
1594                 if (te->when > now + EVENT_FUDGE_FACTOR) {
1595                     timers_next_expiry = te->when;
1596                     timeout_interval = te->when - now;
1597                     break;
1598                 }
1599                 apr_skiplist_pop(timer_skiplist, NULL);
1600                 push_timer2worker(te);
1601             }
1602             if (!te) {
1603                 timers_next_expiry = 0;
1604             }
1605             apr_thread_mutex_unlock(g_timer_skiplist_mtx);
1606         }
1607
1608         /* Same for queues, use their next expiry, if any. */
1609         timeout_time = queues_next_expiry;
1610         if (timeout_time
1611                 && (timeout_interval < 0
1612                     || timeout_time <= now
1613                     || timeout_interval > timeout_time - now)) {
1614             timeout_interval = timeout_time > now ? timeout_time - now : 1;
1615         }
1616
1617         /* When non-wakeable, don't wait more than 100 ms, in any case. */
1618 #define NON_WAKEABLE_POLL_TIMEOUT apr_time_from_msec(100)
1619         if (!listener_is_wakeable
1620                 && (timeout_interval < 0
1621                     || timeout_interval > NON_WAKEABLE_POLL_TIMEOUT)) {
1622             timeout_interval = NON_WAKEABLE_POLL_TIMEOUT;
1623         }
1624
1625         rc = apr_pollset_poll(event_pollset, timeout_interval, &num, &out_pfd);
1626         if (rc != APR_SUCCESS) {
1627             if (APR_STATUS_IS_EINTR(rc)) {
1628                 /* Woken up, if we are exiting we must fall through to kill
1629                  * kept-alive connections, otherwise we only need to update
1630                  * timeouts (logic is above, so restart the loop).
1631                  */
1632                 if (!listener_may_exit) {
1633                     continue;
1634                 }
1635                 timeout_time = 0;
1636             }
1637             else if (!APR_STATUS_IS_TIMEUP(rc)) {
1638                 ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf,
1639                              "apr_pollset_poll failed.  Attempting to "
1640                              "shutdown process gracefully");
1641                 signal_threads(ST_GRACEFUL);
1642             }
1643             num = 0;
1644         }
1645
1646         if (listener_may_exit) {
1647             close_listeners(process_slot, &closed);
1648             if (terminate_mode == ST_UNGRACEFUL
1649                 || apr_atomic_read32(&connection_count) == 0)
1650                 break;
1651         }
1652
1653         while (num) {
1654             pt = (listener_poll_type *) out_pfd->client_data;
1655             if (pt->type == PT_CSD) {
1656                 /* one of the sockets is readable */
1657                 event_conn_state_t *cs = (event_conn_state_t *) pt->baton;
1658                 struct timeout_queue *remove_from_q = cs->sc->wc_q;
1659                 int blocking = 1;
1660
1661                 switch (cs->pub.state) {
1662                 case CONN_STATE_CHECK_REQUEST_LINE_READABLE:
1663                     cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
1664                     remove_from_q = cs->sc->ka_q;
1665                     /* don't wait for a worker for a keepalive request */
1666                     blocking = 0;
1667                     /* FALL THROUGH */
1668                 case CONN_STATE_WRITE_COMPLETION:
1669                     get_worker(&have_idle_worker, blocking,
1670                                &workers_were_busy);
1671                     apr_thread_mutex_lock(timeout_mutex);
1672                     TO_QUEUE_REMOVE(remove_from_q, cs);
1673                     rc = apr_pollset_remove(event_pollset, &cs->pfd);
1674                     apr_thread_mutex_unlock(timeout_mutex);
1675
1676                     /*
1677                      * Some of the pollset backends, like KQueue or Epoll
1678                      * automagically remove the FD if the socket is closed,
1679                      * therefore, we can accept _SUCCESS or _NOTFOUND,
1680                      * and we still want to keep going
1681                      */
1682                     if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
1683                         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1684                                      APLOGNO(03094) "pollset remove failed");
1685                         start_lingering_close_nonblocking(cs);
1686                         break;
1687                     }
1688
1689                     /* If we didn't get a worker immediately for a keep-alive
1690                      * request, we close the connection, so that the client can
1691                      * re-connect to a different process.
1692                      */
1693                     if (!have_idle_worker) {
1694                         start_lingering_close_nonblocking(cs);
1695                     }
1696                     else if (push2worker(cs, NULL, NULL) == APR_SUCCESS) {
1697                         have_idle_worker = 0;
1698                     }
1699                     break;
1700
1701                 case CONN_STATE_LINGER_NORMAL:
1702                 case CONN_STATE_LINGER_SHORT:
1703                     process_lingering_close(cs, out_pfd);
1704                     break;
1705
1706                 default:
1707                     ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1708                                  ap_server_conf, APLOGNO(03096)
1709                                  "event_loop: unexpected state %d",
1710                                  cs->pub.state);
1711                     ap_assert(0);
1712                 }
1713             }
1714             else if (pt->type == PT_ACCEPT) {
1715                 /* A Listener Socket is ready for an accept() */
1716                 if (workers_were_busy) {
1717                     if (!listeners_disabled)
1718                         disable_listensocks(process_slot);
1719                     listeners_disabled = 1;
1720                     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1721                                  "All workers busy, not accepting new conns "
1722                                  "in this process");
1723                 }
1724                 else if (  (int)apr_atomic_read32(&connection_count)
1725                            - (int)apr_atomic_read32(&lingering_count)
1726                          > threads_per_child
1727                            + ap_queue_info_get_idlers(worker_queue_info) *
1728                              worker_factor / WORKER_FACTOR_SCALE)
1729                 {
1730                     if (!listeners_disabled)
1731                         disable_listensocks(process_slot);
1732                     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1733                                  "Too many open connections (%u), "
1734                                  "not accepting new conns in this process",
1735                                  apr_atomic_read32(&connection_count));
1736                     ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1737                                  "Idle workers: %u",
1738                                  ap_queue_info_get_idlers(worker_queue_info));
1739                     listeners_disabled = 1;
1740                 }
1741                 else if (listeners_disabled) {
1742                     listeners_disabled = 0;
1743                     enable_listensocks(process_slot);
1744                 }
1745                 if (!listeners_disabled) {
1746                     lr = (ap_listen_rec *) pt->baton;
1747                     ap_pop_pool(&ptrans, worker_queue_info);
1748
1749                     if (ptrans == NULL) {
1750                         /* create a new transaction pool for each accepted socket */
1751                         apr_allocator_t *allocator;
1752
1753                         apr_allocator_create(&allocator);
1754                         apr_allocator_max_free_set(allocator,
1755                                                    ap_max_mem_free);
1756                         apr_pool_create_ex(&ptrans, pconf, NULL, allocator);
1757                         apr_allocator_owner_set(allocator, ptrans);
1758                         if (ptrans == NULL) {
1759                             ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1760                                          ap_server_conf, APLOGNO(03097)
1761                                          "Failed to create transaction pool");
1762                             signal_threads(ST_GRACEFUL);
1763                             return NULL;
1764                         }
1765                     }
1766                     apr_pool_tag(ptrans, "transaction");
1767
1768                     get_worker(&have_idle_worker, 1, &workers_were_busy);
1769                     rc = lr->accept_func(&csd, lr, ptrans);
1770
1771                     /* later we trash rv and rely on csd to indicate
1772                      * success/failure
1773                      */
1774                     AP_DEBUG_ASSERT(rc == APR_SUCCESS || !csd);
1775
1776                     if (rc == APR_EGENERAL) {
1777                         /* E[NM]FILE, ENOMEM, etc */
1778                         resource_shortage = 1;
1779                         signal_threads(ST_GRACEFUL);
1780                     }
1781
1782                     if (csd != NULL) {
1783                         conns_this_child--;
1784                         if (push2worker(NULL, csd, ptrans) == APR_SUCCESS) {
1785                             have_idle_worker = 0;
1786                         }
1787                     }
1788                     else {
1789                         ap_push_pool(worker_queue_info, ptrans);
1790                     }
1791                 }
1792             }               /* if:else on pt->type */
1793             out_pfd++;
1794             num--;
1795         }                   /* while for processing poll */
1796
1797         /* XXX possible optimization: stash the current time for use as
1798          * r->request_time for new requests
1799          */
1800         /* We process the timeout queues here only when their overall next
1801          * expiry (read once above) is over. This happens accurately since
1802          * adding to the queues (in workers) can only decrease this expiry,
1803          * while latest ones are only taken into account here (in listener)
1804          * during queues' processing, with the lock held. This works both
1805          * with and without wake-ability.
1806          */
1807         if (timeout_time && timeout_time < (now = apr_time_now())) {
1808             timeout_time = now + TIMEOUT_FUDGE_FACTOR;
1809
1810             /* handle timed out sockets */
1811             apr_thread_mutex_lock(timeout_mutex);
1812
1813             /* Processing all the queues below will recompute this. */
1814             queues_next_expiry = 0;
1815
1816             /* Step 1: keepalive timeouts */
1817             if (workers_were_busy || dying) {
1818                 process_keepalive_queue(0); /* kill'em all \m/ */
1819             }
1820             else {
1821                 process_keepalive_queue(timeout_time);
1822             }
1823             /* Step 2: write completion timeouts */
1824             process_timeout_queue(write_completion_q, timeout_time,
1825                                   start_lingering_close_nonblocking);
1826             /* Step 3: (normal) lingering close completion timeouts */
1827             process_timeout_queue(linger_q, timeout_time,
1828                                   stop_lingering_close);
1829             /* Step 4: (short) lingering close completion timeouts */
1830             process_timeout_queue(short_linger_q, timeout_time,
1831                                   stop_lingering_close);
1832
1833             apr_thread_mutex_unlock(timeout_mutex);
1834
1835             ps->keep_alive = apr_atomic_read32(keepalive_q->total);
1836             ps->write_completion = apr_atomic_read32(write_completion_q->total);
1837             ps->connections = apr_atomic_read32(&connection_count);
1838             ps->suspended = apr_atomic_read32(&suspended_count);
1839             ps->lingering_close = apr_atomic_read32(&lingering_count);
1840         }
1841         else if ((workers_were_busy || dying)
1842                  && apr_atomic_read32(keepalive_q->total)) {
1843             apr_thread_mutex_lock(timeout_mutex);
1844             process_keepalive_queue(0); /* kill'em all \m/ */
1845             apr_thread_mutex_unlock(timeout_mutex);
1846             ps->keep_alive = 0;
1847         }
1848
1849         /* If there are some lingering closes to defer (to a worker), schedule
1850          * them now. We might wakeup a worker spuriously if another one empties
1851          * defer_linger_chain in the meantime, but there also may be no active
1852          * or all busy workers for an undefined time.  In any case a deferred
1853          * lingering close can't starve if we do that here since the chain is
1854          * filled only above in the listener and it's emptied only in the
1855          * worker(s); thus a NULL here means it will stay so while the listener
1856          * waits (possibly indefinitely) in poll().
1857          */
1858         if (defer_linger_chain) {
1859             get_worker(&have_idle_worker, 0, &workers_were_busy);
1860             if (have_idle_worker
1861                     && defer_linger_chain /* re-test */
1862                     && push2worker(NULL, NULL, NULL) == APR_SUCCESS) {
1863                 have_idle_worker = 0;
1864             }
1865         }
1866
1867         if (listeners_disabled && !workers_were_busy
1868             && (int)apr_atomic_read32(&connection_count)
1869                - (int)apr_atomic_read32(&lingering_count)
1870                < ((int)ap_queue_info_get_idlers(worker_queue_info) - 1)
1871                  * worker_factor / WORKER_FACTOR_SCALE + threads_per_child)
1872         {
1873             listeners_disabled = 0;
1874             enable_listensocks(process_slot);
1875         }
1876         /*
1877          * XXX: do we need to set some timeout that re-enables the listensocks
1878          * XXX: in case no other event occurs?
1879          */
1880     }     /* listener main loop */
1881
1882     close_listeners(process_slot, &closed);
1883     ap_queue_term(worker_queue);
1884
1885     apr_thread_exit(thd, APR_SUCCESS);
1886     return NULL;
1887 }
1888
1889 /*
1890  * During graceful shutdown, if there are more running worker threads than
1891  * open connections, exit one worker thread.
1892  *
1893  * return 1 if thread should exit, 0 if it should continue running.
1894  */
1895 static int worker_thread_should_exit_early(void)
1896 {
1897     for (;;) {
1898         apr_uint32_t conns = apr_atomic_read32(&connection_count);
1899         apr_uint32_t dead = apr_atomic_read32(&threads_shutdown);
1900         apr_uint32_t newdead;
1901
1902         AP_DEBUG_ASSERT(dead <= threads_per_child);
1903         if (conns >= threads_per_child - dead)
1904             return 0;
1905
1906         newdead = dead + 1;
1907         if (apr_atomic_cas32(&threads_shutdown, newdead, dead) == dead) {
1908             /*
1909              * No other thread has exited in the mean time, safe to exit
1910              * this one.
1911              */
1912             return 1;
1913         }
1914     }
1915 }
1916
1917 /* XXX For ungraceful termination/restart, we definitely don't want to
1918  *     wait for active connections to finish but we may want to wait
1919  *     for idle workers to get out of the queue code and release mutexes,
1920  *     since those mutexes are cleaned up pretty soon and some systems
1921  *     may not react favorably (i.e., segfault) if operations are attempted
1922  *     on cleaned-up mutexes.
1923  */
1924 static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
1925 {
1926     proc_info *ti = dummy;
1927     int process_slot = ti->pslot;
1928     int thread_slot = ti->tslot;
1929     apr_socket_t *csd = NULL;
1930     event_conn_state_t *cs;
1931     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1932     apr_status_t rv;
1933     int is_idle = 0;
1934     timer_event_t *te = NULL;
1935
1936     free(ti);
1937
1938     ap_scoreboard_image->servers[process_slot][thread_slot].pid = ap_my_pid;
1939     ap_scoreboard_image->servers[process_slot][thread_slot].tid = apr_os_thread_current();
1940     ap_scoreboard_image->servers[process_slot][thread_slot].generation = retained->mpm->my_generation;
1941     ap_update_child_status_from_indexes(process_slot, thread_slot,
1942                                         SERVER_STARTING, NULL);
1943
1944     while (!workers_may_exit) {
1945         if (!is_idle) {
1946             rv = ap_queue_info_set_idle(worker_queue_info, NULL);
1947             if (rv != APR_SUCCESS) {
1948                 ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
1949                              "ap_queue_info_set_idle failed. Attempting to "
1950                              "shutdown process gracefully.");
1951                 signal_threads(ST_GRACEFUL);
1952                 break;
1953             }
1954             is_idle = 1;
1955         }
1956
1957         ap_update_child_status_from_indexes(process_slot, thread_slot,
1958                                             dying ? SERVER_GRACEFUL
1959                                                   : SERVER_READY, NULL);
1960       worker_pop:
1961         if (workers_may_exit) {
1962             break;
1963         }
1964         if (dying && worker_thread_should_exit_early()) {
1965             break;
1966         }
1967
1968         te = NULL;
1969         rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te);
1970
1971         if (rv != APR_SUCCESS) {
1972             /* We get APR_EOF during a graceful shutdown once all the
1973              * connections accepted by this server process have been handled.
1974              */
1975             if (APR_STATUS_IS_EOF(rv)) {
1976                 break;
1977             }
1978             /* We get APR_EINTR whenever ap_queue_pop() has been interrupted
1979              * from an explicit call to ap_queue_interrupt_all(). This allows
1980              * us to unblock threads stuck in ap_queue_pop() when a shutdown
1981              * is pending.
1982              *
1983              * If workers_may_exit is set and this is ungraceful termination/
1984              * restart, we are bound to get an error on some systems (e.g.,
1985              * AIX, which sanity-checks mutex operations) since the queue
1986              * may have already been cleaned up.  Don't log the "error" if
1987              * workers_may_exit is set.
1988              */
1989             else if (APR_STATUS_IS_EINTR(rv)) {
1990                 goto worker_pop;
1991             }
1992             /* We got some other error. */
1993             else if (!workers_may_exit) {
1994                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1995                              APLOGNO(03099) "ap_queue_pop failed");
1996             }
1997             continue;
1998         }
1999         if (te != NULL) {
2000             te->cbfunc(te->baton);
2001
2002             {
2003                 apr_thread_mutex_lock(g_timer_skiplist_mtx);
2004                 APR_RING_INSERT_TAIL(&timer_free_ring, te, timer_event_t, link);
2005                 apr_thread_mutex_unlock(g_timer_skiplist_mtx);
2006             }
2007         }
2008         else {
2009             is_idle = 0;
2010             if (csd != NULL) {
2011                 worker_sockets[thread_slot] = csd;
2012                 process_socket(thd, ptrans, csd, cs, process_slot, thread_slot);
2013                 worker_sockets[thread_slot] = NULL;
2014             }
2015         }
2016
2017         /* If there are deferred lingering closes, handle them now. */
2018         while (!workers_may_exit) {
2019             cs = defer_linger_chain;
2020             if (!cs) {
2021                 break;
2022             }
2023             if (apr_atomic_casptr((void *)&defer_linger_chain, cs->chain,
2024                                   cs) != cs) {
2025                 /* Race lost, try again */
2026                 continue;
2027             }
2028             cs->chain = NULL;
2029
2030             worker_sockets[thread_slot] = csd = cs->pfd.desc.s;
2031 #ifdef AP_DEBUG
2032             rv = apr_socket_timeout_set(csd, SECONDS_TO_LINGER);
2033             AP_DEBUG_ASSERT(rv == APR_SUCCESS);
2034 #else
2035             apr_socket_timeout_set(csd, SECONDS_TO_LINGER);
2036 #endif
2037             cs->pub.state = CONN_STATE_LINGER;
2038             process_socket(thd, cs->p, csd, cs, process_slot, thread_slot);
2039             worker_sockets[thread_slot] = NULL;
2040         }
2041     }
2042
2043     ap_update_child_status_from_indexes(process_slot, thread_slot,
2044                                         dying ? SERVER_DEAD
2045                                               : SERVER_GRACEFUL, NULL);
2046
2047     apr_thread_exit(thd, APR_SUCCESS);
2048     return NULL;
2049 }
2050
/*
 * Tell whether signum is one of the signals this function singles out.
 *
 * Returns 1 for SIGTERM and SIGINT, 0 for any other value.
 */
static int check_signal(int signum)
{
    return (signum == SIGTERM || signum == SIGINT) ? 1 : 0;
}
2060
2061
2062
2063 static void create_listener_thread(thread_starter * ts)
2064 {
2065     int my_child_num = ts->child_num_arg;
2066     apr_threadattr_t *thread_attr = ts->threadattr;
2067     proc_info *my_info;
2068     apr_status_t rv;
2069
2070     my_info = (proc_info *) ap_malloc(sizeof(proc_info));
2071     my_info->pslot = my_child_num;
2072     my_info->tslot = -1;      /* listener thread doesn't have a thread slot */
2073     rv = apr_thread_create(&ts->listener, thread_attr, listener_thread,
2074                            my_info, pchild);
2075     if (rv != APR_SUCCESS) {
2076         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(00474)
2077                      "apr_thread_create: unable to create listener thread");
2078         /* let the parent decide how bad this really is */
2079         clean_child_exit(APEXIT_CHILDSICK);
2080     }
2081     apr_os_thread_get(&listener_os_thread, ts->listener);
2082 }
2083
2084 /* XXX under some circumstances not understood, children can get stuck
2085  *     in start_threads forever trying to take over slots which will
2086  *     never be cleaned up; for now there is an APLOG_DEBUG message issued
2087  *     every so often when this condition occurs
2088  */
2089 static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
2090 {
2091     thread_starter *ts = dummy;
2092     apr_thread_t **threads = ts->threads;
2093     apr_threadattr_t *thread_attr = ts->threadattr;
2094     int my_child_num = ts->child_num_arg;
2095     proc_info *my_info;
2096     apr_status_t rv;
2097     int i;
2098     int threads_created = 0;
2099     int listener_started = 0;
2100     int loops;
2101     int prev_threads_created;
2102     int max_recycled_pools = -1;
2103     int good_methods[] = {APR_POLLSET_KQUEUE, APR_POLLSET_PORT, APR_POLLSET_EPOLL};
2104     /* XXX don't we need more to handle K-A or lingering close? */
2105     const apr_uint32_t pollset_size = threads_per_child * 2;
2106
2107     /* We must create the fd queues before we start up the listener
2108      * and worker threads. */
2109     worker_queue = apr_pcalloc(pchild, sizeof(*worker_queue));
2110     rv = ap_queue_init(worker_queue, threads_per_child, pchild);
2111     if (rv != APR_SUCCESS) {
2112         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(03100)
2113                      "ap_queue_init() failed");
2114         clean_child_exit(APEXIT_CHILDFATAL);
2115     }
2116
2117     if (ap_max_mem_free != APR_ALLOCATOR_MAX_FREE_UNLIMITED) {
2118         /* If we want to conserve memory, let's not keep an unlimited number of
2119          * pools & allocators.
2120          * XXX: This should probably be a separate config directive
2121          */
2122         max_recycled_pools = threads_per_child * 3 / 4 ;
2123     }
2124     rv = ap_queue_info_create(&worker_queue_info, pchild,
2125                               threads_per_child, max_recycled_pools);
2126     if (rv != APR_SUCCESS) {
2127         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(03101)
2128                      "ap_queue_info_create() failed");
2129         clean_child_exit(APEXIT_CHILDFATAL);
2130     }
2131
2132     /* Create the timeout mutex and main pollset before the listener
2133      * thread starts.
2134      */
2135     rv = apr_thread_mutex_create(&timeout_mutex, APR_THREAD_MUTEX_DEFAULT,
2136                                  pchild);
2137     if (rv != APR_SUCCESS) {
2138         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03102)
2139                      "creation of the timeout mutex failed.");
2140         clean_child_exit(APEXIT_CHILDFATAL);
2141     }
2142
2143     /* Create the main pollset */
2144     for (i = 0; i < sizeof(good_methods) / sizeof(good_methods[0]); i++) {
2145         apr_uint32_t flags = APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY |
2146                              APR_POLLSET_NODEFAULT | APR_POLLSET_WAKEABLE;
2147         rv = apr_pollset_create_ex(&event_pollset, pollset_size, pchild, flags,
2148                                    good_methods[i]);
2149         if (rv == APR_SUCCESS) {
2150             listener_is_wakeable = 1;
2151             break;
2152         }
2153         flags &= ~APR_POLLSET_WAKEABLE;
2154         rv = apr_pollset_create_ex(&event_pollset, pollset_size, pchild, flags,
2155                                    good_methods[i]);
2156         if (rv == APR_SUCCESS) {
2157             break;
2158         }
2159     }
2160     if (rv != APR_SUCCESS) {
2161         rv = apr_pollset_create(&event_pollset, pollset_size, pchild,
2162                                 APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
2163     }
2164     if (rv != APR_SUCCESS) {
2165         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03103)
2166                      "apr_pollset_create with Thread Safety failed.");
2167         clean_child_exit(APEXIT_CHILDFATAL);
2168     }
2169
2170     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(02471)
2171                  "start_threads: Using %s (%swakeable)",
2172                  apr_pollset_method_name(event_pollset),
2173                  listener_is_wakeable ? "" : "not ");
2174     worker_sockets = apr_pcalloc(pchild, threads_per_child
2175                                  * sizeof(apr_socket_t *));
2176
2177     loops = prev_threads_created = 0;
2178     while (1) {
2179         /* threads_per_child does not include the listener thread */
2180         for (i = 0; i < threads_per_child; i++) {
2181             int status =
2182                 ap_scoreboard_image->servers[my_child_num][i].status;
2183
2184             if (status != SERVER_DEAD) {
2185                 continue;
2186             }
2187
2188             my_info = (proc_info *) ap_malloc(sizeof(proc_info));
2189             my_info->pslot = my_child_num;
2190             my_info->tslot = i;
2191
2192             /* We are creating threads right now */
2193             ap_update_child_status_from_indexes(my_child_num, i,
2194                                                 SERVER_STARTING, NULL);
2195             /* We let each thread update its own scoreboard entry.  This is
2196              * done because it lets us deal with tid better.
2197              */
2198             rv = apr_thread_create(&threads[i], thread_attr,
2199                                    worker_thread, my_info, pchild);
2200             if (rv != APR_SUCCESS) {
2201                 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
2202                              APLOGNO(03104)
2203                              "apr_thread_create: unable to create worker thread");
2204                 /* let the parent decide how bad this really is */
2205                 clean_child_exit(APEXIT_CHILDSICK);
2206             }
2207             threads_created++;
2208         }
2209
2210         /* Start the listener only when there are workers available */
2211         if (!listener_started && threads_created) {
2212             create_listener_thread(ts);
2213             listener_started = 1;
2214         }
2215
2216
2217         if (start_thread_may_exit || threads_created == threads_per_child) {
2218             break;
2219         }
2220         /* wait for previous generation to clean up an entry */
2221         apr_sleep(apr_time_from_sec(1));
2222         ++loops;
2223         if (loops % 120 == 0) { /* every couple of minutes */
2224             if (prev_threads_created == threads_created) {
2225                 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
2226                              "child %" APR_PID_T_FMT " isn't taking over "
2227                              "slots very quickly (%d of %d)",
2228                              ap_my_pid, threads_created,
2229                              threads_per_child);
2230             }
2231             prev_threads_created = threads_created;
2232         }
2233     }
2234
2235     /* What state should this child_main process be listed as in the
2236      * scoreboard...?
2237      *  ap_update_child_status_from_indexes(my_child_num, i, SERVER_STARTING,
2238      *                                      (request_rec *) NULL);
2239      *
2240      *  This state should be listed separately in the scoreboard, in some kind
2241      *  of process_status, not mixed in with the worker threads' status.
2242      *  "life_status" is almost right, but it's in the worker's structure, and
2243      *  the name could be clearer.   gla
2244      */
2245     apr_thread_exit(thd, APR_SUCCESS);
2246     return NULL;
2247 }
2248
2249 static void join_workers(apr_thread_t * listener, apr_thread_t ** threads)
2250 {
2251     int i;
2252     apr_status_t rv, thread_rv;
2253
2254     if (listener) {
2255         int iter;
2256
2257         /* deal with a rare timing window which affects waking up the
2258          * listener thread...  if the signal sent to the listener thread
2259          * is delivered between the time it verifies that the
2260          * listener_may_exit flag is clear and the time it enters a
2261          * blocking syscall, the signal didn't do any good...  work around
2262          * that by sleeping briefly and sending it again
2263          */
2264
2265         iter = 0;
2266         while (iter < 10 && !dying) {
2267             /* listener has not stopped accepting yet */
2268             apr_sleep(apr_time_make(0, 500000));
2269             wakeup_listener();
2270             ++iter;
2271         }
2272         if (iter >= 10) {
2273             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00475)
2274                          "the listener thread didn't stop accepting");
2275         }
2276         else {
2277             rv = apr_thread_join(&thread_rv, listener);
2278             if (rv != APR_SUCCESS) {
2279                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00476)
2280                              "apr_thread_join: unable to join listener thread");
2281             }
2282         }
2283     }
2284
2285     for (i = 0; i < threads_per_child; i++) {
2286         if (threads[i]) {       /* if we ever created this thread */
2287             rv = apr_thread_join(&thread_rv, threads[i]);
2288             if (rv != APR_SUCCESS) {
2289                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00477)
2290                              "apr_thread_join: unable to join worker "
2291                              "thread %d", i);
2292             }
2293         }
2294     }
2295 }
2296
2297 static void join_start_thread(apr_thread_t * start_thread_id)
2298 {
2299     apr_status_t rv, thread_rv;
2300
2301     start_thread_may_exit = 1;  /* tell it to give up in case it is still
2302                                  * trying to take over slots from a
2303                                  * previous generation
2304                                  */
2305     rv = apr_thread_join(&thread_rv, start_thread_id);
2306     if (rv != APR_SUCCESS) {
2307         ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00478)
2308                      "apr_thread_join: unable to join the start " "thread");
2309     }
2310 }
2311
2312 static void child_main(int child_num_arg, int child_bucket)
2313 {
2314     apr_thread_t **threads;
2315     apr_status_t rv;
2316     thread_starter *ts;
2317     apr_threadattr_t *thread_attr;
2318     apr_thread_t *start_thread_id;
2319     int i;
2320
2321     /* for benefit of any hooks that run as this child initializes */
2322     retained->mpm->mpm_state = AP_MPMQ_STARTING;
2323
2324     ap_my_pid = getpid();
2325     ap_fatal_signal_child_setup(ap_server_conf);
2326     apr_pool_create(&pchild, pconf);
2327
2328     /* close unused listeners and pods */
2329     for (i = 0; i < retained->mpm->num_buckets; i++) {
2330         if (i != child_bucket) {
2331             ap_close_listeners_ex(all_buckets[i].listeners);
2332             ap_mpm_podx_close(all_buckets[i].pod);
2333         }
2334     }
2335
2336     /*stuff to do before we switch id's, so we have permissions. */
2337     ap_reopen_scoreboard(pchild, NULL, 0);
2338
2339     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
2340         clean_child_exit(APEXIT_CHILDFATAL);
2341     }
2342
2343     apr_thread_mutex_create(&g_timer_skiplist_mtx, APR_THREAD_MUTEX_DEFAULT, pchild);
2344     APR_RING_INIT(&timer_free_ring, timer_event_t, link);
2345     apr_skiplist_init(&timer_skiplist, pchild);
2346     apr_skiplist_set_compare(timer_skiplist, timer_comp, timer_comp);
2347     ap_run_child_init(pchild, ap_server_conf);
2348
2349     /* done with init critical section */
2350
2351     /* Just use the standard apr_setup_signal_thread to block all signals
2352      * from being received.  The child processes no longer use signals for
2353      * any communication with the parent process.
2354      */
2355     rv = apr_setup_signal_thread();
2356     if (rv != APR_SUCCESS) {
2357         ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO(00479)
2358                      "Couldn't initialize signal thread");
2359         clean_child_exit(APEXIT_CHILDFATAL);
2360     }
2361
2362     if (ap_max_requests_per_child) {
2363         conns_this_child = ap_max_requests_per_child;
2364     }
2365     else {
2366         /* coding a value of zero means infinity */
2367         conns_this_child = APR_INT32_MAX;
2368     }
2369
2370     /* Setup worker threads */
2371
2372     /* clear the storage; we may not create all our threads immediately,
2373      * and we want a 0 entry to indicate a thread which was not created
2374      */
2375     threads = ap_calloc(threads_per_child, sizeof(apr_thread_t *));
2376     ts = apr_palloc(pchild, sizeof(*ts));
2377
2378     apr_threadattr_create(&thread_attr, pchild);
2379     /* 0 means PTHREAD_CREATE_JOINABLE */
2380     apr_threadattr_detach_set(thread_attr, 0);
2381
2382     if (ap_thread_stacksize != 0) {
2383         rv = apr_threadattr_stacksize_set(thread_attr, ap_thread_stacksize);
2384         if (rv != APR_SUCCESS && rv != APR_ENOTIMPL) {
2385             ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(02436)
2386                          "WARNING: ThreadStackSize of %" APR_SIZE_T_FMT " is "
2387                          "inappropriate, using default",
2388                          ap_thread_stacksize);
2389         }
2390     }
2391
2392     ts->threads = threads;
2393     ts->listener = NULL;
2394     ts->child_num_arg = child_num_arg;
2395     ts->threadattr = thread_attr;
2396
2397     rv = apr_thread_create(&start_thread_id, thread_attr, start_threads,
2398                            ts, pchild);
2399     if (rv != APR_SUCCESS) {
2400         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(00480)
2401                      "apr_thread_create: unable to create worker thread");
2402         /* let the parent decide how bad this really is */
2403         clean_child_exit(APEXIT_CHILDSICK);
2404     }
2405
2406     retained->mpm->mpm_state = AP_MPMQ_RUNNING;
2407
2408     /* If we are only running in one_process mode, we will want to
2409      * still handle signals. */
2410     if (one_process) {
2411         /* Block until we get a terminating signal. */
2412         apr_signal_thread(check_signal);
2413         /* make sure the start thread has finished; signal_threads()
2414          * and join_workers() depend on that
2415          */
2416         /* XXX join_start_thread() won't be awakened if one of our
2417          *     threads encounters a critical error and attempts to
2418          *     shutdown this child
2419          */
2420         join_start_thread(start_thread_id);
2421
2422         /* helps us terminate a little more quickly than the dispatch of the
2423          * signal thread; beats the Pipe of Death and the browsers
2424          */
2425         signal_threads(ST_UNGRACEFUL);
2426
2427         /* A terminating signal was received. Now join each of the
2428          * workers to clean them up.
2429          *   If the worker already exited, then the join frees
2430          *   their resources and returns.
2431          *   If the worker hasn't exited, then this blocks until
2432          *   they have (then cleans up).
2433          */
2434         join_workers(ts->listener, threads);
2435     }
2436     else {                      /* !one_process */
2437         /* remove SIGTERM from the set of blocked signals...  if one of
2438          * the other threads in the process needs to take us down
2439          * (e.g., for MaxConnectionsPerChild) it will send us SIGTERM
2440          */
2441         unblock_signal(SIGTERM);
2442         apr_signal(SIGTERM, dummy_signal_handler);
2443         /* Watch for any messages from the parent over the POD */
2444         while (1) {
2445             rv = ap_mpm_podx_check(my_bucket->pod);
2446             if (rv == AP_MPM_PODX_NORESTART) {
2447                 /* see if termination was triggered while we slept */
2448                 switch (terminate_mode) {
2449                 case ST_GRACEFUL:
2450                     rv = AP_MPM_PODX_GRACEFUL;
2451                     break;
2452                 case ST_UNGRACEFUL:
2453                     rv = AP_MPM_PODX_RESTART;
2454                     break;
2455                 }
2456             }
2457             if (rv == AP_MPM_PODX_GRACEFUL || rv == AP_MPM_PODX_RESTART) {
2458                 /* make sure the start thread has finished;
2459                  * signal_threads() and join_workers depend on that
2460                  */
2461                 join_start_thread(start_thread_id);
2462                 signal_threads(rv ==
2463                                AP_MPM_PODX_GRACEFUL ? ST_GRACEFUL : ST_UNGRACEFUL);
2464                 break;
2465             }
2466         }
2467
2468         /* A terminating signal was received. Now join each of the
2469          * workers to clean them up.
2470          *   If the worker already exited, then the join frees
2471          *   their resources and returns.
2472          *   If the worker hasn't exited, then this blocks until
2473          *   they have (then cleans up).
2474          */
2475         join_workers(ts->listener, threads);
2476     }
2477
2478     free(threads);
2479
2480     clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
2481 }
2482
2483 static int make_child(server_rec * s, int slot, int bucket)
2484 {
2485     int pid;
2486
2487     if (slot + 1 > retained->max_daemons_limit) {
2488         retained->max_daemons_limit = slot + 1;
2489     }
2490
2491     if (ap_scoreboard_image->parent[slot].pid != 0) {
2492         /* XXX replace with assert or remove ? */
2493         ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(03455)
2494                  "BUG: Scoreboard slot %d should be empty but is "
2495                  "in use by pid %" APR_PID_T_FMT,
2496                  slot, ap_scoreboard_image->parent[slot].pid);
2497         return -1;
2498     }
2499
2500     if (one_process) {
2501         my_bucket = &all_buckets[0];
2502
2503         event_note_child_started(slot, getpid());
2504         child_main(slot, 0);
2505         /* NOTREACHED */
2506         ap_assert(0);
2507         return -1;
2508     }
2509
2510     if ((pid = fork()) == -1) {
2511         ap_log_error(APLOG_MARK, APLOG_ERR, errno, s, APLOGNO(00481)
2512                      "fork: Unable to fork new process");
2513
2514         /* fork didn't succeed.  There's no need to touch the scoreboard;
2515          * if we were trying to replace a failed child process, then
2516          * server_main_loop() marked its workers SERVER_DEAD, and if
2517          * we were trying to replace a child process that exited normally,
2518          * its worker_thread()s left SERVER_DEAD or SERVER_GRACEFUL behind.
2519          */
2520
2521         /* In case system resources are maxxed out, we don't want
2522            Apache running away with the CPU trying to fork over and
2523            over and over again. */
2524         apr_sleep(apr_time_from_sec(10));
2525
2526         return -1;
2527     }
2528
2529     if (!pid) {
2530         my_bucket = &all_buckets[bucket];
2531
2532 #ifdef HAVE_BINDPROCESSOR
2533         /* By default, AIX binds to a single processor.  This bit unbinds
2534          * children which will then bind to another CPU.
2535          */
2536         int status = bindprocessor(BINDPROCESS, (int) getpid(),
2537                                    PROCESSOR_CLASS_ANY);
2538         if (status != OK)
2539             ap_log_error(APLOG_MARK, APLOG_DEBUG, errno,
2540                          ap_server_conf, APLOGNO(00482)
2541                          "processor unbind failed");
2542 #endif
2543         RAISE_SIGSTOP(MAKE_CHILD);
2544
2545         apr_signal(SIGTERM, just_die);
2546         child_main(slot, bucket);
2547         /* NOTREACHED */
2548         ap_assert(0);
2549         return -1;
2550     }
2551
2552     ap_scoreboard_image->parent[slot].quiescing = 0;
2553     ap_scoreboard_image->parent[slot].not_accepting = 0;
2554     ap_scoreboard_image->parent[slot].bucket = bucket;
2555     event_note_child_started(slot, pid);
2556     active_daemons++;
2557     retained->total_daemons++;
2558     return 0;
2559 }
2560
2561 /* start up a bunch of children */
2562 static void startup_children(int number_to_start)
2563 {
2564     int i;
2565
2566     for (i = 0; number_to_start && i < server_limit; ++i) {
2567         if (ap_scoreboard_image->parent[i].pid != 0) {
2568             continue;
2569         }
2570         if (make_child(ap_server_conf, i, i % retained->mpm->num_buckets) < 0) {
2571             break;
2572         }
2573         --number_to_start;
2574     }
2575 }
2576
2577 static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
2578 {
2579     int i, j;
2580     int idle_thread_count = 0;
2581     worker_score *ws;
2582     process_score *ps;
2583     int free_length = 0;
2584     int free_slots[MAX_SPAWN_RATE];
2585     int last_non_dead = -1;
2586     int active_thread_count = 0;
2587
2588     for (i = 0; i < server_limit; ++i) {
2589         /* Initialization to satisfy the compiler. It doesn't know
2590          * that threads_per_child is always > 0 */
2591         int status = SERVER_DEAD;
2592         int child_threads_active = 0;
2593
2594         if (i >= retained->max_daemons_limit &&
2595             free_length == retained->idle_spawn_rate[child_bucket]) {
2596             /* short cut if all active processes have been examined and
2597              * enough empty scoreboard slots have been found
2598              */
2599
2600             break;
2601         }
2602         ps = &ap_scoreboard_image->parent[i];
2603         if (ps->pid != 0) {
2604             for (j = 0; j < threads_per_child; j++) {
2605                 ws = &ap_scoreboard_image->servers[i][j];
2606                 status = ws->status;
2607
2608                 /* We consider a starting server as idle because we started it
2609                  * at least a cycle ago, and if it still hasn't finished starting
2610                  * then we're just going to swamp things worse by forking more.
2611                  * So we hopefully won't need to fork more if we count it.
2612                  * This depends on the ordering of SERVER_READY and SERVER_STARTING.
2613                  */
2614                 if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting
2615                     && ps->generation == retained->mpm->my_generation
2616                     && ps->bucket == child_bucket)
2617                 {
2618                     ++idle_thread_count;
2619                 }
2620                 if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
2621                     ++child_threads_active;
2622                 }
2623             }
2624             last_non_dead = i;
2625         }
2626         active_thread_count += child_threads_active;
2627         if (!ps->pid && free_length < retained->idle_spawn_rate[child_bucket])
2628             free_slots[free_length++] = i;
2629         else if (child_threads_active == threads_per_child)
2630             had_healthy_child = 1;
2631     }
2632
2633     if (retained->sick_child_detected) {
2634         if (had_healthy_child) {
2635             /* Assume this is a transient error, even though it may not be.  Leave
2636              * the server up in case it is able to serve some requests or the
2637              * problem will be resolved.
2638              */
2639             retained->sick_child_detected = 0;
2640         }
2641         else {
2642             /* looks like a basket case, as no child ever fully initialized; give up.
2643              */
2644             retained->mpm->shutdown_pending = 1;
2645             child_fatal = 1;
2646             ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
2647                          ap_server_conf, APLOGNO(02324)
2648                          "A resource shortage or other unrecoverable failure "
2649                          "was encountered before any child process initialized "
2650                          "successfully... httpd is exiting!");
2651             /* the child already logged the failure details */
2652             return;
2653         }
2654     }
2655
2656     retained->max_daemons_limit = last_non_dead + 1;
2657
2658     if (idle_thread_count > max_spare_threads / num_buckets)
2659     {
2660         /*
2661          * Child processes that we ask to shut down won't die immediately
2662          * but may stay around for a long time when they finish their
2663          * requests. If the server load changes many times, many such
2664          * gracefully finishing processes may accumulate, filling up the
2665          * scoreboard. To avoid running out of scoreboard entries, we
2666          * don't shut down more processes when the total number of processes
2667          * is high.
2668          *
2669          * XXX It would be nice if we could
2670          * XXX - kill processes without keepalive connections first
2671          * XXX - tell children to stop accepting new connections, and
2672          * XXX   depending on server load, later be able to resurrect them
2673          *       or kill them
2674          */
2675         if (retained->total_daemons <= active_daemons_limit &&
2676             retained->total_daemons < server_limit) {
2677             /* Kill off one child */
2678             ap_mpm_podx_signal(all_buckets[child_bucket].pod,
2679                                AP_MPM_PODX_GRACEFUL);
2680             retained->idle_spawn_rate[child_bucket] = 1;
2681             active_daemons--;
2682         } else {
2683             ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
2684                          "Not shutting down child: total daemons %d / "
2685                          "active limit %d / ServerLimit %d",
2686                          retained->total_daemons, active_daemons_limit,
2687                          server_limit);
2688         }
2689     }
2690     else if (idle_thread_count < min_spare_threads / num_buckets) {
2691         if (active_thread_count >= max_workers) {
2692             if (!retained->maxclients_reported) {
2693                 /* only report this condition once */
2694                 ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484)
2695                              "server reached MaxRequestWorkers setting, "
2696                              "consider raising the MaxRequestWorkers "
2697                              "setting");
2698                 retained->maxclients_reported = 1;
2699             }
2700             retained->idle_spawn_rate[child_bucket] = 1;
2701         }
2702         else if (free_length == 0) { /* scoreboard is full, can't fork */
2703             ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(03490)
2704                          "scoreboard is full, not at MaxRequestWorkers."
2705                          "Increase ServerLimit.");
2706             retained->idle_spawn_rate[child_bucket] = 1;
2707         }
2708         else {
2709             if (free_length > retained->idle_spawn_rate[child_bucket]) {
2710                 free_length = retained->idle_spawn_rate[child_bucket];
2711             }
2712             if (retained->idle_spawn_rate[child_bucket] >= 8) {
2713                 ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00486)
2714                              "server seems busy, (you may need "
2715                              "to increase StartServers, ThreadsPerChild "
2716                              "or Min/MaxSpareThreads), "
2717                              "spawning %d children, there are around %d idle "
2718                              "threads, %d active children, and %d children "
2719                              "that are shutting down", free_length,
2720                              idle_thread_count, active_daemons,
2721                              retained->total_daemons);
2722             }
2723             for (i = 0; i < free_length; ++i) {
2724                 ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
2725                              "Spawning new child: slot %d active / "
2726                              "total daemons: %d/%d",
2727                              free_slots[i], active_daemons,
2728                              retained->total_daemons);
2729                 make_child(ap_server_conf, free_slots[i], child_bucket);
2730             }
2731             /* the next time around we want to spawn twice as many if this
2732              * wasn't good enough, but not if we've just done a graceful
2733              */
2734             if (retained->hold_off_on_exponential_spawning) {
2735                 --retained->hold_off_on_exponential_spawning;
2736             }
2737             else if (retained->idle_spawn_rate[child_bucket]
2738                      < MAX_SPAWN_RATE / num_buckets) {
2739                 retained->idle_spawn_rate[child_bucket] *= 2;
2740             }
2741         }
2742     }
2743     else {
2744         retained->idle_spawn_rate[child_bucket] = 1;
2745     }
2746 }
2747
2748 static void server_main_loop(int remaining_children_to_start, int num_buckets)
2749 {
2750     int child_slot;
2751     apr_exit_why_e exitwhy;
2752     int status, processed_status;
2753     apr_proc_t pid;
2754     int i;
2755
2756     while (!retained->mpm->restart_pending && !retained->mpm->shutdown_pending) {
2757         ap_wait_or_timeout(&exitwhy, &status, &pid, pconf, ap_server_conf);
2758
2759         if (pid.pid != -1) {
2760             processed_status = ap_process_child_status(&pid, exitwhy, status);
2761             child_slot = ap_find_child_by_pid(&pid);
2762             if (processed_status == APEXIT_CHILDFATAL) {
2763                 /* fix race condition found in PR 39311
2764                  * A child created at the same time as a graceful happens
2765                  * can find the lock missing and create a fatal error.
2766                  * It is not fatal for the last generation to be in this state.
2767                  */
2768                 if (child_slot < 0
2769                     || ap_get_scoreboard_process(child_slot)->generation
2770                        == retained->mpm->my_generation) {
2771                     retained->mpm->shutdown_pending = 1;
2772                     child_fatal = 1;
2773                     /*
2774                      * total_daemons counting will be off now, but as we
2775                      * are shutting down, that is not an issue anymore.
2776                      */
2777                     return;
2778                 }
2779                 else {
2780                     ap_log_error(APLOG_MARK, APLOG_WARNING, 0, ap_server_conf, APLOGNO(00487)
2781                                  "Ignoring fatal error in child of previous "
2782                                  "generation (pid %ld).",
2783                                  (long)pid.pid);
2784                     retained->sick_child_detected = 1;
2785                 }
2786             }
2787             else if (processed_status == APEXIT_CHILDSICK) {
2788                 /* tell perform_idle_server_maintenance to check into this
2789                  * on the next timer pop
2790                  */
2791                 retained->sick_child_detected = 1;
2792             }
2793             /* non-fatal death... note that it's gone in the scoreboard. */
2794             if (child_slot >= 0) {
2795                 process_score *ps;
2796
2797                 for (i = 0; i < threads_per_child; i++)
2798                     ap_update_child_status_from_indexes(child_slot, i,
2799                                                         SERVER_DEAD, NULL);
2800
2801                 event_note_child_killed(child_slot, 0, 0);
2802                 ps = &ap_scoreboard_image->parent[child_slot];
2803                 if (!ps->quiescing)
2804                     active_daemons--;
2805                 ps->quiescing = 0;
2806                 /* NOTE: We don't dec in the (child_slot < 0) case! */
2807                 retained->total_daemons--;
2808                 if (processed_status == APEXIT_CHILDSICK) {
2809                     /* resource shortage, minimize the fork rate */
2810                     retained->idle_spawn_rate[ps->bucket] = 1;
2811                 }
2812                 else if (remaining_children_to_start) {
2813                     /* we're still doing a 1-for-1 replacement of dead
2814                      * children with new children
2815                      */
2816                     make_child(ap_server_conf, child_slot, ps->bucket);
2817                     --remaining_children_to_start;
2818                 }
2819             }
2820 #if APR_HAS_OTHER_CHILD
2821             else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
2822                                                 status) == 0) {
2823                 /* handled */
2824             }
2825 #endif
2826             else if (retained->mpm->was_graceful) {
2827                 /* Great, we've probably just lost a slot in the
2828                  * scoreboard.  Somehow we don't know about this child.
2829                  */
2830                 ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
2831                              ap_server_conf, APLOGNO(00488)
2832                              "long lost child came home! (pid %ld)",
2833                              (long) pid.pid);
2834             }
2835             /* Don't perform idle maintenance when a child dies,
2836              * only do it when there's a timeout.  Remember only a
2837              * finite number of children can die, and it's pretty
2838              * pathological for a lot to die suddenly.
2839              */
2840             continue;
2841         }
2842         else if (remaining_children_to_start) {
2843             /* we hit a 1 second timeout in which none of the previous
2844              * generation of children needed to be reaped... so assume
2845              * they're all done, and pick up the slack if any is left.
2846              */
2847             startup_children(remaining_children_to_start);
2848             remaining_children_to_start = 0;
2849             /* In any event we really shouldn't do the code below because
2850              * few of the servers we just started are in the IDLE state
2851              * yet, so we'd mistakenly create an extra server.
2852              */
2853             continue;
2854         }
2855
2856         for (i = 0; i < num_buckets; i++) {
2857             perform_idle_server_maintenance(i, num_buckets);
2858         }
2859     }
2860 }
2861
2862 static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
2863 {
2864     int num_buckets = retained->mpm->num_buckets;
2865     int remaining_children_to_start;
2866     int i;
2867
2868     ap_log_pid(pconf, ap_pid_fname);
2869
2870     if (!retained->mpm->was_graceful) {
2871         if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
2872             retained->mpm->mpm_state = AP_MPMQ_STOPPING;
2873             return !OK;
2874         }
2875         /* fix the generation number in the global score; we just got a new,
2876          * cleared scoreboard
2877          */
2878         ap_scoreboard_image->global->running_generation = retained->mpm->my_generation;
2879     }
2880
2881     if (!one_process) {
2882         ap_fatal_signal_setup(ap_server_conf, pconf);
2883     }
2884     ap_unixd_mpm_set_signals(pconf, one_process);
2885
2886     /* Don't thrash since num_buckets depends on the
2887      * system and the number of online CPU cores...
2888      */
2889     if (active_daemons_limit < num_buckets)
2890         active_daemons_limit = num_buckets;
2891     if (ap_daemons_to_start < num_buckets)
2892         ap_daemons_to_start = num_buckets;
2893     /* We want to create as much children at a time as the number of buckets,
2894      * so to optimally accept connections (evenly distributed across buckets).
2895      * Thus min_spare_threads should at least maintain num_buckets children,
2896      * and max_spare_threads allow num_buckets more children w/o triggering
2897      * immediately (e.g. num_buckets idle threads margin, one per bucket).
2898      */
2899     if (min_spare_threads < threads_per_child * (num_buckets - 1) + num_buckets)
2900         min_spare_threads = threads_per_child * (num_buckets - 1) + num_buckets;
2901     if (max_spare_threads < min_spare_threads + (threads_per_child + 1) * num_buckets)
2902         max_spare_threads = min_spare_threads + (threads_per_child + 1) * num_buckets;
2903
2904     /* If we're doing a graceful_restart then we're going to see a lot
2905      * of children exiting immediately when we get into the main loop
2906      * below (because we just sent them AP_SIG_GRACEFUL).  This happens pretty
2907      * rapidly... and for each one that exits we may start a new one, until
2908      * there are at least min_spare_threads idle threads, counting across
2909      * all children.  But we may be permitted to start more children than
2910      * that, so we'll just keep track of how many we're
2911      * supposed to start up without the 1 second penalty between each fork.
2912      */
2913     remaining_children_to_start = ap_daemons_to_start;
2914     if (remaining_children_to_start > active_daemons_limit) {
2915         remaining_children_to_start = active_daemons_limit;
2916     }
2917     if (!retained->mpm->was_graceful) {
2918         startup_children(remaining_children_to_start);
2919         remaining_children_to_start = 0;
2920     }
2921     else {
2922         /* give the system some time to recover before kicking into
2923          * exponential mode */
2924         retained->hold_off_on_exponential_spawning = 10;
2925     }
2926
2927     ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00489)
2928                  "%s configured -- resuming normal operations",
2929                  ap_get_server_description());
2930     ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00490)
2931                  "Server built: %s", ap_get_server_built());
2932     ap_log_command_line(plog, s);
2933     ap_log_mpm_common(s);
2934
2935     retained->mpm->mpm_state = AP_MPMQ_RUNNING;
2936
2937     server_main_loop(remaining_children_to_start, num_buckets);
2938     retained->mpm->mpm_state = AP_MPMQ_STOPPING;
2939
2940     if (retained->mpm->shutdown_pending && retained->mpm->is_ungraceful) {
2941         /* Time to shut down:
2942          * Kill child processes, tell them to call child_exit, etc...
2943          */
2944         for (i = 0; i < num_buckets; i++) {
2945             ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
2946                                AP_MPM_PODX_RESTART);
2947         }
2948         ap_reclaim_child_processes(1, /* Start with SIGTERM */
2949                                    event_note_child_killed);
2950
2951         if (!child_fatal) {
2952             /* cleanup pid file on normal shutdown */
2953             ap_remove_pid(pconf, ap_pid_fname);
2954             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0,
2955                          ap_server_conf, APLOGNO(00491) "caught SIGTERM, shutting down");
2956         }
2957
2958         return DONE;
2959     }
2960
2961     if (retained->mpm->shutdown_pending) {
2962         /* Time to gracefully shut down:
2963          * Kill child processes, tell them to call child_exit, etc...
2964          */
2965         int active_children;
2966         int index;
2967         apr_time_t cutoff = 0;
2968
2969         /* Close our listeners, and then ask our children to do same */
2970         ap_close_listeners();
2971         for (i = 0; i < num_buckets; i++) {
2972             ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
2973                                AP_MPM_PODX_GRACEFUL);
2974         }
2975         ap_relieve_child_processes(event_note_child_killed);
2976
2977         if (!child_fatal) {
2978             /* cleanup pid file on normal shutdown */
2979             ap_remove_pid(pconf, ap_pid_fname);
2980             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00492)
2981                          "caught " AP_SIG_GRACEFUL_STOP_STRING
2982                          ", shutting down gracefully");
2983         }
2984
2985         if (ap_graceful_shutdown_timeout) {
2986             cutoff = apr_time_now() +
2987                      apr_time_from_sec(ap_graceful_shutdown_timeout);
2988         }
2989
2990         /* Don't really exit until each child has finished */
2991         retained->mpm->shutdown_pending = 0;
2992         do {
2993             /* Pause for a second */
2994             apr_sleep(apr_time_from_sec(1));
2995
2996             /* Relieve any children which have now exited */
2997             ap_relieve_child_processes(event_note_child_killed);
2998
2999             active_children = 0;
3000             for (index = 0; index < retained->max_daemons_limit; ++index) {
3001                 if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
3002                     active_children = 1;
3003                     /* Having just one child is enough to stay around */
3004                     break;
3005                 }
3006             }
3007         } while (!retained->mpm->shutdown_pending && active_children &&
3008                  (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));
3009
3010         /* We might be here because we received SIGTERM, either
3011          * way, try and make sure that all of our processes are
3012          * really dead.
3013          */
3014         for (i = 0; i < num_buckets; i++) {
3015             ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
3016                                AP_MPM_PODX_RESTART);
3017         }
3018         ap_reclaim_child_processes(1, event_note_child_killed);
3019
3020         return DONE;
3021     }
3022
3023     /* we've been told to restart */
3024     if (one_process) {
3025         /* not worth thinking about */
3026         return DONE;
3027     }
3028
3029     /* advance to the next generation */
3030     /* XXX: we really need to make sure this new generation number isn't in
3031      * use by any of the children.
3032      */
3033     ++retained->mpm->my_generation;
3034     ap_scoreboard_image->global->running_generation = retained->mpm->my_generation;
3035
3036     if (!retained->mpm->is_ungraceful) {
3037         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00493)
3038                      AP_SIG_GRACEFUL_STRING
3039                      " received.  Doing graceful restart");
3040         /* wake up the children...time to die.  But we'll have more soon */
3041         for (i = 0; i < num_buckets; i++) {
3042             ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
3043                                AP_MPM_PODX_GRACEFUL);
3044         }
3045
3046         /* This is mostly for debugging... so that we know what is still
3047          * gracefully dealing with existing request.
3048          */
3049
3050     }
3051     else {
3052         /* Kill 'em all.  Since the child acts the same on the parents SIGTERM
3053          * and a SIGHUP, we may as well use the same signal, because some user
3054          * pthreads are stealing signals from us left and right.
3055          */
3056         for (i = 0; i < num_buckets; i++) {
3057             ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
3058                                AP_MPM_PODX_RESTART);
3059         }
3060
3061         ap_reclaim_child_processes(1,  /* Start with SIGTERM */
3062                                    event_note_child_killed);
3063         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00494)
3064                      "SIGHUP received.  Attempting to restart");
3065     }
3066
3067     active_daemons = 0;
3068
3069     return OK;
3070 }
3071
3072 static void setup_slave_conn(conn_rec *c, void *csd)
3073 {
3074     event_conn_state_t *mcs;
3075     event_conn_state_t *cs;
3076
3077     mcs = ap_get_module_config(c->master->conn_config, &mpm_event_module);
3078
3079     cs = apr_pcalloc(c->pool, sizeof(*cs));
3080     cs->c = c;
3081     cs->r = NULL;
3082     cs->sc = mcs->sc;
3083     cs->suspended = 0;
3084     cs->p = c->pool;
3085     cs->bucket_alloc = c->bucket_alloc;
3086     cs->pfd = mcs->pfd;
3087     cs->pub = mcs->pub;
3088     cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
3089     cs->pub.sense = CONN_SENSE_DEFAULT;
3090
3091     c->cs = &(cs->pub);
3092     ap_set_module_config(c->conn_config, &mpm_event_module, cs);
3093 }
3094
3095 static int event_pre_connection(conn_rec *c, void *csd)
3096 {
3097     if (c->master && (!c->cs || c->cs == c->master->cs)) {
3098         setup_slave_conn(c, csd);
3099     }
3100     return OK;
3101 }
3102
3103 static int event_protocol_switch(conn_rec *c, request_rec *r, server_rec *s,
3104                                  const char *protocol)
3105 {
3106     if (!r && s) {
3107         /* connection based switching of protocol, set the correct server
3108          * configuration, so that timeouts, keepalives and such are used
3109          * for the server that the connection was switched on.
3110          * Normally, we set this on post_read_request, but on a protocol
3111          * other than http/1.1, this might never happen.
3112          */
3113         event_conn_state_t *cs;
3114
3115         cs = ap_get_module_config(c->conn_config, &mpm_event_module);
3116         cs->sc = ap_get_module_config(s->module_config, &mpm_event_module);
3117     }
3118     return DECLINED;
3119 }
3120
3121 /* This really should be a post_config hook, but the error log is already
3122  * redirected by that point, so we need to do this in the open_logs phase.
3123  */
3124 static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
3125                            apr_pool_t * ptemp, server_rec * s)
3126 {
3127     int startup = 0;
3128     int level_flags = 0;
3129     int num_buckets = 0;
3130     ap_listen_rec **listen_buckets;
3131     apr_status_t rv;
3132     int i;
3133
3134     pconf = p;
3135
3136     /* the reverse of pre_config, we want this only the first time around */
3137     if (retained->mpm->module_loads == 1) {
3138         startup = 1;
3139         level_flags |= APLOG_STARTUP;
3140     }
3141
3142     if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
3143         ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
3144                      (startup ? NULL : s),
3145                      "no listening sockets available, shutting down");
3146         return !OK;
3147     }
3148
3149     if (one_process) {
3150         num_buckets = 1;
3151     }
3152     else if (retained->mpm->was_graceful) {
3153         /* Preserve the number of buckets on graceful restarts. */
3154         num_buckets = retained->mpm->num_buckets;
3155     }
3156     if ((rv = ap_duplicate_listeners(pconf, ap_server_conf,
3157                                      &listen_buckets, &num_buckets))) {
3158         ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
3159                      (startup ? NULL : s),
3160                      "could not duplicate listeners");
3161         return !OK;
3162     }
3163
3164     all_buckets = apr_pcalloc(pconf, num_buckets * sizeof(*all_buckets));
3165     for (i = 0; i < num_buckets; i++) {
3166         if (!one_process && /* no POD in one_process mode */
3167                 (rv = ap_mpm_podx_open(pconf, &all_buckets[i].pod))) {
3168             ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
3169                          (startup ? NULL : s),
3170                          "could not open pipe-of-death");
3171             return !OK;
3172         }
3173         all_buckets[i].listeners = listen_buckets[i];
3174     }
3175
3176     if (retained->mpm->max_buckets < num_buckets) {
3177         int new_max, *new_ptr;
3178         new_max = retained->mpm->max_buckets * 2;
3179         if (new_max < num_buckets) {
3180             new_max = num_buckets;
3181         }
3182         new_ptr = (int *)apr_palloc(ap_pglobal, new_max * sizeof(int));
3183         memcpy(new_ptr, retained->idle_spawn_rate,
3184                retained->mpm->num_buckets * sizeof(int));
3185         retained->idle_spawn_rate = new_ptr;
3186         retained->mpm->max_buckets = new_max;
3187     }
3188     if (retained->mpm->num_buckets < num_buckets) {
3189         int rate_max = 1;
3190         /* If new buckets are added, set their idle spawn rate to
3191          * the highest so far, so that they get filled as quickly
3192          * as the existing ones.
3193          */
3194         for (i = 0; i < retained->mpm->num_buckets; i++) {
3195             if (rate_max < retained->idle_spawn_rate[i]) {
3196                 rate_max = retained->idle_spawn_rate[i];
3197             }
3198         }
3199         for (/* up to date i */; i < num_buckets; i++) {
3200             retained->idle_spawn_rate[i] = rate_max;
3201         }
3202     }
3203     retained->mpm->num_buckets = num_buckets;
3204
3205     /* for skiplist */
3206     srand((unsigned int)apr_time_now());
3207     return OK;
3208 }
3209
3210 static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
3211                             apr_pool_t * ptemp)
3212 {
3213     int no_detach, debug, foreground;
3214     apr_status_t rv;
3215     const char *userdata_key = "mpm_event_module";
3216     int test_atomics = 0;
3217
3218     debug = ap_exists_config_define("DEBUG");
3219
3220     if (debug) {
3221         foreground = one_process = 1;
3222         no_detach = 0;
3223     }
3224     else {
3225         one_process = ap_exists_config_define("ONE_PROCESS");
3226         no_detach = ap_exists_config_define("NO_DETACH");
3227         foreground = ap_exists_config_define("FOREGROUND");
3228     }
3229
3230     retained = ap_retained_data_get(userdata_key);
3231     if (!retained) {
3232         retained = ap_retained_data_create(userdata_key, sizeof(*retained));
3233         retained->mpm = ap_unixd_mpm_get_retained_data();
3234         retained->max_daemons_limit = -1;
3235         if (retained->mpm->module_loads) {
3236             test_atomics = 1;
3237         }
3238     }
3239     retained->mpm->mpm_state = AP_MPMQ_STARTING;
3240     if (retained->mpm->baton != retained) {
3241         retained->mpm->was_graceful = 0;
3242         retained->mpm->baton = retained;
3243     }
3244     ++retained->mpm->module_loads;
3245
3246     /* test once for correct operation of fdqueue */
3247     if (test_atomics || retained->mpm->module_loads == 2) {
3248         static apr_uint32_t foo1, foo2;
3249
3250         apr_atomic_set32(&foo1, 100);
3251         foo2 = apr_atomic_add32(&foo1, -10);
3252         if (foo2 != 100 || foo1 != 90) {
3253             ap_log_error(APLOG_MARK, APLOG_CRIT, 0, NULL, APLOGNO(02405)
3254                          "atomics not working as expected - add32 of negative number");
3255             return HTTP_INTERNAL_SERVER_ERROR;
3256         }
3257     }
3258
3259     /* sigh, want this only the second time around */
3260     if (retained->mpm->module_loads == 2) {
3261         rv = apr_pollset_create(&event_pollset, 1, plog,
3262                                 APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
3263         if (rv != APR_SUCCESS) {
3264             ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, APLOGNO(00495)
3265                          "Couldn't create a Thread Safe Pollset. "
3266                          "Is it supported on your platform?"
3267                          "Also check system or user limits!");
3268             return HTTP_INTERNAL_SERVER_ERROR;
3269         }
3270         apr_pollset_destroy(event_pollset);
3271
3272         if (!one_process && !foreground) {
3273             /* before we detach, setup crash handlers to log to errorlog */
3274             ap_fatal_signal_setup(ap_server_conf, pconf);
3275             rv = apr_proc_detach(no_detach ? APR_PROC_DETACH_FOREGROUND
3276                                  : APR_PROC_DETACH_DAEMONIZE);
3277             if (rv != APR_SUCCESS) {
3278                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, APLOGNO(00496)
3279                              "apr_proc_detach failed");
3280                 return HTTP_INTERNAL_SERVER_ERROR;
3281             }
3282         }
3283     }
3284
3285     parent_pid = ap_my_pid = getpid();
3286
3287     ap_listen_pre_config();
3288     ap_daemons_to_start = DEFAULT_START_DAEMON;
3289     min_spare_threads = DEFAULT_MIN_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
3290     max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
3291     server_limit = DEFAULT_SERVER_LIMIT;
3292     thread_limit = DEFAULT_THREAD_LIMIT;
3293     active_daemons_limit = server_limit;
3294     threads_per_child = DEFAULT_THREADS_PER_CHILD;
3295     max_workers = active_daemons_limit * threads_per_child;
3296     defer_linger_chain = NULL;
3297     had_healthy_child = 0;
3298     ap_extended_status = 0;
3299
3300     return OK;
3301 }
3302
3303 static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
3304                              apr_pool_t *ptemp, server_rec *s)
3305 {
3306     struct {
3307         struct timeout_queue *tail, *q;
3308         apr_hash_t *hash;
3309     } wc, ka;
3310
3311     /* Not needed in pre_config stage */
3312     if (ap_state_query(AP_SQ_MAIN_STATE) == AP_SQ_MS_CREATE_PRE_CONFIG) {
3313         return OK;
3314     }
3315
3316     wc.tail = ka.tail = NULL;
3317     wc.hash = apr_hash_make(ptemp);
3318     ka.hash = apr_hash_make(ptemp);
3319
3320     linger_q = TO_QUEUE_MAKE(pconf, apr_time_from_sec(MAX_SECS_TO_LINGER),
3321                              NULL);
3322     short_linger_q = TO_QUEUE_MAKE(pconf, apr_time_from_sec(SECONDS_TO_LINGER),
3323                                    NULL);
3324
3325     for (; s; s = s->next) {
3326         event_srv_cfg *sc = apr_pcalloc(pconf, sizeof *sc);
3327
3328         ap_set_module_config(s->module_config, &mpm_event_module, sc);
3329         if (!wc.tail) {
3330             /* The main server uses the global queues */
3331             wc.q = TO_QUEUE_MAKE(pconf, s->timeout, NULL);
3332             apr_hash_set(wc.hash, &s->timeout, sizeof s->timeout, wc.q);
3333             wc.tail = write_completion_q = wc.q;
3334
3335             ka.q = TO_QUEUE_MAKE(pconf, s->keep_alive_timeout, NULL);
3336             apr_hash_set(ka.hash, &s->keep_alive_timeout,
3337                          sizeof s->keep_alive_timeout, ka.q);
3338             ka.tail = keepalive_q = ka.q;
3339         }
3340         else {
3341             /* The vhosts use any existing queue with the same timeout,
3342              * or their own queue(s) if there isn't */
3343             wc.q = apr_hash_get(wc.hash, &s->timeout, sizeof s->timeout);
3344             if (!wc.q) {
3345                 wc.q = TO_QUEUE_MAKE(pconf, s->timeout, wc.tail);
3346                 apr_hash_set(wc.hash, &s->timeout, sizeof s->timeout, wc.q);
3347                 wc.tail = wc.tail->next = wc.q;
3348             }
3349
3350             ka.q = apr_hash_get(ka.hash, &s->keep_alive_timeout,
3351                                 sizeof s->keep_alive_timeout);
3352             if (!ka.q) {
3353                 ka.q = TO_QUEUE_MAKE(pconf, s->keep_alive_timeout, ka.tail);
3354                 apr_hash_set(ka.hash, &s->keep_alive_timeout,
3355                              sizeof s->keep_alive_timeout, ka.q);
3356                 ka.tail = ka.tail->next = ka.q;
3357             }
3358         }
3359         sc->wc_q = wc.q;
3360         sc->ka_q = ka.q;
3361     }
3362
3363     return OK;
3364 }
3365
3366 static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
3367                               apr_pool_t *ptemp, server_rec *s)
3368 {
3369     int startup = 0;
3370
3371     /* the reverse of pre_config, we want this only the first time around */
3372     if (retained->mpm->module_loads == 1) {
3373         startup = 1;
3374     }
3375
3376     if (server_limit > MAX_SERVER_LIMIT) {
3377         if (startup) {
3378             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00497)
3379                          "WARNING: ServerLimit of %d exceeds compile-time "
3380                          "limit of %d servers, decreasing to %d.",
3381                          server_limit, MAX_SERVER_LIMIT, MAX_SERVER_LIMIT);
3382         } else {
3383             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00498)
3384                          "ServerLimit of %d exceeds compile-time limit "
3385                          "of %d, decreasing to match",
3386                          server_limit, MAX_SERVER_LIMIT);
3387         }
3388         server_limit = MAX_SERVER_LIMIT;
3389     }
3390     else if (server_limit < 1) {
3391         if (startup) {
3392             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00499)
3393                          "WARNING: ServerLimit of %d not allowed, "
3394                          "increasing to 1.", server_limit);
3395         } else {
3396             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00500)
3397                          "ServerLimit of %d not allowed, increasing to 1",
3398                          server_limit);
3399         }
3400         server_limit = 1;
3401     }
3402
3403     /* you cannot change ServerLimit across a restart; ignore
3404      * any such attempts
3405      */
3406     if (!retained->first_server_limit) {
3407         retained->first_server_limit = server_limit;
3408     }
3409     else if (server_limit != retained->first_server_limit) {
3410         /* don't need a startup console version here */
3411         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00501)
3412                      "changing ServerLimit to %d from original value of %d "
3413                      "not allowed during restart",
3414                      server_limit, retained->first_server_limit);
3415         server_limit = retained->first_server_limit;
3416     }
3417
3418     if (thread_limit > MAX_THREAD_LIMIT) {
3419         if (startup) {
3420             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00502)
3421                          "WARNING: ThreadLimit of %d exceeds compile-time "
3422                          "limit of %d threads, decreasing to %d.",
3423                          thread_limit, MAX_THREAD_LIMIT, MAX_THREAD_LIMIT);
3424         } else {
3425             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00503)
3426                          "ThreadLimit of %d exceeds compile-time limit "
3427                          "of %d, decreasing to match",
3428                          thread_limit, MAX_THREAD_LIMIT);
3429         }
3430         thread_limit = MAX_THREAD_LIMIT;
3431     }
3432     else if (thread_limit < 1) {
3433         if (startup) {
3434             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00504)
3435                          "WARNING: ThreadLimit of %d not allowed, "
3436                          "increasing to 1.", thread_limit);
3437         } else {
3438             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00505)
3439                          "ThreadLimit of %d not allowed, increasing to 1",
3440                          thread_limit);
3441         }
3442         thread_limit = 1;
3443     }
3444
3445     /* you cannot change ThreadLimit across a restart; ignore
3446      * any such attempts
3447      */
3448     if (!retained->first_thread_limit) {
3449         retained->first_thread_limit = thread_limit;
3450     }
3451     else if (thread_limit != retained->first_thread_limit) {
3452         /* don't need a startup console version here */
3453         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00506)
3454                      "changing ThreadLimit to %d from original value of %d "
3455                      "not allowed during restart",
3456                      thread_limit, retained->first_thread_limit);
3457         thread_limit = retained->first_thread_limit;
3458     }
3459
3460     if (threads_per_child > thread_limit) {
3461         if (startup) {
3462             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00507)
3463                          "WARNING: ThreadsPerChild of %d exceeds ThreadLimit "
3464                          "of %d threads, decreasing to %d. "
3465                          "To increase, please see the ThreadLimit directive.",
3466                          threads_per_child, thread_limit, thread_limit);
3467         } else {
3468             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00508)
3469                          "ThreadsPerChild of %d exceeds ThreadLimit "
3470                          "of %d, decreasing to match",
3471                          threads_per_child, thread_limit);
3472         }
3473         threads_per_child = thread_limit;
3474     }
3475     else if (threads_per_child < 1) {
3476         if (startup) {
3477             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00509)
3478                          "WARNING: ThreadsPerChild of %d not allowed, "
3479                          "increasing to 1.", threads_per_child);
3480         } else {
3481             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00510)
3482                          "ThreadsPerChild of %d not allowed, increasing to 1",
3483                          threads_per_child);
3484         }
3485         threads_per_child = 1;
3486     }
3487
3488     if (max_workers < threads_per_child) {
3489         if (startup) {
3490             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00511)
3491                          "WARNING: MaxRequestWorkers of %d is less than "
3492                          "ThreadsPerChild of %d, increasing to %d. "
3493                          "MaxRequestWorkers must be at least as large "
3494                          "as the number of threads in a single server.",
3495                          max_workers, threads_per_child, threads_per_child);
3496         } else {
3497             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00512)
3498                          "MaxRequestWorkers of %d is less than ThreadsPerChild "
3499                          "of %d, increasing to match",
3500                          max_workers, threads_per_child);
3501         }
3502         max_workers = threads_per_child;
3503     }
3504
3505     active_daemons_limit = max_workers / threads_per_child;
3506
3507     if (max_workers % threads_per_child) {
3508         int tmp_max_workers = active_daemons_limit * threads_per_child;
3509
3510         if (startup) {
3511             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00513)
3512                          "WARNING: MaxRequestWorkers of %d is not an integer "
3513                          "multiple of ThreadsPerChild of %d, decreasing to nearest "
3514                          "multiple %d, for a maximum of %d servers.",
3515                          max_workers, threads_per_child, tmp_max_workers,
3516                          active_daemons_limit);
3517         } else {
3518             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00514)
3519                          "MaxRequestWorkers of %d is not an integer multiple "
3520                          "of ThreadsPerChild of %d, decreasing to nearest "
3521                          "multiple %d", max_workers, threads_per_child,
3522                          tmp_max_workers);
3523         }
3524         max_workers = tmp_max_workers;
3525     }
3526
3527     if (active_daemons_limit > server_limit) {
3528         if (startup) {
3529             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00515)
3530                          "WARNING: MaxRequestWorkers of %d would require %d servers "
3531                          "and would exceed ServerLimit of %d, decreasing to %d. "
3532                          "To increase, please see the ServerLimit directive.",
3533                          max_workers, active_daemons_limit, server_limit,
3534                          server_limit * threads_per_child);
3535         } else {
3536             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00516)
3537                          "MaxRequestWorkers of %d would require %d servers and "
3538                          "exceed ServerLimit of %d, decreasing to %d",
3539                          max_workers, active_daemons_limit, server_limit,
3540                          server_limit * threads_per_child);
3541         }
3542         active_daemons_limit = server_limit;
3543     }
3544
3545     /* ap_daemons_to_start > active_daemons_limit checked in ap_mpm_run() */
3546     if (ap_daemons_to_start < 1) {
3547         if (startup) {
3548             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00517)
3549                          "WARNING: StartServers of %d not allowed, "
3550                          "increasing to 1.", ap_daemons_to_start);
3551         } else {
3552             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00518)
3553                          "StartServers of %d not allowed, increasing to 1",
3554                          ap_daemons_to_start);
3555         }
3556         ap_daemons_to_start = 1;
3557     }
3558
3559     if (min_spare_threads < 1) {
3560         if (startup) {
3561             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00519)
3562                          "WARNING: MinSpareThreads of %d not allowed, "
3563                          "increasing to 1 to avoid almost certain server "
3564                          "failure. Please read the documentation.",
3565                          min_spare_threads);
3566         } else {
3567             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00520)
3568                          "MinSpareThreads of %d not allowed, increasing to 1",
3569                          min_spare_threads);
3570         }
3571         min_spare_threads = 1;
3572     }
3573
3574     /* max_spare_threads < min_spare_threads + threads_per_child
3575      * checked in ap_mpm_run()
3576      */
3577
3578     return OK;
3579 }
3580
3581 static void event_hooks(apr_pool_t * p)
3582 {
3583     /* Our open_logs hook function must run before the core's, or stderr
3584      * will be redirected to a file, and the messages won't print to the
3585      * console.
3586      */
3587     static const char *const aszSucc[] = { "core.c", NULL };
3588     one_process = 0;
3589
3590     ap_hook_open_logs(event_open_logs, NULL, aszSucc, APR_HOOK_REALLY_FIRST);
3591     /* we need to set the MPM state before other pre-config hooks use MPM query
3592      * to retrieve it, so register as REALLY_FIRST
3593      */
3594     ap_hook_pre_config(event_pre_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
3595     ap_hook_post_config(event_post_config, NULL, NULL, APR_HOOK_MIDDLE);
3596     ap_hook_check_config(event_check_config, NULL, NULL, APR_HOOK_MIDDLE);
3597     ap_hook_mpm(event_run, NULL, NULL, APR_HOOK_MIDDLE);
3598     ap_hook_mpm_query(event_query, NULL, NULL, APR_HOOK_MIDDLE);
3599     ap_hook_mpm_register_timed_callback(event_register_timed_callback, NULL, NULL,
3600                                         APR_HOOK_MIDDLE);
3601     ap_hook_pre_read_request(event_pre_read_request, NULL, NULL, APR_HOOK_MIDDLE);
3602     ap_hook_post_read_request(event_post_read_request, NULL, NULL, APR_HOOK_MIDDLE);
3603     ap_hook_mpm_get_name(event_get_name, NULL, NULL, APR_HOOK_MIDDLE);
3604
3605     ap_hook_pre_connection(event_pre_connection, NULL, NULL, APR_HOOK_REALLY_FIRST);
3606     ap_hook_protocol_switch(event_protocol_switch, NULL, NULL, APR_HOOK_REALLY_FIRST);
3607 }
3608
3609 static const char *set_daemons_to_start(cmd_parms *cmd, void *dummy,
3610                                         const char *arg)
3611 {
3612     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3613     if (err != NULL) {
3614         return err;
3615     }
3616
3617     ap_daemons_to_start = atoi(arg);
3618     return NULL;
3619 }
3620
3621 static const char *set_min_spare_threads(cmd_parms * cmd, void *dummy,
3622                                          const char *arg)
3623 {
3624     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3625     if (err != NULL) {
3626         return err;
3627     }
3628
3629     min_spare_threads = atoi(arg);
3630     return NULL;
3631 }
3632
3633 static const char *set_max_spare_threads(cmd_parms * cmd, void *dummy,
3634                                          const char *arg)
3635 {
3636     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3637     if (err != NULL) {
3638         return err;
3639     }
3640
3641     max_spare_threads = atoi(arg);
3642     return NULL;
3643 }
3644
3645 static const char *set_max_workers(cmd_parms * cmd, void *dummy,
3646                                    const char *arg)
3647 {
3648     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3649     if (err != NULL) {
3650         return err;
3651     }
3652     if (!strcasecmp(cmd->cmd->name, "MaxClients")) {
3653         ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL, APLOGNO(00521)
3654                      "MaxClients is deprecated, use MaxRequestWorkers "
3655                      "instead.");
3656     }
3657     max_workers = atoi(arg);
3658     return NULL;
3659 }
3660
3661 static const char *set_threads_per_child(cmd_parms * cmd, void *dummy,
3662                                          const char *arg)
3663 {
3664     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3665     if (err != NULL) {
3666         return err;
3667     }
3668
3669     threads_per_child = atoi(arg);
3670     return NULL;
3671 }
3672 static const char *set_server_limit (cmd_parms *cmd, void *dummy, const char *arg)
3673 {
3674     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3675     if (err != NULL) {
3676         return err;
3677     }
3678
3679     server_limit = atoi(arg);
3680     return NULL;
3681 }
3682
3683 static const char *set_thread_limit(cmd_parms * cmd, void *dummy,
3684                                     const char *arg)
3685 {
3686     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3687     if (err != NULL) {
3688         return err;
3689     }
3690
3691     thread_limit = atoi(arg);
3692     return NULL;
3693 }
3694
3695 static const char *set_worker_factor(cmd_parms * cmd, void *dummy,
3696                                      const char *arg)
3697 {
3698     double val;
3699     char *endptr;
3700     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3701     if (err != NULL) {
3702         return err;
3703     }
3704
3705     val = strtod(arg, &endptr);
3706     if (*endptr)
3707         return "error parsing value";
3708
3709     if (val <= 0)
3710         return "AsyncRequestWorkerFactor argument must be a positive number";
3711
3712     worker_factor = val * WORKER_FACTOR_SCALE;
3713     if (worker_factor == 0)
3714         worker_factor = 1;
3715     return NULL;
3716 }
3717
3718
3719 static const command_rec event_cmds[] = {
3720     LISTEN_COMMANDS,
3721     AP_INIT_TAKE1("StartServers", set_daemons_to_start, NULL, RSRC_CONF,
3722                   "Number of child processes launched at server startup"),
3723     AP_INIT_TAKE1("ServerLimit", set_server_limit, NULL, RSRC_CONF,
3724                   "Maximum number of child processes for this run of Apache"),
3725     AP_INIT_TAKE1("MinSpareThreads", set_min_spare_threads, NULL, RSRC_CONF,
3726                   "Minimum number of idle threads, to handle request spikes"),
3727     AP_INIT_TAKE1("MaxSpareThreads", set_max_spare_threads, NULL, RSRC_CONF,
3728                   "Maximum number of idle threads"),
3729     AP_INIT_TAKE1("MaxClients", set_max_workers, NULL, RSRC_CONF,
3730                   "Deprecated name of MaxRequestWorkers"),
3731     AP_INIT_TAKE1("MaxRequestWorkers", set_max_workers, NULL, RSRC_CONF,
3732                   "Maximum number of threads alive at the same time"),
3733     AP_INIT_TAKE1("ThreadsPerChild", set_threads_per_child, NULL, RSRC_CONF,
3734                   "Number of threads each child creates"),
3735     AP_INIT_TAKE1("ThreadLimit", set_thread_limit, NULL, RSRC_CONF,
3736                   "Maximum number of worker threads per child process for this "
3737                   "run of Apache - Upper limit for ThreadsPerChild"),
3738     AP_INIT_TAKE1("AsyncRequestWorkerFactor", set_worker_factor, NULL, RSRC_CONF,
3739                   "How many additional connects will be accepted per idle "
3740                   "worker thread"),
3741     AP_GRACEFUL_SHUTDOWN_TIMEOUT_COMMAND,
3742     {NULL}
3743 };
3744
3745 AP_DECLARE_MODULE(mpm_event) = {
3746     MPM20_MODULE_STUFF,
3747     NULL,                       /* hook to run before apache parses args */
3748     NULL,                       /* create per-directory config structure */
3749     NULL,                       /* merge per-directory config structures */
3750     NULL,                       /* create per-server config structure */
3751     NULL,                       /* merge per-server config structures */
3752     event_cmds,                 /* command apr_table_t */
3753     event_hooks                 /* register_hooks */
3754 };