granicus.if.org Git - apache/blob - server/mpm/event/event.c

   1 /* Licensed to the Apache Software Foundation (ASF) under one or more
   2  * contributor license agreements.  See the NOTICE file distributed with
   3  * this work for additional information regarding copyright ownership.
   4  * The ASF licenses this file to You under the Apache License, Version 2.0
   5  * (the "License"); you may not use this file except in compliance with
   6  * the License.  You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * This MPM tries to fix the 'keep alive problem' in HTTP.
  19  *
  20  * After a client completes the first request, the client can keep the
  21  * connection open to send more requests with the same socket.  This can save
  22  * significant overhead in creating TCP connections.  However, the major
  23  * disadvantage is that Apache traditionally keeps an entire child
  24  * process/thread waiting for data from the client.  To solve this problem,
  25  * this MPM has a dedicated thread for handling both the Listening sockets,
  26  * and all sockets that are in a Keep Alive status.
  27  *
  28  * The MPM assumes the underlying apr_pollset implementation is somewhat
  29  * threadsafe.  This currently is only compatible with KQueue and EPoll.  This
  30  * enables the MPM to avoid extra high level locking or having to wake up the
  31  * listener thread when a keep-alive socket needs to be sent to it.
  32  *
  33  * This MPM does not perform well on older platforms that do not have very good
  34  * threading, like Linux with a 2.4 kernel, but this does not matter, since we
  35  * require EPoll or KQueue.
  36  *
  37  * For FreeBSD, use 5.3.  It is possible to run this MPM on FreeBSD 5.2.1, if
  38  * you use libkse (see `man libmap.conf`).
  39  *
  40  * For NetBSD, use at least 2.0.
  41  *
  42  * For Linux, you should use a 2.6 kernel, and make sure your glibc has epoll
  43  * support compiled in.
  44  *
  45  */
  46
  47 #include "apr.h"
  48 #include "apr_portable.h"
  49 #include "apr_strings.h"
  50 #include "apr_file_io.h"
  51 #include "apr_thread_proc.h"
  52 #include "apr_signal.h"
  53 #include "apr_thread_mutex.h"
  54 #include "apr_poll.h"
  55 #include "apr_ring.h"
  56 #include "apr_queue.h"
  57 #include "apr_atomic.h"
  58 #define APR_WANT_STRFUNC
  59 #include "apr_want.h"
  60 #include "apr_version.h"
  61
  62 #include <stdlib.h>
  63
  64 #if APR_HAVE_UNISTD_H
  65 #include <unistd.h>
  66 #endif
  67 #if APR_HAVE_SYS_SOCKET_H
  68 #include <sys/socket.h>
  69 #endif
  70 #if APR_HAVE_SYS_WAIT_H
  71 #include <sys/wait.h>
  72 #endif
  73 #ifdef HAVE_SYS_PROCESSOR_H
  74 #include <sys/processor.h>      /* for bindprocessor() */
  75 #endif
  76
  77 #if !APR_HAS_THREADS
  78 #error The Event MPM requires APR threads, but they are unavailable.
  79 #endif
  80
  81 #include "ap_config.h"
  82 #include "httpd.h"
  83 #include "http_main.h"
  84 #include "http_log.h"
  85 #include "http_config.h"        /* for read_config */
  86 #include "http_core.h"          /* for get_remote_host */
  87 #include "http_connection.h"
  88 #include "http_protocol.h"
  89 #include "ap_mpm.h"
  90 #include "mpm_common.h"
  91 #include "ap_listen.h"
  92 #include "scoreboard.h"
  93 #include "fdqueue.h"
  94 #include "mpm_default.h"
  95 #include "http_vhost.h"
  96 #include "unixd.h"
  97 #include "apr_skiplist.h"
  98
  99 #include <signal.h>
 100 #include <limits.h>             /* for INT_MAX */
 101
 102
 103 /* Limit on the total --- clients will be locked out if more servers than
 104  * this are needed.  It is intended solely to keep the server from crashing
 105  * when things get out of hand.
 106  *
 107  * We keep a hard maximum number of servers, for two reasons --- first off,
 108  * in case something goes seriously wrong, we want to stop the fork bomb
 109  * short of actually crashing the machine we're running on by filling some
 110  * kernel table.  Secondly, it keeps the size of the scoreboard file small
 111  * enough that we can read the whole thing without worrying too much about
 112  * the overhead.
 113  */
 114 #ifndef DEFAULT_SERVER_LIMIT
 115 #define DEFAULT_SERVER_LIMIT 16
 116 #endif
 117
 118 /* Admin can't tune ServerLimit beyond MAX_SERVER_LIMIT.  We want
 119  * some sort of compile-time limit to help catch typos.
 120  */
 121 #ifndef MAX_SERVER_LIMIT
 122 #define MAX_SERVER_LIMIT 20000
 123 #endif
 124
 125 /* Limit on the threads per process.  Clients will be locked out if more than
 126  * this are needed.
 127  *
 128  * We keep this for one reason it keeps the size of the scoreboard file small
 129  * enough that we can read the whole thing without worrying too much about
 130  * the overhead.
 131  */
 132 #ifndef DEFAULT_THREAD_LIMIT
 133 #define DEFAULT_THREAD_LIMIT 64
 134 #endif
 135
 136 /* Admin can't tune ThreadLimit beyond MAX_THREAD_LIMIT.  We want
 137  * some sort of compile-time limit to help catch typos.
 138  */
 139 #ifndef MAX_THREAD_LIMIT
 140 #define MAX_THREAD_LIMIT 100000
 141 #endif
 142
 143 #define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
 144
 145 #if !APR_VERSION_AT_LEAST(1,4,0)
 146 #define apr_time_from_msec(x) (x * 1000)
 147 #endif
 148
 149 #ifndef MAX_SECS_TO_LINGER
 150 #define MAX_SECS_TO_LINGER 30
 151 #endif
 152 #define SECONDS_TO_LINGER  2
 153
 154 /*
 155  * Actual definitions of config globals
 156  */
 157
 158 #ifndef DEFAULT_WORKER_FACTOR
 159 #define DEFAULT_WORKER_FACTOR 2
 160 #endif
 161 #define WORKER_FACTOR_SCALE   16  /* scale factor to allow fractional values */
 162 static unsigned int worker_factor = DEFAULT_WORKER_FACTOR * WORKER_FACTOR_SCALE;
 163
 164 static int threads_per_child = 0;   /* Worker threads per child */
 165 static int ap_daemons_to_start = 0;
 166 static int min_spare_threads = 0;
 167 static int max_spare_threads = 0;
 168 static int ap_daemons_limit = 0;
 169 static int max_workers = 0;
 170 static int server_limit = 0;
 171 static int thread_limit = 0;
 172 static int had_healthy_child = 0;
 173 static int dying = 0;
 174 static int workers_may_exit = 0;
 175 static int start_thread_may_exit = 0;
 176 static int listener_may_exit = 0;
 177 static int num_listensocks = 0;
 178 static apr_int32_t conns_this_child;        /* MaxConnectionsPerChild, only access
 179                                                in listener thread */
 180 static apr_uint32_t connection_count = 0;   /* Number of open connections */
 181 static apr_uint32_t lingering_count = 0;    /* Number of connections in lingering close */
 182 static apr_uint32_t suspended_count = 0;    /* Number of suspended connections */
 183 static apr_uint32_t clogged_count = 0;      /* Number of threads processing ssl conns */
 184 static int resource_shortage = 0;
 185 static fd_queue_t *worker_queue;
 186 static fd_queue_info_t *worker_queue_info;
 187 static int mpm_state = AP_MPMQ_STARTING;
 188
 189 static apr_thread_mutex_t *timeout_mutex;
 190
 191 module AP_MODULE_DECLARE_DATA mpm_event_module;
 192
 193 /* forward declare */
 194 struct event_srv_cfg_s;
 195 typedef struct event_srv_cfg_s event_srv_cfg;
 196
 197 struct event_conn_state_t {
 198     /** APR_RING of expiration timeouts */
 199     APR_RING_ENTRY(event_conn_state_t) timeout_list;
 200     /** the time when the entry was queued */
 201     apr_time_t queue_timestamp;
 202     /** connection record this struct refers to */
 203     conn_rec *c;
 204     /** request record (if any) this struct refers to */
 205     request_rec *r;
 206     /** server config this struct refers to */
 207     event_srv_cfg *sc;
 208     /** is the current conn_rec suspended?  (disassociated with
 209      * a particular MPM thread; for suspend_/resume_connection
 210      * hooks)
 211      */
 212     int suspended;
 213     /** memory pool to allocate from */
 214     apr_pool_t *p;
 215     /** bucket allocator */
 216     apr_bucket_alloc_t *bucket_alloc;
 217     /** poll file descriptor information */
 218     apr_pollfd_t pfd;
 219     /** public parts of the connection state */
 220     conn_state_t pub;
 221 };
 222 APR_RING_HEAD(timeout_head_t, event_conn_state_t);
 223
 224 struct timeout_queue {
 225     struct timeout_head_t head;
 226     int count, *total;
 227     apr_interval_time_t timeout;
 228     struct timeout_queue *next;
 229 };
 230 /*
 231  * Several timeout queues that use different timeouts, so that we always can
 232  * simply append to the end.
 233  *   write_completion_q uses vhost's TimeOut
 234  *   keepalive_q        uses vhost's KeepAliveTimeOut
 235  *   linger_q           uses MAX_SECS_TO_LINGER
 236  *   short_linger_q     uses SECONDS_TO_LINGER
 237  */
 238 static struct timeout_queue *write_completion_q,
 239                             *keepalive_q,
 240                             *linger_q,
 241                             *short_linger_q;
 242
 243 static apr_pollfd_t *listener_pollfd;
 244
 245 /*
 246  * Macros for accessing struct timeout_queue.
 247  * For TO_QUEUE_APPEND and TO_QUEUE_REMOVE, timeout_mutex must be held.
 248  */
 249 #define TO_QUEUE_APPEND(q, el)                                                \
 250     do {                                                                      \
 251         APR_RING_INSERT_TAIL(&(q)->head, el, event_conn_state_t,              \
 252                              timeout_list);                                   \
 253         ++*(q)->total;                                                        \
 254         ++(q)->count;                                                         \
 255     } while (0)
 256
 257 #define TO_QUEUE_REMOVE(q, el)                                                \
 258     do {                                                                      \
 259         APR_RING_REMOVE(el, timeout_list);                                    \
 260         --*(q)->total;                                                        \
 261         --(q)->count;                                                         \
 262     } while (0)
 263
 264 #define TO_QUEUE_INIT(q, p, t, v)                                             \
 265     do {                                                                      \
 266         struct timeout_queue *b = (v);                                        \
 267         (q) = apr_palloc((p), sizeof *(q));                                   \
 268         APR_RING_INIT(&(q)->head, event_conn_state_t, timeout_list);          \
 269         (q)->total = (b) ? (b)->total : apr_pcalloc((p), sizeof *(q)->total); \
 270         (q)->count = 0;                                                       \
 271         (q)->timeout = (t);                                                   \
 272         (q)->next = NULL;                                                     \
 273     } while (0)
 274
 275 #define TO_QUEUE_ELEM_INIT(el) APR_RING_ELEM_INIT(el, timeout_list)
 276
 277 /*
 278  * The pollset for sockets that are in any of the timeout queues. Currently
 279  * we use the timeout_mutex to make sure that connections are added/removed
 280  * atomically to/from both event_pollset and a timeout queue. Otherwise
 281  * some confusion can happen under high load if timeout queues and pollset
 282  * get out of sync.
 283  * XXX: It should be possible to make the lock unnecessary in many or even all
 284  * XXX: cases.
 285  */
 286 static apr_pollset_t *event_pollset;
 287
 288 /* The structure used to pass unique initialization info to each thread */
 289 typedef struct
 290 {
 291     int pid;
 292     int tid;
 293     int sd;
 294 } proc_info;
 295
 296 /* Structure used to pass information to the thread responsible for
 297  * creating the rest of the threads.
 298  */
 299 typedef struct
 300 {
 301     apr_thread_t **threads;
 302     apr_thread_t *listener;
 303     int child_num_arg;
 304     apr_threadattr_t *threadattr;
 305 } thread_starter;
 306
 307 typedef enum
 308 {
 309     PT_CSD,
 310     PT_ACCEPT
 311 } poll_type_e;
 312
 313 typedef struct
 314 {
 315     poll_type_e type;
 316     void *baton;
 317 } listener_poll_type;
 318
 319 /* data retained by event across load/unload of the module
 320  * allocated on first call to pre-config hook; located on
 321  * subsequent calls to pre-config hook
 322  */
 323 typedef struct event_retained_data {
 324     int first_server_limit;
 325     int first_thread_limit;
 326     int module_loads;
 327     int sick_child_detected;
 328     ap_generation_t my_generation;
 329     int volatile is_graceful; /* set from signal handler */
 330     int maxclients_reported;
 331     /*
 332      * The max child slot ever assigned, preserved across restarts.  Necessary
 333      * to deal with MaxRequestWorkers changes across AP_SIG_GRACEFUL restarts.
 334      * We use this value to optimize routines that have to scan the entire
 335      * scoreboard.
 336      */
 337     int max_daemons_limit;
 338     /*
 339      * idle_spawn_rate is the number of children that will be spawned on the
 340      * next maintenance cycle if there aren't enough idle servers.  It is
 341      * maintained per listeners bucket, doubled up to MAX_SPAWN_RATE, and
 342      * reset only when a cycle goes by without the need to spawn.
 343      */
 344     int *idle_spawn_rate;
 345 #ifndef MAX_SPAWN_RATE
 346 #define MAX_SPAWN_RATE        (32)
 347 #endif
 348     int hold_off_on_exponential_spawning;
 349     /*
 350      * Current number of listeners buckets and maximum reached accross
 351      * restarts (to size retained data according to dynamic num_buckets,
 352      * eg. idle_spawn_rate).
 353      */
 354     int num_buckets, max_buckets;
 355 } event_retained_data;
 356 static event_retained_data *retained;
 357
 358 typedef struct event_child_bucket {
 359     ap_pod_t *pod;
 360     ap_listen_rec *listeners;
 361 } event_child_bucket;
 362 static event_child_bucket *all_buckets, /* All listeners buckets */
 363                           *my_bucket;   /* Current child bucket */
 364
 365 struct event_srv_cfg_s {
 366     struct timeout_queue *wc_q,
 367                          *ka_q;
 368 };
 369
 370 #define ID_FROM_CHILD_THREAD(c, t)    ((c * thread_limit) + t)
 371
 372 /* The event MPM respects a couple of runtime flags that can aid
 373  * in debugging. Setting the -DNO_DETACH flag will prevent the root process
 374  * from detaching from its controlling terminal. Additionally, setting
 375  * the -DONE_PROCESS flag (which implies -DNO_DETACH) will get you the
 376  * child_main loop running in the process which originally started up.
 377  * This gives you a pretty nice debugging environment.  (You'll get a SIGHUP
 378  * early in standalone_main; just continue through.  This is the server
 379  * trying to kill off any child processes which it might have lying
 380  * around --- Apache doesn't keep track of their pids, it just sends
 381  * SIGHUP to the process group, ignoring it in the root process.
 382  * Continue through and you'll be fine.).
 383  */
 384
 385 static int one_process = 0;
 386
 387 #ifdef DEBUG_SIGSTOP
 388 int raise_sigstop_flags;
 389 #endif
 390
 391 static apr_pool_t *pconf;       /* Pool for config stuff */
 392 static apr_pool_t *pchild;      /* Pool for httpd child stuff */
 393
 394 static pid_t ap_my_pid;         /* Linux getpid() doesn't work except in main
 395                                    thread. Use this instead */
 396 static pid_t parent_pid;
 397 static apr_os_thread_t *listener_os_thread;
 398
 399 /* The LISTENER_SIGNAL signal will be sent from the main thread to the
 400  * listener thread to wake it up for graceful termination (what a child
 401  * process from an old generation does when the admin does "apachectl
 402  * graceful").  This signal will be blocked in all threads of a child
 403  * process except for the listener thread.
 404  */
 405 #define LISTENER_SIGNAL     SIGHUP
 406
 407 /* An array of socket descriptors in use by each thread used to
 408  * perform a non-graceful (forced) shutdown of the server.
 409  */
 410 static apr_socket_t **worker_sockets;
 411
 412 static void disable_listensocks(int process_slot)
 413 {
 414     int i;
 415     for (i = 0; i < num_listensocks; i++) {
 416         apr_pollset_remove(event_pollset, &listener_pollfd[i]);
 417     }
 418     ap_scoreboard_image->parent[process_slot].not_accepting = 1;
 419 }
 420
 421 static void enable_listensocks(int process_slot)
 422 {
 423     int i;
 424     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00457)
 425                  "Accepting new connections again: "
 426                  "%u active conns (%u lingering/%u clogged/%u suspended), "
 427                  "%u idle workers",
 428                  apr_atomic_read32(&connection_count),
 429                  apr_atomic_read32(&lingering_count),
 430                  apr_atomic_read32(&clogged_count),
 431                  apr_atomic_read32(&suspended_count),
 432                  ap_queue_info_get_idlers(worker_queue_info));
 433     for (i = 0; i < num_listensocks; i++)
 434         apr_pollset_add(event_pollset, &listener_pollfd[i]);
 435     /*
 436      * XXX: This is not yet optimal. If many workers suddenly become available,
 437      * XXX: the parent may kill some processes off too soon.
 438      */
 439     ap_scoreboard_image->parent[process_slot].not_accepting = 0;
 440 }
 441
 442 static void close_worker_sockets(void)
 443 {
 444     int i;
 445     for (i = 0; i < threads_per_child; i++) {
 446         if (worker_sockets[i]) {
 447             apr_socket_close(worker_sockets[i]);
 448             worker_sockets[i] = NULL;
 449         }
 450     }
 451 }
 452
 453 static void wakeup_listener(void)
 454 {
 455     listener_may_exit = 1;
 456     if (!listener_os_thread) {
 457         /* XXX there is an obscure path that this doesn't handle perfectly:
 458          *     right after listener thread is created but before
 459          *     listener_os_thread is set, the first worker thread hits an
 460          *     error and starts graceful termination
 461          */
 462         return;
 463     }
 464
 465     /* unblock the listener if it's waiting for a worker */
 466     ap_queue_info_term(worker_queue_info);
 467
 468     /*
 469      * we should just be able to "kill(ap_my_pid, LISTENER_SIGNAL)" on all
 470      * platforms and wake up the listener thread since it is the only thread
 471      * with SIGHUP unblocked, but that doesn't work on Linux
 472      */
 473 #ifdef HAVE_PTHREAD_KILL
 474     pthread_kill(*listener_os_thread, LISTENER_SIGNAL);
 475 #else
 476     kill(ap_my_pid, LISTENER_SIGNAL);
 477 #endif
 478 }
 479
 480 #define ST_INIT              0
 481 #define ST_GRACEFUL          1
 482 #define ST_UNGRACEFUL        2
 483
 484 static int terminate_mode = ST_INIT;
 485
 486 static void signal_threads(int mode)
 487 {
 488     if (terminate_mode == mode) {
 489         return;
 490     }
 491     terminate_mode = mode;
 492     mpm_state = AP_MPMQ_STOPPING;
 493
 494     /* in case we weren't called from the listener thread, wake up the
 495      * listener thread
 496      */
 497     wakeup_listener();
 498
 499     /* for ungraceful termination, let the workers exit now;
 500      * for graceful termination, the listener thread will notify the
 501      * workers to exit once it has stopped accepting new connections
 502      */
 503     if (mode == ST_UNGRACEFUL) {
 504         workers_may_exit = 1;
 505         ap_queue_interrupt_all(worker_queue);
 506         close_worker_sockets(); /* forcefully kill all current connections */
 507     }
 508 }
 509
 510 static int event_query(int query_code, int *result, apr_status_t *rv)
 511 {
 512     *rv = APR_SUCCESS;
 513     switch (query_code) {
 514     case AP_MPMQ_MAX_DAEMON_USED:
 515         *result = retained->max_daemons_limit;
 516         break;
 517     case AP_MPMQ_IS_THREADED:
 518         *result = AP_MPMQ_STATIC;
 519         break;
 520     case AP_MPMQ_IS_FORKED:
 521         *result = AP_MPMQ_DYNAMIC;
 522         break;
 523     case AP_MPMQ_IS_ASYNC:
 524         *result = 1;
 525         break;
 526     case AP_MPMQ_HARD_LIMIT_DAEMONS:
 527         *result = server_limit;
 528         break;
 529     case AP_MPMQ_HARD_LIMIT_THREADS:
 530         *result = thread_limit;
 531         break;
 532     case AP_MPMQ_MAX_THREADS:
 533         *result = threads_per_child;
 534         break;
 535     case AP_MPMQ_MIN_SPARE_DAEMONS:
 536         *result = 0;
 537         break;
 538     case AP_MPMQ_MIN_SPARE_THREADS:
 539         *result = min_spare_threads;
 540         break;
 541     case AP_MPMQ_MAX_SPARE_DAEMONS:
 542         *result = 0;
 543         break;
 544     case AP_MPMQ_MAX_SPARE_THREADS:
 545         *result = max_spare_threads;
 546         break;
 547     case AP_MPMQ_MAX_REQUESTS_DAEMON:
 548         *result = ap_max_requests_per_child;
 549         break;
 550     case AP_MPMQ_MAX_DAEMONS:
 551         *result = ap_daemons_limit;
 552         break;
 553     case AP_MPMQ_MPM_STATE:
 554         *result = mpm_state;
 555         break;
 556     case AP_MPMQ_GENERATION:
 557         *result = retained->my_generation;
 558         break;
 559     default:
 560         *rv = APR_ENOTIMPL;
 561         break;
 562     }
 563     return OK;
 564 }
 565
 566 static void event_note_child_killed(int childnum, pid_t pid, ap_generation_t gen)
 567 {
 568     if (childnum != -1) { /* child had a scoreboard slot? */
 569         ap_run_child_status(ap_server_conf,
 570                             ap_scoreboard_image->parent[childnum].pid,
 571                             ap_scoreboard_image->parent[childnum].generation,
 572                             childnum, MPM_CHILD_EXITED);
 573         ap_scoreboard_image->parent[childnum].pid = 0;
 574     }
 575     else {
 576         ap_run_child_status(ap_server_conf, pid, gen, -1, MPM_CHILD_EXITED);
 577     }
 578 }
 579
 580 static void event_note_child_started(int slot, pid_t pid)
 581 {
 582     ap_scoreboard_image->parent[slot].pid = pid;
 583     ap_run_child_status(ap_server_conf,
 584                         ap_scoreboard_image->parent[slot].pid,
 585                         retained->my_generation, slot, MPM_CHILD_STARTED);
 586 }
 587
 588 static void event_note_child_lost_slot(int slot, pid_t newpid)
 589 {
 590     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00458)
 591                  "pid %" APR_PID_T_FMT " taking over scoreboard slot from "
 592                  "%" APR_PID_T_FMT "%s",
 593                  newpid,
 594                  ap_scoreboard_image->parent[slot].pid,
 595                  ap_scoreboard_image->parent[slot].quiescing ?
 596                  " (quiescing)" : "");
 597     ap_run_child_status(ap_server_conf,
 598                         ap_scoreboard_image->parent[slot].pid,
 599                         ap_scoreboard_image->parent[slot].generation,
 600                         slot, MPM_CHILD_LOST_SLOT);
 601     /* Don't forget about this exiting child process, or we
 602      * won't be able to kill it if it doesn't exit by the
 603      * time the server is shut down.
 604      */
 605     ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid,
 606                                   ap_scoreboard_image->parent[slot].generation);
 607 }
 608
 609 static const char *event_get_name(void)
 610 {
 611     return "event";
 612 }
 613
 614 /* a clean exit from a child with proper cleanup */
 615 static void clean_child_exit(int code) __attribute__ ((noreturn));
 616 static void clean_child_exit(int code)
 617 {
 618     mpm_state = AP_MPMQ_STOPPING;
 619     if (pchild) {
 620         apr_pool_destroy(pchild);
 621     }
 622
 623     if (one_process) {
 624         event_note_child_killed(/* slot */ 0, 0, 0);
 625     }
 626
 627     exit(code);
 628 }
 629
 630 static void just_die(int sig)
 631 {
 632     clean_child_exit(0);
 633 }
 634
 635 /*****************************************************************
 636  * Connection structures and accounting...
 637  */
 638
 639 static int child_fatal;
 640
 641 /* volatile because they're updated from a signal handler */
 642 static int volatile shutdown_pending;
 643 static int volatile restart_pending;
 644
 645 static apr_status_t decrement_connection_count(void *cs_)
 646 {
 647     event_conn_state_t *cs = cs_;
 648     switch (cs->pub.state) {
 649         case CONN_STATE_LINGER_NORMAL:
 650         case CONN_STATE_LINGER_SHORT:
 651             apr_atomic_dec32(&lingering_count);
 652             break;
 653         case CONN_STATE_SUSPENDED:
 654             apr_atomic_dec32(&suspended_count);
 655             break;
 656         default:
 657             break;
 658     }
 659     apr_atomic_dec32(&connection_count);
 660     return APR_SUCCESS;
 661 }
 662
 663 /*
 664  * ap_start_shutdown() and ap_start_restart(), below, are a first stab at
 665  * functions to initiate shutdown or restart without relying on signals.
 666  * Previously this was initiated in sig_term() and restart() signal handlers,
 667  * but we want to be able to start a shutdown/restart from other sources --
 668  * e.g. on Win32, from the service manager. Now the service manager can
 669  * call ap_start_shutdown() or ap_start_restart() as appropriate.  Note that
 670  * these functions can also be called by the child processes, since global
 671  * variables are no longer used to pass on the required action to the parent.
 672  *
 673  * These should only be called from the parent process itself, since the
 674  * parent process will use the shutdown_pending and restart_pending variables
 675  * to determine whether to shutdown or restart. The child process should
 676  * call signal_parent() directly to tell the parent to die -- this will
  677  * cause neither of those variables to be set, which the parent will
 678  * assume means something serious is wrong (which it will be, for the
 679  * child to force an exit) and so do an exit anyway.
 680  */
 681
 682 static void ap_start_shutdown(int graceful)
 683 {
 684     mpm_state = AP_MPMQ_STOPPING;
 685     if (shutdown_pending == 1) {
 686         /* Um, is this _probably_ not an error, if the user has
 687          * tried to do a shutdown twice quickly, so we won't
 688          * worry about reporting it.
 689          */
 690         return;
 691     }
 692     shutdown_pending = 1;
 693     retained->is_graceful = graceful;
 694 }
 695
 696 /* do a graceful restart if graceful == 1 */
 697 static void ap_start_restart(int graceful)
 698 {
 699     mpm_state = AP_MPMQ_STOPPING;
 700     if (restart_pending == 1) {
 701         /* Probably not an error - don't bother reporting it */
 702         return;
 703     }
 704     restart_pending = 1;
 705     retained->is_graceful = graceful;
 706 }
 707
 708 static void sig_term(int sig)
 709 {
 710     ap_start_shutdown(sig == AP_SIG_GRACEFUL_STOP);
 711 }
 712
 713 static void restart(int sig)
 714 {
 715     ap_start_restart(sig == AP_SIG_GRACEFUL);
 716 }
 717
 718 static void set_signals(void)
 719 {
 720 #ifndef NO_USE_SIGACTION
 721     struct sigaction sa;
 722 #endif
 723
 724     if (!one_process) {
 725         ap_fatal_signal_setup(ap_server_conf, pconf);
 726     }
 727
 728 #ifndef NO_USE_SIGACTION
 729     sigemptyset(&sa.sa_mask);
 730     sa.sa_flags = 0;
 731
 732     sa.sa_handler = sig_term;
 733     if (sigaction(SIGTERM, &sa, NULL) < 0)
 734         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00459)
 735                      "sigaction(SIGTERM)");
 736 #ifdef AP_SIG_GRACEFUL_STOP
 737     if (sigaction(AP_SIG_GRACEFUL_STOP, &sa, NULL) < 0)
 738         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00460)
 739                      "sigaction(" AP_SIG_GRACEFUL_STOP_STRING ")");
 740 #endif
 741 #ifdef SIGINT
 742     if (sigaction(SIGINT, &sa, NULL) < 0)
 743         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00461)
 744                      "sigaction(SIGINT)");
 745 #endif
 746 #ifdef SIGXCPU
 747     sa.sa_handler = SIG_DFL;
 748     if (sigaction(SIGXCPU, &sa, NULL) < 0)
 749         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00462)
 750                      "sigaction(SIGXCPU)");
 751 #endif
 752 #ifdef SIGXFSZ
 753     /* For systems following the LFS standard, ignoring SIGXFSZ allows
 754      * a write() beyond the 2GB limit to fail gracefully with E2BIG
 755      * rather than terminate the process. */
 756     sa.sa_handler = SIG_IGN;
 757     if (sigaction(SIGXFSZ, &sa, NULL) < 0)
 758         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00463)
 759                      "sigaction(SIGXFSZ)");
 760 #endif
 761 #ifdef SIGPIPE
 762     sa.sa_handler = SIG_IGN;
 763     if (sigaction(SIGPIPE, &sa, NULL) < 0)
 764         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00464)
 765                      "sigaction(SIGPIPE)");
 766 #endif
 767
 768     /* we want to ignore HUPs and AP_SIG_GRACEFUL while we're busy
 769      * processing one */
 770     sigaddset(&sa.sa_mask, SIGHUP);
 771     sigaddset(&sa.sa_mask, AP_SIG_GRACEFUL);
 772     sa.sa_handler = restart;
 773     if (sigaction(SIGHUP, &sa, NULL) < 0)
 774         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00465)
 775                      "sigaction(SIGHUP)");
 776     if (sigaction(AP_SIG_GRACEFUL, &sa, NULL) < 0)
 777         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00466)
 778                      "sigaction(" AP_SIG_GRACEFUL_STRING ")");
 779 #else
 780     if (!one_process) {
 781 #ifdef SIGXCPU
 782         apr_signal(SIGXCPU, SIG_DFL);
 783 #endif /* SIGXCPU */
 784 #ifdef SIGXFSZ
 785         apr_signal(SIGXFSZ, SIG_IGN);
 786 #endif /* SIGXFSZ */
 787     }
 788
 789     apr_signal(SIGTERM, sig_term);
 790 #ifdef SIGHUP
 791     apr_signal(SIGHUP, restart);
 792 #endif /* SIGHUP */
 793 #ifdef AP_SIG_GRACEFUL
 794     apr_signal(AP_SIG_GRACEFUL, restart);
 795 #endif /* AP_SIG_GRACEFUL */
 796 #ifdef AP_SIG_GRACEFUL_STOP
 797      apr_signal(AP_SIG_GRACEFUL_STOP, sig_term);
 798 #endif /* AP_SIG_GRACEFUL_STOP */
 799 #ifdef SIGPIPE
 800     apr_signal(SIGPIPE, SIG_IGN);
 801 #endif /* SIGPIPE */
 802
 803 #endif
 804 }
 805
 806 static void notify_suspend(event_conn_state_t *cs)
 807 {
 808     ap_run_suspend_connection(cs->c, cs->r);
 809     cs->suspended = 1;
 810     cs->c->sbh = NULL;
 811 }
 812
 813 static void notify_resume(event_conn_state_t *cs, ap_sb_handle_t *sbh)
 814 {
 815     cs->c->sbh = sbh;
 816     cs->suspended = 0;
 817     ap_run_resume_connection(cs->c, cs->r);
 818 }
 819
 820 static int start_lingering_close_common(event_conn_state_t *cs, int in_worker)
 821 {
 822     apr_status_t rv;
 823     struct timeout_queue *q;
 824     apr_socket_t *csd = cs->pfd.desc.s;
 825 #ifdef AP_DEBUG
 826     {
 827         rv = apr_socket_timeout_set(csd, 0);
 828         AP_DEBUG_ASSERT(rv == APR_SUCCESS);
 829     }
 830 #else
 831     apr_socket_timeout_set(csd, 0);
 832 #endif
 833     cs->queue_timestamp = apr_time_now();
 834     /*
 835      * If some module requested a shortened waiting period, only wait for
 836      * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain
 837      * DoS attacks.
 838      */
 839     if (apr_table_get(cs->c->notes, "short-lingering-close")) {
 840         q = short_linger_q;
 841         cs->pub.state = CONN_STATE_LINGER_SHORT;
 842     }
 843     else {
 844         q = linger_q;
 845         cs->pub.state = CONN_STATE_LINGER_NORMAL;
 846     }
 847     apr_atomic_inc32(&lingering_count);
 848     if (in_worker) {
 849         notify_suspend(cs);
 850     }
 851     else {
 852         cs->c->sbh = NULL;
 853     }
 854     apr_thread_mutex_lock(timeout_mutex);
 855     TO_QUEUE_APPEND(q, cs);
 856     cs->pfd.reqevents = (
 857             cs->pub.sense == CONN_SENSE_WANT_WRITE ? APR_POLLOUT :
 858                     APR_POLLIN) | APR_POLLHUP | APR_POLLERR;
 859     cs->pub.sense = CONN_SENSE_DEFAULT;
 860     rv = apr_pollset_add(event_pollset, &cs->pfd);
 861     apr_thread_mutex_unlock(timeout_mutex);
 862     if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
 863         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03092)
 864                      "start_lingering_close: apr_pollset_add failure");
 865         apr_thread_mutex_lock(timeout_mutex);
 866         TO_QUEUE_REMOVE(q, cs);
 867         apr_thread_mutex_unlock(timeout_mutex);
 868         apr_socket_close(cs->pfd.desc.s);
 869         ap_push_pool(worker_queue_info, cs->p);
 870         return 0;
 871     }
 872     return 1;
 873 }
 874
 875 /*
 876  * Close our side of the connection, flushing data to the client first.
 877  * Pre-condition: cs is not in any timeout queue and not in the pollset,
 878  *                timeout_mutex is not locked
 879  * return: 0 if connection is fully closed,
 880  *         1 if connection is lingering
 881  * May only be called by worker thread.
 882  */
 883 static int start_lingering_close_blocking(event_conn_state_t *cs)
 884 {
 885     if (ap_start_lingering_close(cs->c)) {
 886         notify_suspend(cs);
 887         ap_push_pool(worker_queue_info, cs->p);
 888         return 0;
 889     }
 890     return start_lingering_close_common(cs, 1);
 891 }
 892
 893 /*
 894  * Close our side of the connection, NOT flushing data to the client.
 895  * This should only be called if there has been an error or if we know
 896  * that our send buffers are empty.
 897  * Pre-condition: cs is not in any timeout queue and not in the pollset,
 898  *                timeout_mutex is not locked
 899  * return: 0 if connection is fully closed,
 900  *         1 if connection is lingering
 901  * may be called by listener thread
 902  */
 903 static int start_lingering_close_nonblocking(event_conn_state_t *cs)
 904 {
 905     conn_rec *c = cs->c;
 906     apr_socket_t *csd = cs->pfd.desc.s;
 907
 908     if (ap_prep_lingering_close(c)
 909         || c->aborted
 910         || ap_shutdown_conn(c, 0) != APR_SUCCESS || c->aborted
 911         || apr_socket_shutdown(csd, APR_SHUTDOWN_WRITE) != APR_SUCCESS) {
 912         apr_socket_close(csd);
 913         ap_push_pool(worker_queue_info, cs->p);
 914         return 0;
 915     }
 916     return start_lingering_close_common(cs, 0);
 917 }
 918
 919 /*
 920  * forcibly close a lingering connection after the lingering period has
 921  * expired
 922  * Pre-condition: cs is not in any timeout queue and not in the pollset
 923  * return: irrelevant (need same prototype as start_lingering_close)
 924  */
 925 static int stop_lingering_close(event_conn_state_t *cs)
 926 {
 927     apr_status_t rv;
 928     apr_socket_t *csd = ap_get_conn_socket(cs->c);
 929     ap_log_error(APLOG_MARK, APLOG_TRACE4, 0, ap_server_conf,
 930                  "socket reached timeout in lingering-close state");
 931     rv = apr_socket_close(csd);
 932     if (rv != APR_SUCCESS) {
 933         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(00468) "error closing socket");
 934         AP_DEBUG_ASSERT(0);
 935     }
 936     ap_push_pool(worker_queue_info, cs->p);
 937     return 0;
 938 }
 939
 940 /*
 941  * This runs before any non-MPM cleanup code on the connection;
 942  * if the connection is currently suspended as far as modules
 943  * know, provide notification of resumption.
 944  */
 945 static apr_status_t ptrans_pre_cleanup(void *dummy)
 946 {
 947     event_conn_state_t *cs = dummy;
 948
 949     if (cs->suspended) {
 950         notify_resume(cs, NULL);
 951     }
 952     return APR_SUCCESS;
 953 }
 954
 955 /*
 956  * event_pre_read_request() and event_request_cleanup() track the
 957  * current r for a given connection.
 958  */
 959 static apr_status_t event_request_cleanup(void *dummy)
 960 {
 961     conn_rec *c = dummy;
 962     event_conn_state_t *cs = ap_get_module_config(c->conn_config,
 963                                                   &mpm_event_module);
 964
 965     cs->r = NULL;
 966     return APR_SUCCESS;
 967 }
 968
 969 static void event_pre_read_request(request_rec *r, conn_rec *c)
 970 {
 971     event_conn_state_t *cs = ap_get_module_config(c->conn_config,
 972                                                   &mpm_event_module);
 973
 974     cs->r = r;
 975     cs->sc = ap_get_module_config(ap_server_conf->module_config,
 976                                   &mpm_event_module);
 977     apr_pool_cleanup_register(r->pool, c, event_request_cleanup,
 978                               apr_pool_cleanup_null);
 979 }
 980
 981 /*
 982  * event_post_read_request() tracks the current server config for a
 983  * given request.
 984  */
 985 static int event_post_read_request(request_rec *r)
 986 {
 987     conn_rec *c = r->connection;
 988     event_conn_state_t *cs = ap_get_module_config(c->conn_config,
 989                                                   &mpm_event_module);
 990
 991     /* To preserve legacy behaviour (consistent with other MPMs), use
 992      * the keepalive timeout from the base server (first on this IP:port)
 993      * when none is explicitly configured on this server.
 994      */
 995     if (r->server->keep_alive_timeout_set) {
 996         cs->sc = ap_get_module_config(r->server->module_config,
 997                                       &mpm_event_module);
 998     }
 999     else {
1000         cs->sc = ap_get_module_config(c->base_server->module_config,
1001                                       &mpm_event_module);
1002     }
1003     return OK;
1004 }
1005
1006 /*
1007  * process one connection in the worker
1008  */
1009 static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock,
1010                           event_conn_state_t * cs, int my_child_num,
1011                           int my_thread_num)
1012 {
1013     conn_rec *c;
1014     long conn_id = ID_FROM_CHILD_THREAD(my_child_num, my_thread_num);
1015     int rc;
1016     ap_sb_handle_t *sbh;
1017
1018     /* XXX: This will cause unbounded mem usage for long lasting connections */
1019     ap_create_sb_handle(&sbh, p, my_child_num, my_thread_num);
1020
1021     if (cs == NULL) {           /* This is a new connection */
1022         listener_poll_type *pt = apr_pcalloc(p, sizeof(*pt));
1023         cs = apr_pcalloc(p, sizeof(event_conn_state_t));
1024         cs->bucket_alloc = apr_bucket_alloc_create(p);
1025         c = ap_run_create_connection(p, ap_server_conf, sock,
1026                                      conn_id, sbh, cs->bucket_alloc);
1027         if (!c) {
1028             ap_push_pool(worker_queue_info, p);
1029             return;
1030         }
1031         apr_atomic_inc32(&connection_count);
1032         apr_pool_cleanup_register(c->pool, cs, decrement_connection_count,
1033                                   apr_pool_cleanup_null);
1034         ap_set_module_config(c->conn_config, &mpm_event_module, cs);
1035         c->current_thread = thd;
1036         cs->c = c;
1037         c->cs = &(cs->pub);
1038         cs->p = p;
1039         cs->sc = ap_get_module_config(ap_server_conf->module_config,
1040                                       &mpm_event_module);
1041         cs->pfd.desc_type = APR_POLL_SOCKET;
1042         cs->pfd.reqevents = APR_POLLIN;
1043         cs->pfd.desc.s = sock;
1044         pt->type = PT_CSD;
1045         pt->baton = cs;
1046         cs->pfd.client_data = pt;
1047         apr_pool_pre_cleanup_register(p, cs, ptrans_pre_cleanup);
1048         TO_QUEUE_ELEM_INIT(cs);
1049
1050         ap_update_vhost_given_ip(c);
1051
1052         rc = ap_run_pre_connection(c, sock);
1053         if (rc != OK && rc != DONE) {
1054             ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c, APLOGNO(00469)
1055                           "process_socket: connection aborted");
1056             c->aborted = 1;
1057         }
1058
1059         /**
1060          * XXX If the platform does not have a usable way of bundling
1061          * accept() with a socket readability check, like Win32,
1062          * and there are measurable delays before the
1063          * socket is readable due to the first data packet arriving,
1064          * it might be better to create the cs on the listener thread
1065          * with the state set to CONN_STATE_CHECK_REQUEST_LINE_READABLE
1066          *
1067          * FreeBSD users will want to enable the HTTP accept filter
1068          * module in their kernel for the highest performance
1069          * When the accept filter is active, sockets are kept in the
1070          * kernel until a HTTP request is received.
1071          */
1072         cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
1073
1074         cs->pub.sense = CONN_SENSE_DEFAULT;
1075     }
1076     else {
1077         c = cs->c;
1078         notify_resume(cs, sbh);
1079         c->current_thread = thd;
1080         /* Subsequent request on a conn, and thread number is part of ID */
1081         c->id = conn_id;
1082     }
1083
1084     if (c->clogging_input_filters && !c->aborted) {
1085         /* Since we have an input filter which 'clogs' the input stream,
1086          * like mod_ssl used to, lets just do the normal read from input
1087          * filters, like the Worker MPM does. Filters that need to write
1088          * where they would otherwise read, or read where they would
1089          * otherwise write, should set the sense appropriately.
1090          */
1091         apr_atomic_inc32(&clogged_count);
1092         ap_run_process_connection(c);
1093         if (cs->pub.state != CONN_STATE_SUSPENDED) {
1094             cs->pub.state = CONN_STATE_LINGER;
1095         }
1096         apr_atomic_dec32(&clogged_count);
1097     }
1098
1099 read_request:
1100     if (cs->pub.state == CONN_STATE_READ_REQUEST_LINE) {
1101         if (!c->aborted) {
1102             ap_run_process_connection(c);
1103
1104             /* state will be updated upon return
1105              * fall thru to either wait for readability/timeout or
1106              * do lingering close
1107              */
1108         }
1109         else {
1110             cs->pub.state = CONN_STATE_LINGER;
1111         }
1112     }
1113
1114     if (cs->pub.state == CONN_STATE_WRITE_COMPLETION) {
1115         ap_filter_t *output_filter = c->output_filters;
1116         apr_status_t rv;
1117         ap_update_child_status_from_conn(sbh, SERVER_BUSY_WRITE, c);
1118         while (output_filter->next != NULL) {
1119             output_filter = output_filter->next;
1120         }
1121         rv = output_filter->frec->filter_func.out_func(output_filter, NULL);
1122         if (rv != APR_SUCCESS) {
1123             ap_log_cerror(APLOG_MARK, APLOG_DEBUG, rv, c, APLOGNO(00470)
1124                           "network write failure in core output filter");
1125             cs->pub.state = CONN_STATE_LINGER;
1126         }
1127         else if (c->data_in_output_filters) {
1128             /* Still in WRITE_COMPLETION_STATE:
1129              * Set a write timeout for this connection, and let the
1130              * event thread poll for writeability.
1131              */
1132             cs->queue_timestamp = apr_time_now();
1133             notify_suspend(cs);
1134             apr_thread_mutex_lock(timeout_mutex);
1135             TO_QUEUE_APPEND(cs->sc->wc_q, cs);
1136             cs->pfd.reqevents = (
1137                     cs->pub.sense == CONN_SENSE_WANT_READ ? APR_POLLIN :
1138                             APR_POLLOUT) | APR_POLLHUP | APR_POLLERR;
1139             cs->pub.sense = CONN_SENSE_DEFAULT;
1140             rc = apr_pollset_add(event_pollset, &cs->pfd);
1141             apr_thread_mutex_unlock(timeout_mutex);
1142             return;
1143         }
1144         else if (c->keepalive != AP_CONN_KEEPALIVE || c->aborted ||
1145             listener_may_exit) {
1146             cs->pub.state = CONN_STATE_LINGER;
1147         }
1148         else if (c->data_in_input_filters) {
1149             cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
1150             goto read_request;
1151         }
1152         else {
1153             cs->pub.state = CONN_STATE_CHECK_REQUEST_LINE_READABLE;
1154         }
1155     }
1156
1157     if (cs->pub.state == CONN_STATE_LINGER) {
1158         start_lingering_close_blocking(cs);
1159     }
1160     else if (cs->pub.state == CONN_STATE_CHECK_REQUEST_LINE_READABLE) {
1161         /* It greatly simplifies the logic to use a single timeout value per q
1162          * because the new element can just be added to the end of the list and
1163          * it will stay sorted in expiration time sequence.  If brand new
1164          * sockets are sent to the event thread for a readability check, this
1165          * will be a slight behavior change - they use the non-keepalive
1166          * timeout today.  With a normal client, the socket will be readable in
1167          * a few milliseconds anyway.
1168          */
1169         cs->queue_timestamp = apr_time_now();
1170         notify_suspend(cs);
1171         apr_thread_mutex_lock(timeout_mutex);
1172         TO_QUEUE_APPEND(cs->sc->ka_q, cs);
1173
1174         /* Add work to pollset. */
1175         cs->pfd.reqevents = APR_POLLIN;
1176         rc = apr_pollset_add(event_pollset, &cs->pfd);
1177         apr_thread_mutex_unlock(timeout_mutex);
1178
1179         if (rc != APR_SUCCESS) {
1180             ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf, APLOGNO(03093)
1181                          "process_socket: apr_pollset_add failure");
1182             AP_DEBUG_ASSERT(rc == APR_SUCCESS);
1183         }
1184     }
1185     else if (cs->pub.state == CONN_STATE_SUSPENDED) {
1186         apr_atomic_inc32(&suspended_count);
1187         notify_suspend(cs);
1188     }
1189 }
1190
1191 /* conns_this_child has gone to zero or below.  See if the admin coded
1192    "MaxConnectionsPerChild 0", and keep going in that case.  Doing it this way
1193    simplifies the hot path in worker_thread */
1194 static void check_infinite_requests(void)
1195 {
1196     if (ap_max_requests_per_child) {
1197         ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1198                      "Stopping process due to MaxConnectionsPerChild");
1199         signal_threads(ST_GRACEFUL);
1200     }
1201     else {
1202         /* keep going */
1203         conns_this_child = APR_INT32_MAX;
1204     }
1205 }
1206
1207 static void close_listeners(int process_slot, int *closed)
1208 {
1209     if (!*closed) {
1210         int i;
1211         disable_listensocks(process_slot);
1212         ap_close_listeners_ex(my_bucket->listeners);
1213         *closed = 1;
1214         dying = 1;
1215         ap_scoreboard_image->parent[process_slot].quiescing = 1;
1216         for (i = 0; i < threads_per_child; ++i) {
1217             ap_update_child_status_from_indexes(process_slot, i,
1218                                                 SERVER_GRACEFUL, NULL);
1219         }
1220         /* wake up the main thread */
1221         kill(ap_my_pid, SIGTERM);
1222     }
1223 }
1224
1225 static void unblock_signal(int sig)
1226 {
1227     sigset_t sig_mask;
1228
1229     sigemptyset(&sig_mask);
1230     sigaddset(&sig_mask, sig);
1231 #if defined(SIGPROCMASK_SETS_THREAD_MASK)
1232     sigprocmask(SIG_UNBLOCK, &sig_mask, NULL);
1233 #else
1234     pthread_sigmask(SIG_UNBLOCK, &sig_mask, NULL);
1235 #endif
1236 }
1237
/* Signal handler that intentionally does nothing. */
static void dummy_signal_handler(int sig)
{
    /* XXX If specifying SIG_IGN is guaranteed to unblock a syscall,
     *     then we don't need this goofy function.
     */
    (void)sig;
}
1244
1245
1246 static apr_status_t init_pollset(apr_pool_t *p)
1247 {
1248     ap_listen_rec *lr;
1249     listener_poll_type *pt;
1250     int i = 0;
1251
1252     listener_pollfd = apr_palloc(p, sizeof(apr_pollfd_t) * num_listensocks);
1253     for (lr = my_bucket->listeners; lr != NULL; lr = lr->next, i++) {
1254         apr_pollfd_t *pfd;
1255         AP_DEBUG_ASSERT(i < num_listensocks);
1256         pfd = &listener_pollfd[i];
1257         pt = apr_pcalloc(p, sizeof(*pt));
1258         pfd->desc_type = APR_POLL_SOCKET;
1259         pfd->desc.s = lr->sd;
1260         pfd->reqevents = APR_POLLIN;
1261
1262         pt->type = PT_ACCEPT;
1263         pt->baton = lr;
1264
1265         pfd->client_data = pt;
1266
1267         apr_socket_opt_set(pfd->desc.s, APR_SO_NONBLOCK, 1);
1268         apr_pollset_add(event_pollset, pfd);
1269
1270         lr->accept_func = ap_unixd_accept;
1271     }
1272
1273     return APR_SUCCESS;
1274 }
1275
1276 static apr_status_t push_timer2worker(timer_event_t* te)
1277 {
1278     return ap_queue_push_timer(worker_queue, te);
1279 }
1280
1281 /*
1282  * Pre-condition: pfd->cs is neither in pollset nor timeout queue
1283  * this function may only be called by the listener
1284  */
1285 static apr_status_t push2worker(const apr_pollfd_t * pfd,
1286                                 apr_pollset_t * pollset)
1287 {
1288     listener_poll_type *pt = (listener_poll_type *) pfd->client_data;
1289     event_conn_state_t *cs = (event_conn_state_t *) pt->baton;
1290     apr_status_t rc;
1291
1292     rc = ap_queue_push(worker_queue, cs->pfd.desc.s, cs, cs->p);
1293     if (rc != APR_SUCCESS) {
1294         /* trash the connection; we couldn't queue the connected
1295          * socket to a worker
1296          */
1297         apr_bucket_alloc_destroy(cs->bucket_alloc);
1298         apr_socket_close(cs->pfd.desc.s);
1299         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1300                      ap_server_conf, APLOGNO(00471) "push2worker: ap_queue_push failed");
1301         ap_push_pool(worker_queue_info, cs->p);
1302     }
1303
1304     return rc;
1305 }
1306
1307 /* get_worker:
1308  *     If *have_idle_worker_p == 0, reserve a worker thread, and set
1309  *     *have_idle_worker_p = 1.
1310  *     If *have_idle_worker_p is already 1, will do nothing.
1311  *     If blocking == 1, block if all workers are currently busy.
1312  *     If no worker was available immediately, will set *all_busy to 1.
1313  *     XXX: If there are no workers, we should not block immediately but
1314  *     XXX: close all keep-alive connections first.
1315  */
1316 static void get_worker(int *have_idle_worker_p, int blocking, int *all_busy)
1317 {
1318     apr_status_t rc;
1319
1320     if (*have_idle_worker_p) {
1321         /* already reserved a worker thread - must have hit a
1322          * transient error on a previous pass
1323          */
1324         return;
1325     }
1326
1327     if (blocking)
1328         rc = ap_queue_info_wait_for_idler(worker_queue_info, all_busy);
1329     else
1330         rc = ap_queue_info_try_get_idler(worker_queue_info);
1331
1332     if (rc == APR_SUCCESS || APR_STATUS_IS_EOF(rc)) {
1333         *have_idle_worker_p = 1;
1334     }
1335     else if (!blocking && rc == APR_EAGAIN) {
1336         *all_busy = 1;
1337     }
1338     else {
1339         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf, APLOGNO(00472)
1340                      "ap_queue_info_wait_for_idler failed.  "
1341                      "Attempting to shutdown process gracefully");
1342         signal_threads(ST_GRACEFUL);
1343     }
1344 }
1345
1346 /* Structures to reuse */
1347 static APR_RING_HEAD(timer_free_ring_t, timer_event_t) timer_free_ring;
1348
1349 static apr_skiplist *timer_skiplist;
1350
1351 /* The following compare function is used by apr_skiplist_insert() to keep the
1352  * elements (timers) sorted and provide O(log n) complexity (this is also true
1353  * for apr_skiplist_{find,remove}(), but those are not used in MPM event where
1354  * inserted timers are not searched nor removed, but with apr_skiplist_pop()
1355  * which does use any compare function).  It is meant to return 0 when a == b,
1356  * <0 when a < b, and >0 when a > b.  However apr_skiplist_insert() will not
1357  * add duplicates (i.e. a == b), and apr_skiplist_add() is only available in
1358  * APR 1.6, yet multiple timers could possibly be created in the same micro-
1359  * second (duplicates with regard to apr_time_t); therefore we implement the
1360  * compare function to return +1 instead of 0 when compared timers are equal,
1361  * thus duplicates are still added after each other (in order of insertion).
1362  */
1363 static int timer_comp(void *a, void *b)
1364 {
1365     apr_time_t t1 = (apr_time_t) ((timer_event_t *)a)->when;
1366     apr_time_t t2 = (apr_time_t) ((timer_event_t *)b)->when;
1367     AP_DEBUG_ASSERT(t1);
1368     AP_DEBUG_ASSERT(t2);
1369     return ((t1 < t2) ? -1 : 1);
1370 }
1371
1372 static apr_thread_mutex_t *g_timer_skiplist_mtx;
1373
1374 static apr_status_t event_register_timed_callback(apr_time_t t,
1375                                                   ap_mpm_callback_fn_t *cbfn,
1376                                                   void *baton)
1377 {
1378     timer_event_t *te;
1379     /* oh yeah, and make locking smarter/fine grained. */
1380     apr_thread_mutex_lock(g_timer_skiplist_mtx);
1381
1382     if (!APR_RING_EMPTY(&timer_free_ring, timer_event_t, link)) {
1383         te = APR_RING_FIRST(&timer_free_ring);
1384         APR_RING_REMOVE(te, link);
1385     }
1386     else {
1387         te = apr_skiplist_alloc(timer_skiplist, sizeof(timer_event_t));
1388         APR_RING_ELEM_INIT(te, link);
1389     }
1390
1391     te->cbfunc = cbfn;
1392     te->baton = baton;
1393     /* XXXXX: optimize */
1394     te->when = t + apr_time_now();
1395
1396     /* Okay, add sorted by when.. */
1397     apr_skiplist_insert(timer_skiplist, te);
1398
1399     apr_thread_mutex_unlock(g_timer_skiplist_mtx);
1400
1401     return APR_SUCCESS;
1402 }
1403
1404
1405 /*
1406  * Close socket and clean up if remote closed its end while we were in
1407  * lingering close.
1408  * Only to be called in the listener thread;
1409  * Pre-condition: cs is in one of the linger queues and in the pollset
1410  */
1411 static void process_lingering_close(event_conn_state_t *cs, const apr_pollfd_t *pfd)
1412 {
1413     apr_socket_t *csd = ap_get_conn_socket(cs->c);
1414     char dummybuf[2048];
1415     apr_size_t nbytes;
1416     apr_status_t rv;
1417     struct timeout_queue *q;
1418     q = (cs->pub.state == CONN_STATE_LINGER_SHORT) ? short_linger_q : linger_q;
1419
1420     /* socket is already in non-blocking state */
1421     do {
1422         nbytes = sizeof(dummybuf);
1423         rv = apr_socket_recv(csd, dummybuf, &nbytes);
1424     } while (rv == APR_SUCCESS);
1425
1426     if (APR_STATUS_IS_EAGAIN(rv)) {
1427         return;
1428     }
1429
1430     apr_thread_mutex_lock(timeout_mutex);
1431     rv = apr_pollset_remove(event_pollset, pfd);
1432     AP_DEBUG_ASSERT(rv == APR_SUCCESS);
1433
1434     rv = apr_socket_close(csd);
1435     AP_DEBUG_ASSERT(rv == APR_SUCCESS);
1436
1437     TO_QUEUE_REMOVE(q, cs);
1438     apr_thread_mutex_unlock(timeout_mutex);
1439     TO_QUEUE_ELEM_INIT(cs);
1440
1441     ap_push_pool(worker_queue_info, cs->p);
1442 }
1443
1444 /* call 'func' for all elements of 'q' with timeout less than 'timeout_time'.
1445  * Pre-condition: timeout_mutex must already be locked
1446  * Post-condition: timeout_mutex will be locked again
1447  */
1448 static void process_timeout_queue(struct timeout_queue *q,
1449                                   apr_time_t timeout_time,
1450                                   int (*func)(event_conn_state_t *))
1451 {
1452     int total = 0, count;
1453     event_conn_state_t *first, *cs, *last;
1454     struct timeout_head_t trash;
1455     struct timeout_queue *qp;
1456     apr_status_t rv;
1457
1458     if (!*q->total) {
1459         return;
1460     }
1461
1462     APR_RING_INIT(&trash, event_conn_state_t, timeout_list);
1463     for (qp = q; qp; qp = qp->next) {
1464         count = 0;
1465         cs = first = last = APR_RING_FIRST(&qp->head);
1466         while (cs != APR_RING_SENTINEL(&qp->head, event_conn_state_t,
1467                                        timeout_list)
1468                /* Trash the entry if:
1469                 * - no timeout_time was given (asked for all), or
1470                 * - it expired (according to the queue timeout), or
1471                 * - the system clock skewed in the past: no entry should be
1472                 *   registered above the given timeout_time (~now) + the queue
1473                 *   timeout, we won't keep any here (eg. for centuries).
1474                 * Stop otherwise, no following entry will match thanks to the
1475                 * single timeout per queue (entries are added to the end!).
1476                 * This allows maintenance in O(1).
1477                 */
1478                && (!timeout_time
1479                    || cs->queue_timestamp + qp->timeout < timeout_time
1480                    || cs->queue_timestamp > timeout_time + qp->timeout)) {
1481             last = cs;
1482             rv = apr_pollset_remove(event_pollset, &cs->pfd);
1483             if (rv != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rv)) {
1484                 ap_log_cerror(APLOG_MARK, APLOG_ERR, rv, cs->c, APLOGNO(00473)
1485                               "apr_pollset_remove failed");
1486             }
1487             cs = APR_RING_NEXT(cs, timeout_list);
1488             count++;
1489         }
1490         if (!count)
1491             continue;
1492
1493         APR_RING_UNSPLICE(first, last, timeout_list);
1494         APR_RING_SPLICE_TAIL(&trash, first, last, event_conn_state_t,
1495                              timeout_list);
1496         qp->count -= count;
1497         total += count;
1498     }
1499     if (!total)
1500         return;
1501
1502     AP_DEBUG_ASSERT(*q->total >= total);
1503     *q->total -= total;
1504     apr_thread_mutex_unlock(timeout_mutex);
1505     first = APR_RING_FIRST(&trash);
1506     do {
1507         cs = APR_RING_NEXT(first, timeout_list);
1508         TO_QUEUE_ELEM_INIT(first);
1509         func(first);
1510         first = cs;
1511     } while (--total);
1512     apr_thread_mutex_lock(timeout_mutex);
1513 }
1514
1515 static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
1516 {
1517     timer_event_t *ep;
1518     timer_event_t *te;
1519     apr_status_t rc;
1520     proc_info *ti = dummy;
1521     int process_slot = ti->pid;
1522     apr_pool_t *tpool = apr_thread_pool_get(thd);
1523     void *csd = NULL;
1524     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1525     ap_listen_rec *lr;
1526     int have_idle_worker = 0;
1527     const apr_pollfd_t *out_pfd;
1528     apr_int32_t num = 0;
1529     apr_interval_time_t timeout_interval;
1530     apr_time_t timeout_time = 0, now, last_log;
1531     listener_poll_type *pt;
1532     int closed = 0, listeners_disabled = 0;
1533
1534     last_log = apr_time_now();
1535     free(ti);
1536
1537     /* the following times out events that are really close in the future
1538      *   to prevent extra poll calls
1539      *
1540      * current value is .1 second
1541      */
1542 #define TIMEOUT_FUDGE_FACTOR 100000
1543 #define EVENT_FUDGE_FACTOR 10000
1544
1545     rc = init_pollset(tpool);
1546     if (rc != APR_SUCCESS) {
1547         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1548                      "failed to initialize pollset, "
1549                      "attempting to shutdown process gracefully");
1550         signal_threads(ST_GRACEFUL);
1551         return NULL;
1552     }
1553
1554     /* Unblock the signal used to wake this thread up, and set a handler for
1555      * it.
1556      */
1557     unblock_signal(LISTENER_SIGNAL);
1558     apr_signal(LISTENER_SIGNAL, dummy_signal_handler);
1559
1560     for (;;) {
1561         int workers_were_busy = 0;
1562         if (listener_may_exit) {
1563             close_listeners(process_slot, &closed);
1564             if (terminate_mode == ST_UNGRACEFUL
1565                 || apr_atomic_read32(&connection_count) == 0)
1566                 break;
1567         }
1568
1569         if (conns_this_child <= 0)
1570             check_infinite_requests();
1571
1572         now = apr_time_now();
1573         if (APLOGtrace6(ap_server_conf)) {
1574             /* trace log status every second */
1575             if (now - last_log > apr_time_from_msec(1000)) {
1576                 last_log = now;
1577                 apr_thread_mutex_lock(timeout_mutex);
1578                 ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
1579                              "connections: %u (clogged: %u write-completion: %d "
1580                              "keep-alive: %d lingering: %d suspended: %u)",
1581                              apr_atomic_read32(&connection_count),
1582                              apr_atomic_read32(&clogged_count),
1583                              *write_completion_q->total,
1584                              *keepalive_q->total,
1585                              apr_atomic_read32(&lingering_count),
1586                              apr_atomic_read32(&suspended_count));
1587                 apr_thread_mutex_unlock(timeout_mutex);
1588             }
1589         }
1590
1591         apr_thread_mutex_lock(g_timer_skiplist_mtx);
1592         te = apr_skiplist_peek(timer_skiplist);
1593         if (te) {
1594             if (te->when > now) {
1595                 timeout_interval = te->when - now;
1596             }
1597             else {
1598                 timeout_interval = 1;
1599             }
1600         }
1601         else {
1602             timeout_interval = apr_time_from_msec(100);
1603         }
1604         apr_thread_mutex_unlock(g_timer_skiplist_mtx);
1605
1606         rc = apr_pollset_poll(event_pollset, timeout_interval, &num, &out_pfd);
1607         if (rc != APR_SUCCESS) {
1608             if (APR_STATUS_IS_EINTR(rc)) {
1609                 continue;
1610             }
1611             if (!APR_STATUS_IS_TIMEUP(rc)) {
1612                 ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf,
1613                              "apr_pollset_poll failed.  Attempting to "
1614                              "shutdown process gracefully");
1615                 signal_threads(ST_GRACEFUL);
1616             }
1617         }
1618
1619         if (listener_may_exit) {
1620             close_listeners(process_slot, &closed);
1621             if (terminate_mode == ST_UNGRACEFUL
1622                 || apr_atomic_read32(&connection_count) == 0)
1623                 break;
1624         }
1625
1626         now = apr_time_now();
1627         apr_thread_mutex_lock(g_timer_skiplist_mtx);
1628         ep = apr_skiplist_peek(timer_skiplist);
1629         while (ep) {
1630             if (ep->when < now + EVENT_FUDGE_FACTOR) {
1631                 apr_skiplist_pop(timer_skiplist, NULL);
1632                 push_timer2worker(ep);
1633             }
1634             else {
1635                 break;
1636             }
1637             ep = apr_skiplist_peek(timer_skiplist);
1638         }
1639         apr_thread_mutex_unlock(g_timer_skiplist_mtx);
1640
1641         while (num) {
1642             pt = (listener_poll_type *) out_pfd->client_data;
1643             if (pt->type == PT_CSD) {
1644                 /* one of the sockets is readable */
1645                 event_conn_state_t *cs = (event_conn_state_t *) pt->baton;
1646                 struct timeout_queue *remove_from_q = cs->sc->wc_q;
1647                 int blocking = 1;
1648
1649                 switch (cs->pub.state) {
1650                 case CONN_STATE_CHECK_REQUEST_LINE_READABLE:
1651                     cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
1652                     remove_from_q = cs->sc->ka_q;
1653                     /* don't wait for a worker for a keepalive request */
1654                     blocking = 0;
1655                     /* FALL THROUGH */
1656                 case CONN_STATE_WRITE_COMPLETION:
1657                     get_worker(&have_idle_worker, blocking,
1658                                &workers_were_busy);
1659                     apr_thread_mutex_lock(timeout_mutex);
1660                     TO_QUEUE_REMOVE(remove_from_q, cs);
1661                     rc = apr_pollset_remove(event_pollset, &cs->pfd);
1662                     apr_thread_mutex_unlock(timeout_mutex);
1663
1664                     /*
1665                      * Some of the pollset backends, like KQueue or Epoll
1666                      * automagically remove the FD if the socket is closed,
1667                      * therefore, we can accept _SUCCESS or _NOTFOUND,
1668                      * and we still want to keep going
1669                      */
1670                     if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
1671                         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1672                                      APLOGNO(03094) "pollset remove failed");
1673                         start_lingering_close_nonblocking(cs);
1674                         break;
1675                     }
1676
1677                     TO_QUEUE_ELEM_INIT(cs);
1678                     /* If we didn't get a worker immediately for a keep-alive
1679                      * request, we close the connection, so that the client can
1680                      * re-connect to a different process.
1681                      */
1682                     if (!have_idle_worker) {
1683                         start_lingering_close_nonblocking(cs);
1684                         break;
1685                     }
1686                     rc = push2worker(out_pfd, event_pollset);
1687                     if (rc != APR_SUCCESS) {
1688                         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1689                                      ap_server_conf, APLOGNO(03095)
1690                                      "push2worker failed");
1691                     }
1692                     else {
1693                         have_idle_worker = 0;
1694                     }
1695                     break;
1696                 case CONN_STATE_LINGER_NORMAL:
1697                 case CONN_STATE_LINGER_SHORT:
1698                     process_lingering_close(cs, out_pfd);
1699                     break;
1700                 default:
1701                     ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1702                                  ap_server_conf, APLOGNO(03096)
1703                                  "event_loop: unexpected state %d",
1704                                  cs->pub.state);
1705                     ap_assert(0);
1706                 }
1707             }
1708             else if (pt->type == PT_ACCEPT) {
1709                 /* A Listener Socket is ready for an accept() */
1710                 if (workers_were_busy) {
1711                     if (!listeners_disabled)
1712                         disable_listensocks(process_slot);
1713                     listeners_disabled = 1;
1714                     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1715                                  "All workers busy, not accepting new conns "
1716                                  "in this process");
1717                 }
1718                 else if (  (int)apr_atomic_read32(&connection_count)
1719                            - (int)apr_atomic_read32(&lingering_count)
1720                          > threads_per_child
1721                            + ap_queue_info_get_idlers(worker_queue_info) *
1722                              worker_factor / WORKER_FACTOR_SCALE)
1723                 {
1724                     if (!listeners_disabled)
1725                         disable_listensocks(process_slot);
1726                     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1727                                  "Too many open connections (%u), "
1728                                  "not accepting new conns in this process",
1729                                  apr_atomic_read32(&connection_count));
1730                     ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1731                                  "Idle workers: %u",
1732                                  ap_queue_info_get_idlers(worker_queue_info));
1733                     listeners_disabled = 1;
1734                 }
1735                 else if (listeners_disabled) {
1736                     listeners_disabled = 0;
1737                     enable_listensocks(process_slot);
1738                 }
1739                 if (!listeners_disabled) {
1740                     lr = (ap_listen_rec *) pt->baton;
1741                     ap_pop_pool(&ptrans, worker_queue_info);
1742
1743                     if (ptrans == NULL) {
1744                         /* create a new transaction pool for each accepted socket */
1745                         apr_allocator_t *allocator;
1746
1747                         apr_allocator_create(&allocator);
1748                         apr_allocator_max_free_set(allocator,
1749                                                    ap_max_mem_free);
1750                         apr_pool_create_ex(&ptrans, pconf, NULL, allocator);
1751                         apr_allocator_owner_set(allocator, ptrans);
1752                         if (ptrans == NULL) {
1753                             ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1754                                          ap_server_conf, APLOGNO(03097)
1755                                          "Failed to create transaction pool");
1756                             signal_threads(ST_GRACEFUL);
1757                             return NULL;
1758                         }
1759                     }
1760                     apr_pool_tag(ptrans, "transaction");
1761
1762                     get_worker(&have_idle_worker, 1, &workers_were_busy);
1763                     rc = lr->accept_func(&csd, lr, ptrans);
1764
1765                     /* later we trash rv and rely on csd to indicate
1766                      * success/failure
1767                      */
1768                     AP_DEBUG_ASSERT(rc == APR_SUCCESS || !csd);
1769
1770                     if (rc == APR_EGENERAL) {
1771                         /* E[NM]FILE, ENOMEM, etc */
1772                         resource_shortage = 1;
1773                         signal_threads(ST_GRACEFUL);
1774                     }
1775
1776                     if (csd != NULL) {
1777                         conns_this_child--;
1778                         rc = ap_queue_push(worker_queue, csd, NULL, ptrans);
1779                         if (rc != APR_SUCCESS) {
1780                             /* trash the connection; we couldn't queue the connected
1781                              * socket to a worker
1782                              */
1783                             apr_socket_close(csd);
1784                             ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1785                                          ap_server_conf, APLOGNO(03098)
1786                                          "ap_queue_push failed");
1787                             ap_push_pool(worker_queue_info, ptrans);
1788                         }
1789                         else {
1790                             have_idle_worker = 0;
1791                         }
1792                     }
1793                     else {
1794                         ap_push_pool(worker_queue_info, ptrans);
1795                     }
1796                 }
1797             }               /* if:else on pt->type */
1798             out_pfd++;
1799             num--;
1800         }                   /* while for processing poll */
1801
1802         /* XXX possible optimization: stash the current time for use as
1803          * r->request_time for new requests
1804          */
1805         now = apr_time_now();
1806         /* We only do this once per 0.1s (TIMEOUT_FUDGE_FACTOR), or on a clock
1807          * skew (if the system time is set back in the meantime, timeout_time
1808          * will exceed now + TIMEOUT_FUDGE_FACTOR, can't happen otherwise).
1809          */
1810         if (now > timeout_time || now + TIMEOUT_FUDGE_FACTOR < timeout_time ) {
1811             struct process_score *ps;
1812             timeout_time = now + TIMEOUT_FUDGE_FACTOR;
1813
1814             /* handle timed out sockets */
1815             apr_thread_mutex_lock(timeout_mutex);
1816
1817             /* Step 1: keepalive timeouts */
1818             /* If all workers are busy, we kill older keep-alive connections so that they
1819              * may connect to another process.
1820              */
1821             if (workers_were_busy && *keepalive_q->total) {
1822                 ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1823                              "All workers are busy, will close %d keep-alive "
1824                              "connections",
1825                              *keepalive_q->total);
1826                 process_timeout_queue(keepalive_q, 0,
1827                                       start_lingering_close_nonblocking);
1828             }
1829             else {
1830                 process_timeout_queue(keepalive_q, timeout_time,
1831                                       start_lingering_close_nonblocking);
1832             }
1833             /* Step 2: write completion timeouts */
1834             process_timeout_queue(write_completion_q, timeout_time,
1835                                   start_lingering_close_nonblocking);
1836             /* Step 3: (normal) lingering close completion timeouts */
1837             process_timeout_queue(linger_q, timeout_time, stop_lingering_close);
1838             /* Step 4: (short) lingering close completion timeouts */
1839             process_timeout_queue(short_linger_q, timeout_time, stop_lingering_close);
1840
1841             ps = ap_get_scoreboard_process(process_slot);
1842             ps->write_completion = *write_completion_q->total;
1843             ps->keep_alive = *keepalive_q->total;
1844             apr_thread_mutex_unlock(timeout_mutex);
1845
1846             ps->connections = apr_atomic_read32(&connection_count);
1847             ps->suspended = apr_atomic_read32(&suspended_count);
1848             ps->lingering_close = apr_atomic_read32(&lingering_count);
1849         }
1850         if (listeners_disabled && !workers_were_busy
1851             && (int)apr_atomic_read32(&connection_count)
1852                - (int)apr_atomic_read32(&lingering_count)
1853                < ((int)ap_queue_info_get_idlers(worker_queue_info) - 1)
1854                  * worker_factor / WORKER_FACTOR_SCALE + threads_per_child)
1855         {
1856             listeners_disabled = 0;
1857             enable_listensocks(process_slot);
1858         }
1859         /*
1860          * XXX: do we need to set some timeout that re-enables the listensocks
1861          * XXX: in case no other event occurs?
1862          */
1863     }     /* listener main loop */
1864
1865     close_listeners(process_slot, &closed);
1866     ap_queue_term(worker_queue);
1867
1868     apr_thread_exit(thd, APR_SUCCESS);
1869     return NULL;
1870 }
1871
1872 /* XXX For ungraceful termination/restart, we definitely don't want to
1873  *     wait for active connections to finish but we may want to wait
1874  *     for idle workers to get out of the queue code and release mutexes,
1875  *     since those mutexes are cleaned up pretty soon and some systems
1876  *     may not react favorably (i.e., segfault) if operations are attempted
1877  *     on cleaned-up mutexes.
1878  */
1879 static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
1880 {
1881     proc_info *ti = dummy;
1882     int process_slot = ti->pid;
1883     int thread_slot = ti->tid;
1884     apr_socket_t *csd = NULL;
1885     event_conn_state_t *cs;
1886     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1887     apr_status_t rv;
1888     int is_idle = 0;
1889     timer_event_t *te = NULL;
1890
1891     free(ti);
1892
1893     ap_scoreboard_image->servers[process_slot][thread_slot].pid = ap_my_pid;
1894     ap_scoreboard_image->servers[process_slot][thread_slot].tid = apr_os_thread_current();
1895     ap_scoreboard_image->servers[process_slot][thread_slot].generation = retained->my_generation;
1896     ap_update_child_status_from_indexes(process_slot, thread_slot,
1897                                         SERVER_STARTING, NULL);
1898
1899     while (!workers_may_exit) {
1900         if (!is_idle) {
1901             rv = ap_queue_info_set_idle(worker_queue_info, NULL);
1902             if (rv != APR_SUCCESS) {
1903                 ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
1904                              "ap_queue_info_set_idle failed. Attempting to "
1905                              "shutdown process gracefully.");
1906                 signal_threads(ST_GRACEFUL);
1907                 break;
1908             }
1909             is_idle = 1;
1910         }
1911
1912         ap_update_child_status_from_indexes(process_slot, thread_slot,
1913                                             dying ? SERVER_GRACEFUL : SERVER_READY, NULL);
1914       worker_pop:
1915         if (workers_may_exit) {
1916             break;
1917         }
1918
1919         te = NULL;
1920         rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te);
1921
1922         if (rv != APR_SUCCESS) {
1923             /* We get APR_EOF during a graceful shutdown once all the
1924              * connections accepted by this server process have been handled.
1925              */
1926             if (APR_STATUS_IS_EOF(rv)) {
1927                 break;
1928             }
1929             /* We get APR_EINTR whenever ap_queue_pop() has been interrupted
1930              * from an explicit call to ap_queue_interrupt_all(). This allows
1931              * us to unblock threads stuck in ap_queue_pop() when a shutdown
1932              * is pending.
1933              *
1934              * If workers_may_exit is set and this is ungraceful termination/
1935              * restart, we are bound to get an error on some systems (e.g.,
1936              * AIX, which sanity-checks mutex operations) since the queue
1937              * may have already been cleaned up.  Don't log the "error" if
1938              * workers_may_exit is set.
1939              */
1940             else if (APR_STATUS_IS_EINTR(rv)) {
1941                 goto worker_pop;
1942             }
1943             /* We got some other error. */
1944             else if (!workers_may_exit) {
1945                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1946                              APLOGNO(03099) "ap_queue_pop failed");
1947             }
1948             continue;
1949         }
1950         if (te != NULL) {
1951             te->cbfunc(te->baton);
1952
1953             {
1954                 apr_thread_mutex_lock(g_timer_skiplist_mtx);
1955                 APR_RING_INSERT_TAIL(&timer_free_ring, te, timer_event_t, link);
1956                 apr_thread_mutex_unlock(g_timer_skiplist_mtx);
1957             }
1958         }
1959         else {
1960             is_idle = 0;
1961             worker_sockets[thread_slot] = csd;
1962             process_socket(thd, ptrans, csd, cs, process_slot, thread_slot);
1963             worker_sockets[thread_slot] = NULL;
1964         }
1965     }
1966
1967     ap_update_child_status_from_indexes(process_slot, thread_slot,
1968                                         dying ? SERVER_DEAD :
1969                                         SERVER_GRACEFUL,
1970                                         (request_rec *) NULL);
1971
1972     apr_thread_exit(thd, APR_SUCCESS);
1973     return NULL;
1974 }
1975
1976 static int check_signal(int signum)
1977 {
1978     switch (signum) {
1979     case SIGTERM:
1980     case SIGINT:
1981         return 1;
1982     }
1983     return 0;
1984 }
1985
1986
1987
1988 static void create_listener_thread(thread_starter * ts)
1989 {
1990     int my_child_num = ts->child_num_arg;
1991     apr_threadattr_t *thread_attr = ts->threadattr;
1992     proc_info *my_info;
1993     apr_status_t rv;
1994
1995     my_info = (proc_info *) ap_malloc(sizeof(proc_info));
1996     my_info->pid = my_child_num;
1997     my_info->tid = -1;          /* listener thread doesn't have a thread slot */
1998     my_info->sd = 0;
1999     rv = apr_thread_create(&ts->listener, thread_attr, listener_thread,
2000                            my_info, pchild);
2001     if (rv != APR_SUCCESS) {
2002         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(00474)
2003                      "apr_thread_create: unable to create listener thread");
2004         /* let the parent decide how bad this really is */
2005         clean_child_exit(APEXIT_CHILDSICK);
2006     }
2007     apr_os_thread_get(&listener_os_thread, ts->listener);
2008 }
2009
2010 /* XXX under some circumstances not understood, children can get stuck
2011  *     in start_threads forever trying to take over slots which will
2012  *     never be cleaned up; for now there is an APLOG_DEBUG message issued
2013  *     every so often when this condition occurs
2014  */
2015 static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
2016 {
2017     thread_starter *ts = dummy;
2018     apr_thread_t **threads = ts->threads;
2019     apr_threadattr_t *thread_attr = ts->threadattr;
2020     int child_num_arg = ts->child_num_arg;
2021     int my_child_num = child_num_arg;
2022     proc_info *my_info;
2023     apr_status_t rv;
2024     int i;
2025     int threads_created = 0;
2026     int listener_started = 0;
2027     int loops;
2028     int prev_threads_created;
2029     int max_recycled_pools = -1;
2030     int good_methods[] = {APR_POLLSET_KQUEUE, APR_POLLSET_PORT, APR_POLLSET_EPOLL};
2031
2032     /* We must create the fd queues before we start up the listener
2033      * and worker threads. */
2034     worker_queue = apr_pcalloc(pchild, sizeof(*worker_queue));
2035     rv = ap_queue_init(worker_queue, threads_per_child, pchild);
2036     if (rv != APR_SUCCESS) {
2037         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(03100)
2038                      "ap_queue_init() failed");
2039         clean_child_exit(APEXIT_CHILDFATAL);
2040     }
2041
2042     if (ap_max_mem_free != APR_ALLOCATOR_MAX_FREE_UNLIMITED) {
2043         /* If we want to conserve memory, let's not keep an unlimited number of
2044          * pools & allocators.
2045          * XXX: This should probably be a separate config directive
2046          */
2047         max_recycled_pools = threads_per_child * 3 / 4 ;
2048     }
2049     rv = ap_queue_info_create(&worker_queue_info, pchild,
2050                               threads_per_child, max_recycled_pools);
2051     if (rv != APR_SUCCESS) {
2052         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(03101)
2053                      "ap_queue_info_create() failed");
2054         clean_child_exit(APEXIT_CHILDFATAL);
2055     }
2056
2057     /* Create the timeout mutex and main pollset before the listener
2058      * thread starts.
2059      */
2060     rv = apr_thread_mutex_create(&timeout_mutex, APR_THREAD_MUTEX_DEFAULT,
2061                                  pchild);
2062     if (rv != APR_SUCCESS) {
2063         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03102)
2064                      "creation of the timeout mutex failed.");
2065         clean_child_exit(APEXIT_CHILDFATAL);
2066     }
2067
2068     /* Create the main pollset */
2069     for (i = 0; i < sizeof(good_methods) / sizeof(void*); i++) {
2070         rv = apr_pollset_create_ex(&event_pollset,
2071                             threads_per_child*2, /* XXX don't we need more, to handle
2072                                                 * connections in K-A or lingering
2073                                                 * close?
2074                                                 */
2075                             pchild, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY | APR_POLLSET_NODEFAULT,
2076                             good_methods[i]);
2077         if (rv == APR_SUCCESS) {
2078             break;
2079         }
2080     }
2081     if (rv != APR_SUCCESS) {
2082         rv = apr_pollset_create(&event_pollset,
2083                                threads_per_child*2, /* XXX don't we need more, to handle
2084                                                      * connections in K-A or lingering
2085                                                      * close?
2086                                                      */
2087                                pchild, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
2088     }
2089     if (rv != APR_SUCCESS) {
2090         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03103)
2091                      "apr_pollset_create with Thread Safety failed.");
2092         clean_child_exit(APEXIT_CHILDFATAL);
2093     }
2094
2095     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(02471)
2096                  "start_threads: Using %s", apr_pollset_method_name(event_pollset));
2097     worker_sockets = apr_pcalloc(pchild, threads_per_child
2098                                  * sizeof(apr_socket_t *));
2099
2100     loops = prev_threads_created = 0;
2101     while (1) {
2102         /* threads_per_child does not include the listener thread */
2103         for (i = 0; i < threads_per_child; i++) {
2104             int status =
2105                 ap_scoreboard_image->servers[child_num_arg][i].status;
2106
2107             if (status != SERVER_GRACEFUL && status != SERVER_DEAD) {
2108                 continue;
2109             }
2110
2111             my_info = (proc_info *) ap_malloc(sizeof(proc_info));
2112             my_info->pid = my_child_num;
2113             my_info->tid = i;
2114             my_info->sd = 0;
2115
2116             /* We are creating threads right now */
2117             ap_update_child_status_from_indexes(my_child_num, i,
2118                                                 SERVER_STARTING, NULL);
2119             /* We let each thread update its own scoreboard entry.  This is
2120              * done because it lets us deal with tid better.
2121              */
2122             rv = apr_thread_create(&threads[i], thread_attr,
2123                                    worker_thread, my_info, pchild);
2124             if (rv != APR_SUCCESS) {
2125                 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
2126                              APLOGNO(03104)
2127                              "apr_thread_create: unable to create worker thread");
2128                 /* let the parent decide how bad this really is */
2129                 clean_child_exit(APEXIT_CHILDSICK);
2130             }
2131             threads_created++;
2132         }
2133
2134         /* Start the listener only when there are workers available */
2135         if (!listener_started && threads_created) {
2136             create_listener_thread(ts);
2137             listener_started = 1;
2138         }
2139
2140
2141         if (start_thread_may_exit || threads_created == threads_per_child) {
2142             break;
2143         }
2144         /* wait for previous generation to clean up an entry */
2145         apr_sleep(apr_time_from_sec(1));
2146         ++loops;
2147         if (loops % 120 == 0) { /* every couple of minutes */
2148             if (prev_threads_created == threads_created) {
2149                 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
2150                              "child %" APR_PID_T_FMT " isn't taking over "
2151                              "slots very quickly (%d of %d)",
2152                              ap_my_pid, threads_created,
2153                              threads_per_child);
2154             }
2155             prev_threads_created = threads_created;
2156         }
2157     }
2158
2159     /* What state should this child_main process be listed as in the
2160      * scoreboard...?
2161      *  ap_update_child_status_from_indexes(my_child_num, i, SERVER_STARTING,
2162      *                                      (request_rec *) NULL);
2163      *
2164      *  This state should be listed separately in the scoreboard, in some kind
2165      *  of process_status, not mixed in with the worker threads' status.
2166      *  "life_status" is almost right, but it's in the worker's structure, and
2167      *  the name could be clearer.   gla
2168      */
2169     apr_thread_exit(thd, APR_SUCCESS);
2170     return NULL;
2171 }
2172
2173 static void join_workers(apr_thread_t * listener, apr_thread_t ** threads)
2174 {
2175     int i;
2176     apr_status_t rv, thread_rv;
2177
2178     if (listener) {
2179         int iter;
2180
2181         /* deal with a rare timing window which affects waking up the
2182          * listener thread...  if the signal sent to the listener thread
2183          * is delivered between the time it verifies that the
2184          * listener_may_exit flag is clear and the time it enters a
2185          * blocking syscall, the signal didn't do any good...  work around
2186          * that by sleeping briefly and sending it again
2187          */
2188
2189         iter = 0;
2190         while (iter < 10 && !dying) {
2191             /* listener has not stopped accepting yet */
2192             apr_sleep(apr_time_make(0, 500000));
2193             wakeup_listener();
2194             ++iter;
2195         }
2196         if (iter >= 10) {
2197             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00475)
2198                          "the listener thread didn't stop accepting");
2199         }
2200         else {
2201             rv = apr_thread_join(&thread_rv, listener);
2202             if (rv != APR_SUCCESS) {
2203                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00476)
2204                              "apr_thread_join: unable to join listener thread");
2205             }
2206         }
2207     }
2208
2209     for (i = 0; i < threads_per_child; i++) {
2210         if (threads[i]) {       /* if we ever created this thread */
2211             rv = apr_thread_join(&thread_rv, threads[i]);
2212             if (rv != APR_SUCCESS) {
2213                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00477)
2214                              "apr_thread_join: unable to join worker "
2215                              "thread %d", i);
2216             }
2217         }
2218     }
2219 }
2220
2221 static void join_start_thread(apr_thread_t * start_thread_id)
2222 {
2223     apr_status_t rv, thread_rv;
2224
2225     start_thread_may_exit = 1;  /* tell it to give up in case it is still
2226                                  * trying to take over slots from a
2227                                  * previous generation
2228                                  */
2229     rv = apr_thread_join(&thread_rv, start_thread_id);
2230     if (rv != APR_SUCCESS) {
2231         ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00478)
2232                      "apr_thread_join: unable to join the start " "thread");
2233     }
2234 }
2235
2236 static void child_main(int child_num_arg, int child_bucket)
2237 {
2238     apr_thread_t **threads;
2239     apr_status_t rv;
2240     thread_starter *ts;
2241     apr_threadattr_t *thread_attr;
2242     apr_thread_t *start_thread_id;
2243     int i;
2244
2245     mpm_state = AP_MPMQ_STARTING;       /* for benefit of any hooks that run as this
2246                                          * child initializes
2247                                          */
2248     ap_my_pid = getpid();
2249     ap_fatal_signal_child_setup(ap_server_conf);
2250     apr_pool_create(&pchild, pconf);
2251
2252     /* close unused listeners and pods */
2253     for (i = 0; i < retained->num_buckets; i++) {
2254         if (i != child_bucket) {
2255             ap_close_listeners_ex(all_buckets[i].listeners);
2256             ap_mpm_podx_close(all_buckets[i].pod);
2257         }
2258     }
2259
2260     /*stuff to do before we switch id's, so we have permissions. */
2261     ap_reopen_scoreboard(pchild, NULL, 0);
2262
2263     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
2264         clean_child_exit(APEXIT_CHILDFATAL);
2265     }
2266
2267     apr_thread_mutex_create(&g_timer_skiplist_mtx, APR_THREAD_MUTEX_DEFAULT, pchild);
2268     APR_RING_INIT(&timer_free_ring, timer_event_t, link);
2269     apr_skiplist_init(&timer_skiplist, pchild);
2270     apr_skiplist_set_compare(timer_skiplist, timer_comp, timer_comp);
2271     ap_run_child_init(pchild, ap_server_conf);
2272
2273     /* done with init critical section */
2274
2275     /* Just use the standard apr_setup_signal_thread to block all signals
2276      * from being received.  The child processes no longer use signals for
2277      * any communication with the parent process.
2278      */
2279     rv = apr_setup_signal_thread();
2280     if (rv != APR_SUCCESS) {
2281         ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO(00479)
2282                      "Couldn't initialize signal thread");
2283         clean_child_exit(APEXIT_CHILDFATAL);
2284     }
2285
2286     if (ap_max_requests_per_child) {
2287         conns_this_child = ap_max_requests_per_child;
2288     }
2289     else {
2290         /* coding a value of zero means infinity */
2291         conns_this_child = APR_INT32_MAX;
2292     }
2293
2294     /* Setup worker threads */
2295
2296     /* clear the storage; we may not create all our threads immediately,
2297      * and we want a 0 entry to indicate a thread which was not created
2298      */
2299     threads = ap_calloc(threads_per_child, sizeof(apr_thread_t *));
2300     ts = apr_palloc(pchild, sizeof(*ts));
2301
2302     apr_threadattr_create(&thread_attr, pchild);
2303     /* 0 means PTHREAD_CREATE_JOINABLE */
2304     apr_threadattr_detach_set(thread_attr, 0);
2305
2306     if (ap_thread_stacksize != 0) {
2307         rv = apr_threadattr_stacksize_set(thread_attr, ap_thread_stacksize);
2308         if (rv != APR_SUCCESS && rv != APR_ENOTIMPL) {
2309             ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(02436)
2310                          "WARNING: ThreadStackSize of %" APR_SIZE_T_FMT " is "
2311                          "inappropriate, using default",
2312                          ap_thread_stacksize);
2313         }
2314     }
2315
2316     ts->threads = threads;
2317     ts->listener = NULL;
2318     ts->child_num_arg = child_num_arg;
2319     ts->threadattr = thread_attr;
2320
2321     rv = apr_thread_create(&start_thread_id, thread_attr, start_threads,
2322                            ts, pchild);
2323     if (rv != APR_SUCCESS) {
2324         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(00480)
2325                      "apr_thread_create: unable to create worker thread");
2326         /* let the parent decide how bad this really is */
2327         clean_child_exit(APEXIT_CHILDSICK);
2328     }
2329
2330     mpm_state = AP_MPMQ_RUNNING;
2331
2332     /* If we are only running in one_process mode, we will want to
2333      * still handle signals. */
2334     if (one_process) {
2335         /* Block until we get a terminating signal. */
2336         apr_signal_thread(check_signal);
2337         /* make sure the start thread has finished; signal_threads()
2338          * and join_workers() depend on that
2339          */
2340         /* XXX join_start_thread() won't be awakened if one of our
2341          *     threads encounters a critical error and attempts to
2342          *     shutdown this child
2343          */
2344         join_start_thread(start_thread_id);
2345
2346         /* helps us terminate a little more quickly than the dispatch of the
2347          * signal thread; beats the Pipe of Death and the browsers
2348          */
2349         signal_threads(ST_UNGRACEFUL);
2350
2351         /* A terminating signal was received. Now join each of the
2352          * workers to clean them up.
2353          *   If the worker already exited, then the join frees
2354          *   their resources and returns.
2355          *   If the worker hasn't exited, then this blocks until
2356          *   they have (then cleans up).
2357          */
2358         join_workers(ts->listener, threads);
2359     }
2360     else {                      /* !one_process */
2361         /* remove SIGTERM from the set of blocked signals...  if one of
2362          * the other threads in the process needs to take us down
2363          * (e.g., for MaxConnectionsPerChild) it will send us SIGTERM
2364          */
2365         unblock_signal(SIGTERM);
2366         apr_signal(SIGTERM, dummy_signal_handler);
2367         /* Watch for any messages from the parent over the POD */
2368         while (1) {
2369             rv = ap_mpm_podx_check(my_bucket->pod);
2370             if (rv == AP_MPM_PODX_NORESTART) {
2371                 /* see if termination was triggered while we slept */
2372                 switch (terminate_mode) {
2373                 case ST_GRACEFUL:
2374                     rv = AP_MPM_PODX_GRACEFUL;
2375                     break;
2376                 case ST_UNGRACEFUL:
2377                     rv = AP_MPM_PODX_RESTART;
2378                     break;
2379                 }
2380             }
2381             if (rv == AP_MPM_PODX_GRACEFUL || rv == AP_MPM_PODX_RESTART) {
2382                 /* make sure the start thread has finished;
2383                  * signal_threads() and join_workers depend on that
2384                  */
2385                 join_start_thread(start_thread_id);
2386                 signal_threads(rv ==
2387                                AP_MPM_PODX_GRACEFUL ? ST_GRACEFUL : ST_UNGRACEFUL);
2388                 break;
2389             }
2390         }
2391
2392         /* A terminating signal was received. Now join each of the
2393          * workers to clean them up.
2394          *   If the worker already exited, then the join frees
2395          *   their resources and returns.
2396          *   If the worker hasn't exited, then this blocks until
2397          *   they have (then cleans up).
2398          */
2399         join_workers(ts->listener, threads);
2400     }
2401
2402     free(threads);
2403
2404     clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
2405 }
2406
2407 static int make_child(server_rec * s, int slot, int bucket)
2408 {
2409     int pid;
2410
2411     if (slot + 1 > retained->max_daemons_limit) {
2412         retained->max_daemons_limit = slot + 1;
2413     }
2414
2415     if (one_process) {
2416         my_bucket = &all_buckets[0];
2417
2418         set_signals();
2419         event_note_child_started(slot, getpid());
2420         child_main(slot, 0);
2421         /* NOTREACHED */
2422         ap_assert(0);
2423         return -1;
2424     }
2425
2426     if ((pid = fork()) == -1) {
2427         ap_log_error(APLOG_MARK, APLOG_ERR, errno, s, APLOGNO(00481)
2428                      "fork: Unable to fork new process");
2429
2430         /* fork didn't succeed.  There's no need to touch the scoreboard;
2431          * if we were trying to replace a failed child process, then
2432          * server_main_loop() marked its workers SERVER_DEAD, and if
2433          * we were trying to replace a child process that exited normally,
2434          * its worker_thread()s left SERVER_DEAD or SERVER_GRACEFUL behind.
2435          */
2436
2437         /* In case system resources are maxxed out, we don't want
2438            Apache running away with the CPU trying to fork over and
2439            over and over again. */
2440         apr_sleep(apr_time_from_sec(10));
2441
2442         return -1;
2443     }
2444
2445     if (!pid) {
2446         my_bucket = &all_buckets[bucket];
2447
2448 #ifdef HAVE_BINDPROCESSOR
2449         /* By default, AIX binds to a single processor.  This bit unbinds
2450          * children which will then bind to another CPU.
2451          */
2452         int status = bindprocessor(BINDPROCESS, (int) getpid(),
2453                                    PROCESSOR_CLASS_ANY);
2454         if (status != OK)
2455             ap_log_error(APLOG_MARK, APLOG_DEBUG, errno,
2456                          ap_server_conf, APLOGNO(00482)
2457                          "processor unbind failed");
2458 #endif
2459         RAISE_SIGSTOP(MAKE_CHILD);
2460
2461         apr_signal(SIGTERM, just_die);
2462         child_main(slot, bucket);
2463         /* NOTREACHED */
2464         ap_assert(0);
2465         return -1;
2466     }
2467
2468     if (ap_scoreboard_image->parent[slot].pid != 0) {
2469         /* This new child process is squatting on the scoreboard
2470          * entry owned by an exiting child process, which cannot
2471          * exit until all active requests complete.
2472          */
2473         event_note_child_lost_slot(slot, pid);
2474     }
2475     ap_scoreboard_image->parent[slot].quiescing = 0;
2476     ap_scoreboard_image->parent[slot].not_accepting = 0;
2477     ap_scoreboard_image->parent[slot].bucket = bucket;
2478     event_note_child_started(slot, pid);
2479     return 0;
2480 }
2481
2482 /* start up a bunch of children */
2483 static void startup_children(int number_to_start)
2484 {
2485     int i;
2486
2487     for (i = 0; number_to_start && i < ap_daemons_limit; ++i) {
2488         if (ap_scoreboard_image->parent[i].pid != 0) {
2489             continue;
2490         }
2491         if (make_child(ap_server_conf, i, i % retained->num_buckets) < 0) {
2492             break;
2493         }
2494         --number_to_start;
2495     }
2496 }
2497
2498 static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
2499 {
2500     int i, j;
2501     int idle_thread_count;
2502     worker_score *ws;
2503     process_score *ps;
2504     int free_length;
2505     int totally_free_length = 0;
2506     int free_slots[MAX_SPAWN_RATE];
2507     int last_non_dead;
2508     int total_non_dead;
2509     int active_thread_count = 0;
2510
2511     /* initialize the free_list */
2512     free_length = 0;
2513
2514     idle_thread_count = 0;
2515     last_non_dead = -1;
2516     total_non_dead = 0;
2517
2518     for (i = 0; i < ap_daemons_limit; ++i) {
2519         /* Initialization to satisfy the compiler. It doesn't know
2520          * that threads_per_child is always > 0 */
2521         int status = SERVER_DEAD;
2522         int any_dying_threads = 0;
2523         int any_dead_threads = 0;
2524         int all_dead_threads = 1;
2525         int child_threads_active = 0;
2526
2527         if (i >= retained->max_daemons_limit
2528             && totally_free_length == retained->idle_spawn_rate[child_bucket])
2529             /* short cut if all active processes have been examined and
2530              * enough empty scoreboard slots have been found
2531              */
2532
2533             break;
2534         ps = &ap_scoreboard_image->parent[i];
2535         for (j = 0; j < threads_per_child; j++) {
2536             ws = &ap_scoreboard_image->servers[i][j];
2537             status = ws->status;
2538
2539             /* XXX any_dying_threads is probably no longer needed    GLA */
2540             any_dying_threads = any_dying_threads ||
2541                 (status == SERVER_GRACEFUL);
2542             any_dead_threads = any_dead_threads || (status == SERVER_DEAD);
2543             all_dead_threads = all_dead_threads &&
2544                 (status == SERVER_DEAD || status == SERVER_GRACEFUL);
2545
2546             /* We consider a starting server as idle because we started it
2547              * at least a cycle ago, and if it still hasn't finished starting
2548              * then we're just going to swamp things worse by forking more.
2549              * So we hopefully won't need to fork more if we count it.
2550              * This depends on the ordering of SERVER_READY and SERVER_STARTING.
2551              */
2552             if (ps->pid != 0) { /* XXX just set all_dead_threads in outer
2553                                    for loop if no pid?  not much else matters */
2554                 if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting
2555                     && ps->generation == retained->my_generation
2556                     && ps->bucket == child_bucket)
2557                 {
2558                     ++idle_thread_count;
2559                 }
2560                 if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
2561                     ++child_threads_active;
2562                 }
2563             }
2564         }
2565         active_thread_count += child_threads_active;
2566         if (any_dead_threads
2567             && totally_free_length < retained->idle_spawn_rate[child_bucket]
2568             && free_length < MAX_SPAWN_RATE / num_buckets
2569             && (!ps->pid      /* no process in the slot */
2570                   || ps->quiescing)) {  /* or at least one is going away */
2571             if (all_dead_threads) {
2572                 /* great! we prefer these, because the new process can
2573                  * start more threads sooner.  So prioritize this slot
2574                  * by putting it ahead of any slots with active threads.
2575                  *
2576                  * first, make room by moving a slot that's potentially still
2577                  * in use to the end of the array
2578                  */
2579                 free_slots[free_length] = free_slots[totally_free_length];
2580                 free_slots[totally_free_length++] = i;
2581             }
2582             else {
2583                 /* slot is still in use - back of the bus
2584                  */
2585                 free_slots[free_length] = i;
2586             }
2587             ++free_length;
2588         }
2589         else if (child_threads_active == threads_per_child) {
2590             had_healthy_child = 1;
2591         }
2592         /* XXX if (!ps->quiescing)     is probably more reliable  GLA */
2593         if (!any_dying_threads) {
2594             last_non_dead = i;
2595             ++total_non_dead;
2596         }
2597     }
2598
2599     if (retained->sick_child_detected) {
2600         if (had_healthy_child) {
2601             /* Assume this is a transient error, even though it may not be.  Leave
2602              * the server up in case it is able to serve some requests or the
2603              * problem will be resolved.
2604              */
2605             retained->sick_child_detected = 0;
2606         }
2607         else {
2608             /* looks like a basket case, as no child ever fully initialized; give up.
2609              */
2610             shutdown_pending = 1;
2611             child_fatal = 1;
2612             ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
2613                          ap_server_conf, APLOGNO(02324)
2614                          "A resource shortage or other unrecoverable failure "
2615                          "was encountered before any child process initialized "
2616                          "successfully... httpd is exiting!");
2617             /* the child already logged the failure details */
2618             return;
2619         }
2620     }
2621
2622     retained->max_daemons_limit = last_non_dead + 1;
2623
2624     if (idle_thread_count > max_spare_threads / num_buckets) {
2625         /* Kill off one child */
2626         ap_mpm_podx_signal(all_buckets[child_bucket].pod,
2627                            AP_MPM_PODX_GRACEFUL);
2628         retained->idle_spawn_rate[child_bucket] = 1;
2629     }
2630     else if (idle_thread_count < min_spare_threads / num_buckets) {
2631         /* terminate the free list */
2632         if (free_length == 0) { /* scoreboard is full, can't fork */
2633
2634             if (active_thread_count >= ap_daemons_limit * threads_per_child) {
2635                 if (!retained->maxclients_reported) {
2636                     /* only report this condition once */
2637                     ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484)
2638                                  "server reached MaxRequestWorkers setting, "
2639                                  "consider raising the MaxRequestWorkers "
2640                                  "setting");
2641                     retained->maxclients_reported = 1;
2642                 }
2643             }
2644             else {
2645                 ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00485)
2646                              "scoreboard is full, not at MaxRequestWorkers");
2647             }
2648             retained->idle_spawn_rate[child_bucket] = 1;
2649         }
2650         else {
2651             if (free_length > retained->idle_spawn_rate[child_bucket]) {
2652                 free_length = retained->idle_spawn_rate[child_bucket];
2653             }
2654             if (retained->idle_spawn_rate[child_bucket] >= 8) {
2655                 ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00486)
2656                              "server seems busy, (you may need "
2657                              "to increase StartServers, ThreadsPerChild "
2658                              "or Min/MaxSpareThreads), "
2659                              "spawning %d children, there are around %d idle "
2660                              "threads, and %d total children", free_length,
2661                              idle_thread_count, total_non_dead);
2662             }
2663             for (i = 0; i < free_length; ++i) {
2664                 make_child(ap_server_conf, free_slots[i], child_bucket);
2665             }
2666             /* the next time around we want to spawn twice as many if this
2667              * wasn't good enough, but not if we've just done a graceful
2668              */
2669             if (retained->hold_off_on_exponential_spawning) {
2670                 --retained->hold_off_on_exponential_spawning;
2671             }
2672             else if (retained->idle_spawn_rate[child_bucket]
2673                      < MAX_SPAWN_RATE / num_buckets) {
2674                 retained->idle_spawn_rate[child_bucket] *= 2;
2675             }
2676         }
2677     }
2678     else {
2679         retained->idle_spawn_rate[child_bucket] = 1;
2680     }
2681 }
2682
2683 static void server_main_loop(int remaining_children_to_start, int num_buckets)
2684 {
2685     ap_generation_t old_gen;
2686     int child_slot;
2687     apr_exit_why_e exitwhy;
2688     int status, processed_status;
2689     apr_proc_t pid;
2690     int i;
2691
2692     while (!restart_pending && !shutdown_pending) {
2693         ap_wait_or_timeout(&exitwhy, &status, &pid, pconf, ap_server_conf);
2694
2695         if (pid.pid != -1) {
2696             processed_status = ap_process_child_status(&pid, exitwhy, status);
2697             child_slot = ap_find_child_by_pid(&pid);
2698             if (processed_status == APEXIT_CHILDFATAL) {
2699                 /* fix race condition found in PR 39311
2700                  * A child created at the same time as a graceful happens
2701                  * can find the lock missing and create a fatal error.
2702                  * It is not fatal for the last generation to be in this state.
2703                  */
2704                 if (child_slot < 0
2705                     || ap_get_scoreboard_process(child_slot)->generation
2706                        == retained->my_generation) {
2707                     shutdown_pending = 1;
2708                     child_fatal = 1;
2709                     return;
2710                 }
2711                 else {
2712                     ap_log_error(APLOG_MARK, APLOG_WARNING, 0, ap_server_conf, APLOGNO(00487)
2713                                  "Ignoring fatal error in child of previous "
2714                                  "generation (pid %ld).",
2715                                  (long)pid.pid);
2716                     retained->sick_child_detected = 1;
2717                 }
2718             }
2719             else if (processed_status == APEXIT_CHILDSICK) {
2720                 /* tell perform_idle_server_maintenance to check into this
2721                  * on the next timer pop
2722                  */
2723                 retained->sick_child_detected = 1;
2724             }
2725             /* non-fatal death... note that it's gone in the scoreboard. */
2726             if (child_slot >= 0) {
2727                 process_score *ps;
2728
2729                 for (i = 0; i < threads_per_child; i++)
2730                     ap_update_child_status_from_indexes(child_slot, i,
2731                                                         SERVER_DEAD,
2732                                                         (request_rec *) NULL);
2733
2734                 event_note_child_killed(child_slot, 0, 0);
2735                 ps = &ap_scoreboard_image->parent[child_slot];
2736                 ps->quiescing = 0;
2737                 if (processed_status == APEXIT_CHILDSICK) {
2738                     /* resource shortage, minimize the fork rate */
2739                     retained->idle_spawn_rate[ps->bucket] = 1;
2740                 }
2741                 else if (remaining_children_to_start
2742                          && child_slot < ap_daemons_limit) {
2743                     /* we're still doing a 1-for-1 replacement of dead
2744                      * children with new children
2745                      */
2746                     make_child(ap_server_conf, child_slot, ps->bucket);
2747                     --remaining_children_to_start;
2748                 }
2749             }
2750             else if (ap_unregister_extra_mpm_process(pid.pid, &old_gen) == 1) {
2751
2752                 event_note_child_killed(-1, /* already out of the scoreboard */
2753                                         pid.pid, old_gen);
2754                 if (processed_status == APEXIT_CHILDSICK
2755                     && old_gen == retained->my_generation) {
2756                     /* resource shortage, minimize the fork rate */
2757                     for (i = 0; i < num_buckets; i++) {
2758                         retained->idle_spawn_rate[i] = 1;
2759                     }
2760                 }
2761 #if APR_HAS_OTHER_CHILD
2762             }
2763             else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
2764                                                 status) == 0) {
2765                 /* handled */
2766 #endif
2767             }
2768             else if (retained->is_graceful) {
2769                 /* Great, we've probably just lost a slot in the
2770                  * scoreboard.  Somehow we don't know about this child.
2771                  */
2772                 ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
2773                              ap_server_conf, APLOGNO(00488)
2774                              "long lost child came home! (pid %ld)",
2775                              (long) pid.pid);
2776             }
2777             /* Don't perform idle maintenance when a child dies,
2778              * only do it when there's a timeout.  Remember only a
2779              * finite number of children can die, and it's pretty
2780              * pathological for a lot to die suddenly.
2781              */
2782             continue;
2783         }
2784         else if (remaining_children_to_start) {
2785             /* we hit a 1 second timeout in which none of the previous
2786              * generation of children needed to be reaped... so assume
2787              * they're all done, and pick up the slack if any is left.
2788              */
2789             startup_children(remaining_children_to_start);
2790             remaining_children_to_start = 0;
2791             /* In any event we really shouldn't do the code below because
2792              * few of the servers we just started are in the IDLE state
2793              * yet, so we'd mistakenly create an extra server.
2794              */
2795             continue;
2796         }
2797
2798         for (i = 0; i < num_buckets; i++) {
2799             perform_idle_server_maintenance(i, num_buckets);
2800         }
2801     }
2802 }
2803
2804 static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
2805 {
2806     int num_buckets = retained->num_buckets;
2807     int remaining_children_to_start;
2808     int i;
2809
2810     ap_log_pid(pconf, ap_pid_fname);
2811
2812     if (!retained->is_graceful) {
2813         if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
2814             mpm_state = AP_MPMQ_STOPPING;
2815             return DONE;
2816         }
2817         /* fix the generation number in the global score; we just got a new,
2818          * cleared scoreboard
2819          */
2820         ap_scoreboard_image->global->running_generation = retained->my_generation;
2821     }
2822
2823     restart_pending = shutdown_pending = 0;
2824     set_signals();
2825
2826     /* Don't thrash since num_buckets depends on the
2827      * system and the number of online CPU cores...
2828      */
2829     if (ap_daemons_limit < num_buckets)
2830         ap_daemons_limit = num_buckets;
2831     if (ap_daemons_to_start < num_buckets)
2832         ap_daemons_to_start = num_buckets;
2833     if (min_spare_threads < threads_per_child * num_buckets)
2834         min_spare_threads = threads_per_child * num_buckets;
2835     if (max_spare_threads < min_spare_threads + threads_per_child * num_buckets)
2836         max_spare_threads = min_spare_threads + threads_per_child * num_buckets;
2837
2838     /* If we're doing a graceful_restart then we're going to see a lot
2839      * of children exiting immediately when we get into the main loop
2840      * below (because we just sent them AP_SIG_GRACEFUL).  This happens pretty
2841      * rapidly... and for each one that exits we may start a new one, until
2842      * there are at least min_spare_threads idle threads, counting across
2843      * all children.  But we may be permitted to start more children than
2844      * that, so we'll just keep track of how many we're
2845      * supposed to start up without the 1 second penalty between each fork.
2846      */
2847     remaining_children_to_start = ap_daemons_to_start;
2848     if (remaining_children_to_start > ap_daemons_limit) {
2849         remaining_children_to_start = ap_daemons_limit;
2850     }
2851     if (!retained->is_graceful) {
2852         startup_children(remaining_children_to_start);
2853         remaining_children_to_start = 0;
2854     }
2855     else {
2856         /* give the system some time to recover before kicking into
2857          * exponential mode */
2858         retained->hold_off_on_exponential_spawning = 10;
2859     }
2860
2861     ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00489)
2862                  "%s configured -- resuming normal operations",
2863                  ap_get_server_description());
2864     ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00490)
2865                  "Server built: %s", ap_get_server_built());
2866     ap_log_command_line(plog, s);
2867     ap_log_mpm_common(s);
2868
2869     mpm_state = AP_MPMQ_RUNNING;
2870
2871     server_main_loop(remaining_children_to_start, num_buckets);
2872     mpm_state = AP_MPMQ_STOPPING;
2873
2874     if (shutdown_pending && !retained->is_graceful) {
2875         /* Time to shut down:
2876          * Kill child processes, tell them to call child_exit, etc...
2877          */
2878         for (i = 0; i < num_buckets; i++) {
2879             ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
2880                                AP_MPM_PODX_RESTART);
2881         }
2882         ap_reclaim_child_processes(1, /* Start with SIGTERM */
2883                                    event_note_child_killed);
2884
2885         if (!child_fatal) {
2886             /* cleanup pid file on normal shutdown */
2887             ap_remove_pid(pconf, ap_pid_fname);
2888             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0,
2889                          ap_server_conf, APLOGNO(00491) "caught SIGTERM, shutting down");
2890         }
2891         return DONE;
2892     } else if (shutdown_pending) {
2893         /* Time to gracefully shut down:
2894          * Kill child processes, tell them to call child_exit, etc...
2895          */
2896         int active_children;
2897         int index;
2898         apr_time_t cutoff = 0;
2899
2900         /* Close our listeners, and then ask our children to do same */
2901         ap_close_listeners();
2902         for (i = 0; i < num_buckets; i++) {
2903             ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
2904                                AP_MPM_PODX_GRACEFUL);
2905         }
2906         ap_relieve_child_processes(event_note_child_killed);
2907
2908         if (!child_fatal) {
2909             /* cleanup pid file on normal shutdown */
2910             ap_remove_pid(pconf, ap_pid_fname);
2911             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00492)
2912                          "caught " AP_SIG_GRACEFUL_STOP_STRING
2913                          ", shutting down gracefully");
2914         }
2915
2916         if (ap_graceful_shutdown_timeout) {
2917             cutoff = apr_time_now() +
2918                      apr_time_from_sec(ap_graceful_shutdown_timeout);
2919         }
2920
2921         /* Don't really exit until each child has finished */
2922         shutdown_pending = 0;
2923         do {
2924             /* Pause for a second */
2925             apr_sleep(apr_time_from_sec(1));
2926
2927             /* Relieve any children which have now exited */
2928             ap_relieve_child_processes(event_note_child_killed);
2929
2930             active_children = 0;
2931             for (index = 0; index < ap_daemons_limit; ++index) {
2932                 if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
2933                     active_children = 1;
2934                     /* Having just one child is enough to stay around */
2935                     break;
2936                 }
2937             }
2938         } while (!shutdown_pending && active_children &&
2939                  (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));
2940
2941         /* We might be here because we received SIGTERM, either
2942          * way, try and make sure that all of our processes are
2943          * really dead.
2944          */
2945         for (i = 0; i < num_buckets; i++) {
2946             ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
2947                                AP_MPM_PODX_RESTART);
2948         }
2949         ap_reclaim_child_processes(1, event_note_child_killed);
2950
2951         return DONE;
2952     }
2953
2954     /* we've been told to restart */
2955     apr_signal(SIGHUP, SIG_IGN);
2956
2957     if (one_process) {
2958         /* not worth thinking about */
2959         return DONE;
2960     }
2961
2962     /* advance to the next generation */
2963     /* XXX: we really need to make sure this new generation number isn't in
2964      * use by any of the children.
2965      */
2966     ++retained->my_generation;
2967     ap_scoreboard_image->global->running_generation = retained->my_generation;
2968
2969     if (retained->is_graceful) {
2970         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00493)
2971                      AP_SIG_GRACEFUL_STRING
2972                      " received.  Doing graceful restart");
2973         /* wake up the children...time to die.  But we'll have more soon */
2974         for (i = 0; i < num_buckets; i++) {
2975             ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
2976                                AP_MPM_PODX_GRACEFUL);
2977         }
2978
2979         /* This is mostly for debugging... so that we know what is still
2980          * gracefully dealing with existing request.
2981          */
2982
2983     }
2984     else {
2985         /* Kill 'em all.  Since the child acts the same on the parents SIGTERM
2986          * and a SIGHUP, we may as well use the same signal, because some user
2987          * pthreads are stealing signals from us left and right.
2988          */
2989         for (i = 0; i < num_buckets; i++) {
2990             ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
2991                                AP_MPM_PODX_RESTART);
2992         }
2993
2994         ap_reclaim_child_processes(1,  /* Start with SIGTERM */
2995                                    event_note_child_killed);
2996         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00494)
2997                      "SIGHUP received.  Attempting to restart");
2998     }
2999
3000     return OK;
3001 }
3002
3003 /* This really should be a post_config hook, but the error log is already
3004  * redirected by that point, so we need to do this in the open_logs phase.
3005  */
3006 static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
3007                            apr_pool_t * ptemp, server_rec * s)
3008 {
3009     int startup = 0;
3010     int level_flags = 0;
3011     int num_buckets = 0;
3012     ap_listen_rec **listen_buckets;
3013     apr_status_t rv;
3014     int i;
3015
3016     pconf = p;
3017
3018     /* the reverse of pre_config, we want this only the first time around */
3019     if (retained->module_loads == 1) {
3020         startup = 1;
3021         level_flags |= APLOG_STARTUP;
3022     }
3023
3024     if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
3025         ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
3026                      (startup ? NULL : s),
3027                      "no listening sockets available, shutting down");
3028         return DONE;
3029     }
3030
3031     if (one_process) {
3032         num_buckets = 1;
3033     }
3034     else if (retained->is_graceful) {
3035         /* Preserve the number of buckets on graceful restarts. */
3036         num_buckets = retained->num_buckets;
3037     }
3038     if ((rv = ap_duplicate_listeners(pconf, ap_server_conf,
3039                                      &listen_buckets, &num_buckets))) {
3040         ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
3041                      (startup ? NULL : s),
3042                      "could not duplicate listeners");
3043         return DONE;
3044     }
3045
3046     all_buckets = apr_pcalloc(pconf, num_buckets * sizeof(*all_buckets));
3047     for (i = 0; i < num_buckets; i++) {
3048         if (!one_process && /* no POD in one_process mode */
3049                 (rv = ap_mpm_podx_open(pconf, &all_buckets[i].pod))) {
3050             ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
3051                          (startup ? NULL : s),
3052                          "could not open pipe-of-death");
3053             return DONE;
3054         }
3055         all_buckets[i].listeners = listen_buckets[i];
3056     }
3057
3058     if (retained->max_buckets < num_buckets) {
3059         int new_max, *new_ptr;
3060         new_max = retained->max_buckets * 2;
3061         if (new_max < num_buckets) {
3062             new_max = num_buckets;
3063         }
3064         new_ptr = (int *)apr_palloc(ap_pglobal, new_max * sizeof(int));
3065         memcpy(new_ptr, retained->idle_spawn_rate,
3066                retained->num_buckets * sizeof(int));
3067         retained->idle_spawn_rate = new_ptr;
3068         retained->max_buckets = new_max;
3069     }
3070     if (retained->num_buckets < num_buckets) {
3071         int rate_max = 1;
3072         /* If new buckets are added, set their idle spawn rate to
3073          * the highest so far, so that they get filled as quickly
3074          * as the existing ones.
3075          */
3076         for (i = 0; i < retained->num_buckets; i++) {
3077             if (rate_max < retained->idle_spawn_rate[i]) {
3078                 rate_max = retained->idle_spawn_rate[i];
3079             }
3080         }
3081         for (/* up to date i */; i < num_buckets; i++) {
3082             retained->idle_spawn_rate[i] = rate_max;
3083         }
3084     }
3085     retained->num_buckets = num_buckets;
3086
3087     /* for skiplist */
3088     srand((unsigned int)apr_time_now());
3089     return OK;
3090 }
3091
3092 static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
3093                             apr_pool_t * ptemp)
3094 {
3095     int no_detach, debug, foreground;
3096     apr_status_t rv;
3097     const char *userdata_key = "mpm_event_module";
3098
3099     mpm_state = AP_MPMQ_STARTING;
3100
3101     debug = ap_exists_config_define("DEBUG");
3102
3103     if (debug) {
3104         foreground = one_process = 1;
3105         no_detach = 0;
3106     }
3107     else {
3108         one_process = ap_exists_config_define("ONE_PROCESS");
3109         no_detach = ap_exists_config_define("NO_DETACH");
3110         foreground = ap_exists_config_define("FOREGROUND");
3111     }
3112
3113     /* sigh, want this only the second time around */
3114     retained = ap_retained_data_get(userdata_key);
3115     if (!retained) {
3116         retained = ap_retained_data_create(userdata_key, sizeof(*retained));
3117         retained->max_daemons_limit = -1;
3118     }
3119     ++retained->module_loads;
3120     if (retained->module_loads == 2) {
3121         /* test for correct operation of fdqueue */
3122         static apr_uint32_t foo1, foo2;
3123
3124         apr_atomic_set32(&foo1, 100);
3125         foo2 = apr_atomic_add32(&foo1, -10);
3126         if (foo2 != 100 || foo1 != 90) {
3127             ap_log_error(APLOG_MARK, APLOG_CRIT, 0, NULL, APLOGNO(02405)
3128                          "atomics not working as expected - add32 of negative number");
3129             return HTTP_INTERNAL_SERVER_ERROR;
3130         }
3131
3132         rv = apr_pollset_create(&event_pollset, 1, plog,
3133                                 APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
3134         if (rv != APR_SUCCESS) {
3135             ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, APLOGNO(00495)
3136                          "Couldn't create a Thread Safe Pollset. "
3137                          "Is it supported on your platform?"
3138                          "Also check system or user limits!");
3139             return HTTP_INTERNAL_SERVER_ERROR;
3140         }
3141         apr_pollset_destroy(event_pollset);
3142
3143         if (!one_process && !foreground) {
3144             /* before we detach, setup crash handlers to log to errorlog */
3145             ap_fatal_signal_setup(ap_server_conf, pconf);
3146             rv = apr_proc_detach(no_detach ? APR_PROC_DETACH_FOREGROUND
3147                                  : APR_PROC_DETACH_DAEMONIZE);
3148             if (rv != APR_SUCCESS) {
3149                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, APLOGNO(00496)
3150                              "apr_proc_detach failed");
3151                 return HTTP_INTERNAL_SERVER_ERROR;
3152             }
3153         }
3154     }
3155
3156     parent_pid = ap_my_pid = getpid();
3157
3158     ap_listen_pre_config();
3159     ap_daemons_to_start = DEFAULT_START_DAEMON;
3160     min_spare_threads = DEFAULT_MIN_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
3161     max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
3162     server_limit = DEFAULT_SERVER_LIMIT;
3163     thread_limit = DEFAULT_THREAD_LIMIT;
3164     ap_daemons_limit = server_limit;
3165     threads_per_child = DEFAULT_THREADS_PER_CHILD;
3166     max_workers = ap_daemons_limit * threads_per_child;
3167     had_healthy_child = 0;
3168     ap_extended_status = 0;
3169
3170     return OK;
3171 }
3172
3173 static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
3174                              apr_pool_t *ptemp, server_rec *s)
3175 {
3176     struct {
3177         struct timeout_queue *tail, *q;
3178         apr_hash_t *hash;
3179     } wc, ka;
3180
3181     /* Not needed in pre_config stage */
3182     if (ap_state_query(AP_SQ_MAIN_STATE) == AP_SQ_MS_CREATE_PRE_CONFIG) {
3183         return OK;
3184     }
3185
3186     wc.tail = ka.tail = NULL;
3187     wc.hash = apr_hash_make(ptemp);
3188     ka.hash = apr_hash_make(ptemp);
3189
3190     TO_QUEUE_INIT(linger_q, pconf,
3191                   apr_time_from_sec(MAX_SECS_TO_LINGER), NULL);
3192     TO_QUEUE_INIT(short_linger_q, pconf,
3193                   apr_time_from_sec(SECONDS_TO_LINGER), NULL);
3194
3195     for (; s; s = s->next) {
3196         event_srv_cfg *sc = apr_pcalloc(pconf, sizeof *sc);
3197
3198         ap_set_module_config(s->module_config, &mpm_event_module, sc);
3199         if (!wc.tail) {
3200             /* The main server uses the global queues */
3201             TO_QUEUE_INIT(wc.q, pconf, s->timeout, NULL);
3202             apr_hash_set(wc.hash, &s->timeout, sizeof s->timeout, wc.q);
3203             wc.tail = write_completion_q = wc.q;
3204
3205             TO_QUEUE_INIT(ka.q, pconf, s->keep_alive_timeout, NULL);
3206             apr_hash_set(ka.hash, &s->keep_alive_timeout,
3207                          sizeof s->keep_alive_timeout, ka.q);
3208             ka.tail = keepalive_q = ka.q;
3209         }
3210         else {
3211             /* The vhosts use any existing queue with the same timeout,
3212              * or their own queue(s) if there isn't */
3213             wc.q = apr_hash_get(wc.hash, &s->timeout, sizeof s->timeout);
3214             if (!wc.q) {
3215                 TO_QUEUE_INIT(wc.q, pconf, s->timeout, wc.tail);
3216                 apr_hash_set(wc.hash, &s->timeout, sizeof s->timeout, wc.q);
3217                 wc.tail = wc.tail->next = wc.q;
3218             }
3219
3220             ka.q = apr_hash_get(ka.hash, &s->keep_alive_timeout,
3221                                 sizeof s->keep_alive_timeout);
3222             if (!ka.q) {
3223                 TO_QUEUE_INIT(ka.q, pconf, s->keep_alive_timeout, ka.tail);
3224                 apr_hash_set(ka.hash, &s->keep_alive_timeout,
3225                              sizeof s->keep_alive_timeout, ka.q);
3226                 ka.tail = ka.tail->next = ka.q;
3227             }
3228         }
3229         sc->wc_q = wc.q;
3230         sc->ka_q = ka.q;
3231     }
3232
3233     return OK;
3234 }
3235
3236 static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
3237                               apr_pool_t *ptemp, server_rec *s)
3238 {
3239     int startup = 0;
3240
3241     /* the reverse of pre_config, we want this only the first time around */
3242     if (retained->module_loads == 1) {
3243         startup = 1;
3244     }
3245
3246     if (server_limit > MAX_SERVER_LIMIT) {
3247         if (startup) {
3248             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00497)
3249                          "WARNING: ServerLimit of %d exceeds compile-time "
3250                          "limit of", server_limit);
3251             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(03105)
3252                          " %d servers, decreasing to %d.",
3253                          MAX_SERVER_LIMIT, MAX_SERVER_LIMIT);
3254         } else {
3255             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00498)
3256                          "ServerLimit of %d exceeds compile-time limit "
3257                          "of %d, decreasing to match",
3258                          server_limit, MAX_SERVER_LIMIT);
3259         }
3260         server_limit = MAX_SERVER_LIMIT;
3261     }
3262     else if (server_limit < 1) {
3263         if (startup) {
3264             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00499)
3265                          "WARNING: ServerLimit of %d not allowed, "
3266                          "increasing to 1.", server_limit);
3267         } else {
3268             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00500)
3269                          "ServerLimit of %d not allowed, increasing to 1",
3270                          server_limit);
3271         }
3272         server_limit = 1;
3273     }
3274
3275     /* you cannot change ServerLimit across a restart; ignore
3276      * any such attempts
3277      */
3278     if (!retained->first_server_limit) {
3279         retained->first_server_limit = server_limit;
3280     }
3281     else if (server_limit != retained->first_server_limit) {
3282         /* don't need a startup console version here */
3283         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00501)
3284                      "changing ServerLimit to %d from original value of %d "
3285                      "not allowed during restart",
3286                      server_limit, retained->first_server_limit);
3287         server_limit = retained->first_server_limit;
3288     }
3289
3290     if (thread_limit > MAX_THREAD_LIMIT) {
3291         if (startup) {
3292             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00502)
3293                          "WARNING: ThreadLimit of %d exceeds compile-time "
3294                          "limit of", thread_limit);
3295             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(03106)
3296                          " %d threads, decreasing to %d.",
3297                          MAX_THREAD_LIMIT, MAX_THREAD_LIMIT);
3298         } else {
3299             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00503)
3300                          "ThreadLimit of %d exceeds compile-time limit "
3301                          "of %d, decreasing to match",
3302                          thread_limit, MAX_THREAD_LIMIT);
3303         }
3304         thread_limit = MAX_THREAD_LIMIT;
3305     }
3306     else if (thread_limit < 1) {
3307         if (startup) {
3308             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00504)
3309                          "WARNING: ThreadLimit of %d not allowed, "
3310                          "increasing to 1.", thread_limit);
3311         } else {
3312             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00505)
3313                          "ThreadLimit of %d not allowed, increasing to 1",
3314                          thread_limit);
3315         }
3316         thread_limit = 1;
3317     }
3318
3319     /* you cannot change ThreadLimit across a restart; ignore
3320      * any such attempts
3321      */
3322     if (!retained->first_thread_limit) {
3323         retained->first_thread_limit = thread_limit;
3324     }
3325     else if (thread_limit != retained->first_thread_limit) {
3326         /* don't need a startup console version here */
3327         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00506)
3328                      "changing ThreadLimit to %d from original value of %d "
3329                      "not allowed during restart",
3330                      thread_limit, retained->first_thread_limit);
3331         thread_limit = retained->first_thread_limit;
3332     }
3333
3334     if (threads_per_child > thread_limit) {
3335         if (startup) {
3336             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00507)
3337                          "WARNING: ThreadsPerChild of %d exceeds ThreadLimit "
3338                          "of", threads_per_child);
3339             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(03107)
3340                          " %d threads, decreasing to %d.",
3341                          thread_limit, thread_limit);
3342             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(03108)
3343                          " To increase, please see the ThreadLimit "
3344                          "directive.");
3345         } else {
3346             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00508)
3347                          "ThreadsPerChild of %d exceeds ThreadLimit "
3348                          "of %d, decreasing to match",
3349                          threads_per_child, thread_limit);
3350         }
3351         threads_per_child = thread_limit;
3352     }
3353     else if (threads_per_child < 1) {
3354         if (startup) {
3355             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00509)
3356                          "WARNING: ThreadsPerChild of %d not allowed, "
3357                          "increasing to 1.", threads_per_child);
3358         } else {
3359             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00510)
3360                          "ThreadsPerChild of %d not allowed, increasing to 1",
3361                          threads_per_child);
3362         }
3363         threads_per_child = 1;
3364     }
3365
3366     if (max_workers < threads_per_child) {
3367         if (startup) {
3368             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00511)
3369                          "WARNING: MaxRequestWorkers of %d is less than "
3370                          "ThreadsPerChild of", max_workers);
3371             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(03109)
3372                          " %d, increasing to %d.  MaxRequestWorkers must be at "
3373                          "least as large",
3374                          threads_per_child, threads_per_child);
3375             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(03110)
3376                          " as the number of threads in a single server.");
3377         } else {
3378             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00512)
3379                          "MaxRequestWorkers of %d is less than ThreadsPerChild "
3380                          "of %d, increasing to match",
3381                          max_workers, threads_per_child);
3382         }
3383         max_workers = threads_per_child;
3384     }
3385
3386     ap_daemons_limit = max_workers / threads_per_child;
3387
3388     if (max_workers % threads_per_child) {
3389         int tmp_max_workers = ap_daemons_limit * threads_per_child;
3390
3391         if (startup) {
3392             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00513)
3393                          "WARNING: MaxRequestWorkers of %d is not an integer "
3394                          "multiple of", max_workers);
3395             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(03111)
3396                          " ThreadsPerChild of %d, decreasing to nearest "
3397                          "multiple %d,", threads_per_child,
3398                          tmp_max_workers);
3399             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(03112)
3400                          " for a maximum of %d servers.",
3401                          ap_daemons_limit);
3402         } else {
3403             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00514)
3404                          "MaxRequestWorkers of %d is not an integer multiple "
3405                          "of ThreadsPerChild of %d, decreasing to nearest "
3406                          "multiple %d", max_workers, threads_per_child,
3407                          tmp_max_workers);
3408         }
3409         max_workers = tmp_max_workers;
3410     }
3411
3412     if (ap_daemons_limit > server_limit) {
3413         if (startup) {
3414             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00515)
3415                          "WARNING: MaxRequestWorkers of %d would require %d "
3416                          "servers and ", max_workers, ap_daemons_limit);
3417             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(03113)
3418                          " would exceed ServerLimit of %d, decreasing to %d.",
3419                          server_limit, server_limit * threads_per_child);
3420             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(03114)
3421                          " To increase, please see the ServerLimit "
3422                          "directive.");
3423         } else {
3424             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00516)
3425                          "MaxRequestWorkers of %d would require %d servers and "
3426                          "exceed ServerLimit of %d, decreasing to %d",
3427                          max_workers, ap_daemons_limit, server_limit,
3428                          server_limit * threads_per_child);
3429         }
3430         ap_daemons_limit = server_limit;
3431     }
3432
3433     /* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */
3434     if (ap_daemons_to_start < 1) {
3435         if (startup) {
3436             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00517)
3437                          "WARNING: StartServers of %d not allowed, "
3438                          "increasing to 1.", ap_daemons_to_start);
3439         } else {
3440             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00518)
3441                          "StartServers of %d not allowed, increasing to 1",
3442                          ap_daemons_to_start);
3443         }
3444         ap_daemons_to_start = 1;
3445     }
3446
3447     if (min_spare_threads < 1) {
3448         if (startup) {
3449             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00519)
3450                          "WARNING: MinSpareThreads of %d not allowed, "
3451                          "increasing to 1", min_spare_threads);
3452             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(03115)
3453                          " to avoid almost certain server failure.");
3454             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(03116)
3455                          " Please read the documentation.");
3456         } else {
3457             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00520)
3458                          "MinSpareThreads of %d not allowed, increasing to 1",
3459                          min_spare_threads);
3460         }
3461         min_spare_threads = 1;
3462     }
3463
3464     /* max_spare_threads < min_spare_threads + threads_per_child
3465      * checked in ap_mpm_run()
3466      */
3467
3468     return OK;
3469 }
3470
3471 static void event_hooks(apr_pool_t * p)
3472 {
3473     /* Our open_logs hook function must run before the core's, or stderr
3474      * will be redirected to a file, and the messages won't print to the
3475      * console.
3476      */
3477     static const char *const aszSucc[] = { "core.c", NULL };
3478     one_process = 0;
3479
3480     ap_hook_open_logs(event_open_logs, NULL, aszSucc, APR_HOOK_REALLY_FIRST);
3481     /* we need to set the MPM state before other pre-config hooks use MPM query
3482      * to retrieve it, so register as REALLY_FIRST
3483      */
3484     ap_hook_pre_config(event_pre_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
3485     ap_hook_post_config(event_post_config, NULL, NULL, APR_HOOK_MIDDLE);
3486     ap_hook_check_config(event_check_config, NULL, NULL, APR_HOOK_MIDDLE);
3487     ap_hook_mpm(event_run, NULL, NULL, APR_HOOK_MIDDLE);
3488     ap_hook_mpm_query(event_query, NULL, NULL, APR_HOOK_MIDDLE);
3489     ap_hook_mpm_register_timed_callback(event_register_timed_callback, NULL, NULL,
3490                                         APR_HOOK_MIDDLE);
3491     ap_hook_pre_read_request(event_pre_read_request, NULL, NULL, APR_HOOK_MIDDLE);
3492     ap_hook_post_read_request(event_post_read_request, NULL, NULL, APR_HOOK_MIDDLE);
3493     ap_hook_mpm_get_name(event_get_name, NULL, NULL, APR_HOOK_MIDDLE);
3494 }
3495
3496 static const char *set_daemons_to_start(cmd_parms *cmd, void *dummy,
3497                                         const char *arg)
3498 {
3499     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3500     if (err != NULL) {
3501         return err;
3502     }
3503
3504     ap_daemons_to_start = atoi(arg);
3505     return NULL;
3506 }
3507
3508 static const char *set_min_spare_threads(cmd_parms * cmd, void *dummy,
3509                                          const char *arg)
3510 {
3511     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3512     if (err != NULL) {
3513         return err;
3514     }
3515
3516     min_spare_threads = atoi(arg);
3517     return NULL;
3518 }
3519
3520 static const char *set_max_spare_threads(cmd_parms * cmd, void *dummy,
3521                                          const char *arg)
3522 {
3523     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3524     if (err != NULL) {
3525         return err;
3526     }
3527
3528     max_spare_threads = atoi(arg);
3529     return NULL;
3530 }
3531
3532 static const char *set_max_workers(cmd_parms * cmd, void *dummy,
3533                                    const char *arg)
3534 {
3535     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3536     if (err != NULL) {
3537         return err;
3538     }
3539     if (!strcasecmp(cmd->cmd->name, "MaxClients")) {
3540         ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL, APLOGNO(00521)
3541                      "MaxClients is deprecated, use MaxRequestWorkers "
3542                      "instead.");
3543     }
3544     max_workers = atoi(arg);
3545     return NULL;
3546 }
3547
3548 static const char *set_threads_per_child(cmd_parms * cmd, void *dummy,
3549                                          const char *arg)
3550 {
3551     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3552     if (err != NULL) {
3553         return err;
3554     }
3555
3556     threads_per_child = atoi(arg);
3557     return NULL;
3558 }
3559 static const char *set_server_limit (cmd_parms *cmd, void *dummy, const char *arg)
3560 {
3561     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3562     if (err != NULL) {
3563         return err;
3564     }
3565
3566     server_limit = atoi(arg);
3567     return NULL;
3568 }
3569
3570 static const char *set_thread_limit(cmd_parms * cmd, void *dummy,
3571                                     const char *arg)
3572 {
3573     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3574     if (err != NULL) {
3575         return err;
3576     }
3577
3578     thread_limit = atoi(arg);
3579     return NULL;
3580 }
3581
3582 static const char *set_worker_factor(cmd_parms * cmd, void *dummy,
3583                                      const char *arg)
3584 {
3585     double val;
3586     char *endptr;
3587     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3588     if (err != NULL) {
3589         return err;
3590     }
3591
3592     val = strtod(arg, &endptr);
3593     if (*endptr)
3594         return "error parsing value";
3595
3596     if (val <= 0)
3597         return "AsyncRequestWorkerFactor argument must be a positive number";
3598
3599     worker_factor = val * WORKER_FACTOR_SCALE;
3600     if (worker_factor == 0)
3601         worker_factor = 1;
3602     return NULL;
3603 }
3604
3605
3606 static const command_rec event_cmds[] = {
3607     LISTEN_COMMANDS,
3608     AP_INIT_TAKE1("StartServers", set_daemons_to_start, NULL, RSRC_CONF,
3609                   "Number of child processes launched at server startup"),
3610     AP_INIT_TAKE1("ServerLimit", set_server_limit, NULL, RSRC_CONF,
3611                   "Maximum number of child processes for this run of Apache"),
3612     AP_INIT_TAKE1("MinSpareThreads", set_min_spare_threads, NULL, RSRC_CONF,
3613                   "Minimum number of idle threads, to handle request spikes"),
3614     AP_INIT_TAKE1("MaxSpareThreads", set_max_spare_threads, NULL, RSRC_CONF,
3615                   "Maximum number of idle threads"),
3616     AP_INIT_TAKE1("MaxClients", set_max_workers, NULL, RSRC_CONF,
3617                   "Deprecated name of MaxRequestWorkers"),
3618     AP_INIT_TAKE1("MaxRequestWorkers", set_max_workers, NULL, RSRC_CONF,
3619                   "Maximum number of threads alive at the same time"),
3620     AP_INIT_TAKE1("ThreadsPerChild", set_threads_per_child, NULL, RSRC_CONF,
3621                   "Number of threads each child creates"),
3622     AP_INIT_TAKE1("ThreadLimit", set_thread_limit, NULL, RSRC_CONF,
3623                   "Maximum number of worker threads per child process for this "
3624                   "run of Apache - Upper limit for ThreadsPerChild"),
3625     AP_INIT_TAKE1("AsyncRequestWorkerFactor", set_worker_factor, NULL, RSRC_CONF,
3626                   "How many additional connects will be accepted per idle "
3627                   "worker thread"),
3628     AP_GRACEFUL_SHUTDOWN_TIMEOUT_COMMAND,
3629     {NULL}
3630 };
3631
/*
 * Module record for the event MPM.  Only the directive table (event_cmds)
 * and the hook-registration function (event_hooks) are supplied; the other
 * slots are left NULL.
 */
AP_DECLARE_MODULE(mpm_event) = {
    MPM20_MODULE_STUFF,
    NULL,                       /* hook to run before apache parses args */
    NULL,                       /* create per-directory config structure */
    NULL,                       /* merge per-directory config structures */
    NULL,                       /* create per-server config structure */
    NULL,                       /* merge per-server config structures */
    event_cmds,                 /* command apr_table_t */
    event_hooks                 /* register_hooks */
};