granicus.if.org Git - apache/blob - server/mpm/event/event.c

   1 /* Licensed to the Apache Software Foundation (ASF) under one or more
   2  * contributor license agreements.  See the NOTICE file distributed with
   3  * this work for additional information regarding copyright ownership.
   4  * The ASF licenses this file to You under the Apache License, Version 2.0
   5  * (the "License"); you may not use this file except in compliance with
   6  * the License.  You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * This MPM tries to fix the 'keep alive problem' in HTTP.
  19  *
  20  * After a client completes the first request, the client can keep the
  21  * connection open to send more requests with the same socket.  This can save
  22  * significant overhead in creating TCP connections.  However, the major
  23  * disadvantage is that Apache traditionally keeps an entire child
  24  * process/thread waiting for data from the client.  To solve this problem,
  25  * this MPM has a dedicated thread for handling both the Listening sockets,
  26  * and all sockets that are in a Keep Alive status.
  27  *
  28  * The MPM assumes the underlying apr_pollset implementation is somewhat
  29  * threadsafe.  This currently is only compatible with KQueue and EPoll.  This
  30  * enables the MPM to avoid extra high level locking or having to wake up the
  31  * listener thread when a keep-alive socket needs to be sent to it.
  32  *
  33  * This MPM does not perform well on older platforms that do not have very good
  34  * threading, like Linux with a 2.4 kernel, but this does not matter, since we
  35  * require EPoll or KQueue.
  36  *
  37  * For FreeBSD, use 5.3.  It is possible to run this MPM on FreeBSD 5.2.1, if
  38  * you use libkse (see `man libmap.conf`).
  39  *
  40  * For NetBSD, use at least 2.0.
  41  *
  42  * For Linux, you should use a 2.6 kernel, and make sure your glibc has epoll
  43  * support compiled in.
  44  *
  45  */
  46
  47 #include "apr.h"
  48 #include "apr_portable.h"
  49 #include "apr_strings.h"
  50 #include "apr_file_io.h"
  51 #include "apr_thread_proc.h"
  52 #include "apr_signal.h"
  53 #include "apr_thread_mutex.h"
  54 #include "apr_poll.h"
  55 #include "apr_ring.h"
  56 #include "apr_queue.h"
  57 #include "apr_atomic.h"
  58 #define APR_WANT_STRFUNC
  59 #include "apr_want.h"
  60
  61 #include "apr_version.h"
  62
  63 #include <stdlib.h>
  64
  65 #if APR_HAVE_UNISTD_H
  66 #include <unistd.h>
  67 #endif
  68 #if APR_HAVE_SYS_SOCKET_H
  69 #include <sys/socket.h>
  70 #endif
  71 #if APR_HAVE_SYS_WAIT_H
  72 #include <sys/wait.h>
  73 #endif
  74 #ifdef HAVE_SYS_PROCESSOR_H
  75 #include <sys/processor.h>      /* for bindprocessor() */
  76 #endif
  77
  78 #if !APR_HAS_THREADS
  79 #error The Event MPM requires APR threads, but they are unavailable.
  80 #endif
  81
  82 #include "ap_config.h"
  83 #include "httpd.h"
  84 #include "http_main.h"
  85 #include "http_log.h"
  86 #include "http_config.h"        /* for read_config */
  87 #include "http_core.h"          /* for get_remote_host */
  88 #include "http_connection.h"
  89 #include "http_protocol.h"
  90 #include "ap_mpm.h"
  91 #include "mpm_common.h"
  92 #include "ap_listen.h"
  93 #include "scoreboard.h"
  94 #include "fdqueue.h"
  95 #include "mpm_default.h"
  96 #include "http_vhost.h"
  97 #include "unixd.h"
  98 #include "apr_skiplist.h"
  99 #include "util_time.h"
 100
 101 #include <signal.h>
 102 #include <limits.h>             /* for INT_MAX */
 103
 104
 105 #if HAVE_SERF
 106 #include "mod_serf.h"
 107 #include "serf.h"
 108 #endif
 109
 110 /* Limit on the total --- clients will be locked out if more servers than
 111  * this are needed.  It is intended solely to keep the server from crashing
 112  * when things get out of hand.
 113  *
 114  * We keep a hard maximum number of servers, for two reasons --- first off,
 115  * in case something goes seriously wrong, we want to stop the fork bomb
 116  * short of actually crashing the machine we're running on by filling some
 117  * kernel table.  Secondly, it keeps the size of the scoreboard file small
 118  * enough that we can read the whole thing without worrying too much about
 119  * the overhead.
 120  */
 121 #ifndef DEFAULT_SERVER_LIMIT
 122 #define DEFAULT_SERVER_LIMIT 16
 123 #endif
 124
 125 /* Admin can't tune ServerLimit beyond MAX_SERVER_LIMIT.  We want
 126  * some sort of compile-time limit to help catch typos.
 127  */
 128 #ifndef MAX_SERVER_LIMIT
 129 #define MAX_SERVER_LIMIT 20000
 130 #endif
 131
 132 /* Limit on the threads per process.  Clients will be locked out if more than
 133  * this are needed.
 134  *
  135  * We keep this for one reason: it keeps the size of the scoreboard file small
 136  * enough that we can read the whole thing without worrying too much about
 137  * the overhead.
 138  */
 139 #ifndef DEFAULT_THREAD_LIMIT
 140 #define DEFAULT_THREAD_LIMIT 64
 141 #endif
 142
 143 /* Admin can't tune ThreadLimit beyond MAX_THREAD_LIMIT.  We want
 144  * some sort of compile-time limit to help catch typos.
 145  */
 146 #ifndef MAX_THREAD_LIMIT
 147 #define MAX_THREAD_LIMIT 100000
 148 #endif
 149
 150 #define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
 151
 152 #if !APR_VERSION_AT_LEAST(1,4,0)
 153 #define apr_time_from_msec(x) (x * 1000)
 154 #endif
 155
 156 #ifndef MAX_SECS_TO_LINGER
 157 #define MAX_SECS_TO_LINGER 30
 158 #endif
 159 #define SECONDS_TO_LINGER  2
 160
 161 /*
 162  * Actual definitions of config globals
 163  */
 164
 165 #ifndef DEFAULT_WORKER_FACTOR
 166 #define DEFAULT_WORKER_FACTOR 2
 167 #endif
 168 #define WORKER_FACTOR_SCALE   16  /* scale factor to allow fractional values */
 169 static unsigned int worker_factor = DEFAULT_WORKER_FACTOR * WORKER_FACTOR_SCALE;
 170
 171 static int threads_per_child = 0;   /* Worker threads per child */
 172 static int ap_daemons_to_start = 0;
 173 static int min_spare_threads = 0;
 174 static int max_spare_threads = 0;
 175 static int ap_daemons_limit = 0;
 176 static int max_workers = 0;
 177 static int server_limit = 0;
 178 static int thread_limit = 0;
 179 static int had_healthy_child = 0;
 180 static int dying = 0;
 181 static int workers_may_exit = 0;
 182 static int start_thread_may_exit = 0;
 183 static int listener_may_exit = 0;
 184 static int num_listensocks = 0;
 185 static apr_int32_t conns_this_child;        /* MaxConnectionsPerChild, only access
 186                                                in listener thread */
 187 static apr_uint32_t connection_count = 0;   /* Number of open connections */
 188 static apr_uint32_t lingering_count = 0;    /* Number of connections in lingering close */
 189 static apr_uint32_t suspended_count = 0;    /* Number of suspended connections */
 190 static apr_uint32_t clogged_count = 0;      /* Number of threads processing ssl conns */
 191 static int resource_shortage = 0;
 192 static fd_queue_t *worker_queue;
 193 static fd_queue_info_t *worker_queue_info;
 194 static int mpm_state = AP_MPMQ_STARTING;
 195
 196 static apr_thread_mutex_t *timeout_mutex;
 197
 198 module AP_MODULE_DECLARE_DATA mpm_event_module;
 199
 200 struct event_conn_state_t {
 201     /** APR_RING of expiration timeouts */
 202     APR_RING_ENTRY(event_conn_state_t) timeout_list;
 203     /** the expiration time of the next keepalive timeout */
 204     apr_time_t expiration_time;
 205     /** connection record this struct refers to */
 206     conn_rec *c;
 207     /** request record (if any) this struct refers to */
 208     request_rec *r;
 209     /** is the current conn_rec suspended?  (disassociated with
 210      * a particular MPM thread; for suspend_/resume_connection
 211      * hooks)
 212      */
 213     int suspended;
 214     /** memory pool to allocate from */
 215     apr_pool_t *p;
 216     /** bucket allocator */
 217     apr_bucket_alloc_t *bucket_alloc;
 218     /** poll file descriptor information */
 219     apr_pollfd_t pfd;
 220     /** public parts of the connection state */
 221     conn_state_t pub;
 222 };
 223 APR_RING_HEAD(timeout_head_t, event_conn_state_t);
 224
 225 struct timeout_queue {
 226     struct timeout_head_t head;
 227     int count;
 228     const char *tag;
 229 };
 230 /*
 231  * Several timeout queues that use different timeouts, so that we always can
 232  * simply append to the end.
 233  *   write_completion_q uses TimeOut
 234  *   keepalive_q        uses KeepAliveTimeOut
 235  *   linger_q           uses MAX_SECS_TO_LINGER
 236  *   short_linger_q     uses SECONDS_TO_LINGER
 237  */
 238 static struct timeout_queue write_completion_q, keepalive_q, linger_q,
 239                             short_linger_q;
 240 static apr_pollfd_t *listener_pollfd;
 241
 242 /*
 243  * Macros for accessing struct timeout_queue.
 244  * For TO_QUEUE_APPEND and TO_QUEUE_REMOVE, timeout_mutex must be held.
 245  */
/* Link el at the tail of queue q's ring and bump the element count. */
#define TO_QUEUE_APPEND(q, el)                                                  \
    do {                                                                        \
        APR_RING_INSERT_TAIL(&(q).head, el, event_conn_state_t, timeout_list);  \
        (q).count++;                                                            \
    } while (0)

/* Unlink el from its ring and decrement queue q's element count.
 * The caller must pass the queue that el is actually linked into; the
 * macro itself does not check this.
 */
#define TO_QUEUE_REMOVE(q, el)             \
    do {                                   \
        APR_RING_REMOVE(el, timeout_list); \
        (q).count--;                       \
    } while (0)

/* Initialize queue q's ring head and record the stringified argument
 * expression as its tag.  Note that count is not touched here.
 */
#define TO_QUEUE_INIT(q)                                                  \
    do {                                                                  \
            APR_RING_INIT(&(q).head, event_conn_state_t, timeout_list);   \
            (q).tag = #q;                                                 \
    } while (0)

/* Initialize the ring links of a single element before it is queued. */
#define TO_QUEUE_ELEM_INIT(el) APR_RING_ELEM_INIT(el, timeout_list)
 265
 266 /*
 267  * The pollset for sockets that are in any of the timeout queues. Currently
 268  * we use the timeout_mutex to make sure that connections are added/removed
 269  * atomically to/from both event_pollset and a timeout queue. Otherwise
 270  * some confusion can happen under high load if timeout queues and pollset
 271  * get out of sync.
 272  * XXX: It should be possible to make the lock unnecessary in many or even all
 273  * XXX: cases.
 274  */
 275 static apr_pollset_t *event_pollset;
 276
 277 #if HAVE_SERF
 278 typedef struct {
 279     apr_pollset_t *pollset;
 280     apr_pool_t *pool;
 281 } s_baton_t;
 282
 283 static serf_context_t *g_serf;
 284 #endif
 285
 286 /* The structure used to pass unique initialization info to each thread */
 287 typedef struct
 288 {
 289     int pid;
 290     int tid;
 291     int sd;
 292 } proc_info;
 293
 294 /* Structure used to pass information to the thread responsible for
 295  * creating the rest of the threads.
 296  */
 297 typedef struct
 298 {
 299     apr_thread_t **threads;
 300     apr_thread_t *listener;
 301     int child_num_arg;
 302     apr_threadattr_t *threadattr;
 303 } thread_starter;
 304
 305 typedef enum
 306 {
 307     PT_CSD,
 308     PT_ACCEPT
 309 #if HAVE_SERF
 310     , PT_SERF
 311 #endif
 312     , PT_USER
 313 } poll_type_e;
 314
 315 typedef struct
 316 {
 317     poll_type_e type;
 318     void *baton;
 319 } listener_poll_type;
 320
 321 typedef struct
 322 {
 323  ap_mpm_socket_callback_fn_t *cbfunc;
 324  void *user_baton;
 325  apr_pollfd_t **pfds;
 326  int nsock;
 327  timer_event_t *cancel_event;    /* If a timeout was requested, a pointer to the timer event */
 328  unsigned int signaled:1;
 329 } socket_callback_baton_t;
 330
 331 /* data retained by event across load/unload of the module
 332  * allocated on first call to pre-config hook; located on
 333  * subsequent calls to pre-config hook
 334  */
 335 typedef struct event_retained_data {
 336     int first_server_limit;
 337     int first_thread_limit;
 338     int module_loads;
 339     int sick_child_detected;
 340     ap_generation_t my_generation;
 341     int volatile is_graceful; /* set from signal handler */
 342     int maxclients_reported;
 343     /*
 344      * The max child slot ever assigned, preserved across restarts.  Necessary
 345      * to deal with MaxRequestWorkers changes across AP_SIG_GRACEFUL restarts.
 346      * We use this value to optimize routines that have to scan the entire
 347      * scoreboard.
 348      */
 349     int max_daemons_limit;
 350     /*
 351      * idle_spawn_rate is the number of children that will be spawned on the
 352      * next maintenance cycle if there aren't enough idle servers.  It is
 353      * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by
 354      * without the need to spawn.
 355      */
 356     int *idle_spawn_rate;
 357 #ifndef MAX_SPAWN_RATE
 358 #define MAX_SPAWN_RATE        (32)
 359 #endif
 360     int hold_off_on_exponential_spawning;
 361 } event_retained_data;
 362 static event_retained_data *retained;
 363
 364 #define ID_FROM_CHILD_THREAD(c, t)    ((c * thread_limit) + t)
 365
 366 static ap_pod_t **pod;
 367 static ap_pod_t *child_pod;
 368 static ap_listen_rec *child_listen;
 369 static int *bucket;    /* bucket array for the httpd child processes */
 370
 371 /* The event MPM respects a couple of runtime flags that can aid
 372  * in debugging. Setting the -DNO_DETACH flag will prevent the root process
 373  * from detaching from its controlling terminal. Additionally, setting
 374  * the -DONE_PROCESS flag (which implies -DNO_DETACH) will get you the
 375  * child_main loop running in the process which originally started up.
 376  * This gives you a pretty nice debugging environment.  (You'll get a SIGHUP
 377  * early in standalone_main; just continue through.  This is the server
 378  * trying to kill off any child processes which it might have lying
 379  * around --- Apache doesn't keep track of their pids, it just sends
 380  * SIGHUP to the process group, ignoring it in the root process.
 381  * Continue through and you'll be fine.).
 382  */
 383
 384 static int one_process = 0;
 385
 386 #ifdef DEBUG_SIGSTOP
 387 int raise_sigstop_flags;
 388 #endif
 389
 390 static apr_pool_t *pconf;       /* Pool for config stuff */
 391 static apr_pool_t *pchild;      /* Pool for httpd child stuff */
 392
 393 static pid_t ap_my_pid;         /* Linux getpid() doesn't work except in main
 394                                    thread. Use this instead */
 395 static pid_t parent_pid;
 396 static apr_os_thread_t *listener_os_thread;
 397
 398 /* The LISTENER_SIGNAL signal will be sent from the main thread to the
 399  * listener thread to wake it up for graceful termination (what a child
 400  * process from an old generation does when the admin does "apachectl
 401  * graceful").  This signal will be blocked in all threads of a child
 402  * process except for the listener thread.
 403  */
 404 #define LISTENER_SIGNAL     SIGHUP
 405
 406 /* An array of socket descriptors in use by each thread used to
 407  * perform a non-graceful (forced) shutdown of the server.
 408  */
 409 static apr_socket_t **worker_sockets;
 410
 411 static void disable_listensocks(int process_slot)
 412 {
 413     int i;
 414     for (i = 0; i < num_listensocks; i++) {
 415         apr_pollset_remove(event_pollset, &listener_pollfd[i]);
 416     }
 417     ap_scoreboard_image->parent[process_slot].not_accepting = 1;
 418 }
 419
 420 static void enable_listensocks(int process_slot)
 421 {
 422     int i;
 423     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00457)
 424                  "Accepting new connections again: "
 425                  "%u active conns (%u lingering/%u clogged/%u suspended), "
 426                  "%u idle workers",
 427                  apr_atomic_read32(&connection_count),
 428                  apr_atomic_read32(&lingering_count),
 429                  apr_atomic_read32(&clogged_count),
 430                  apr_atomic_read32(&suspended_count),
 431                  ap_queue_info_get_idlers(worker_queue_info));
 432     for (i = 0; i < num_listensocks; i++)
 433         apr_pollset_add(event_pollset, &listener_pollfd[i]);
 434     /*
 435      * XXX: This is not yet optimal. If many workers suddenly become available,
 436      * XXX: the parent may kill some processes off too soon.
 437      */
 438     ap_scoreboard_image->parent[process_slot].not_accepting = 0;
 439 }
 440
 441 static void close_worker_sockets(void)
 442 {
 443     int i;
 444     for (i = 0; i < threads_per_child; i++) {
 445         if (worker_sockets[i]) {
 446             apr_socket_close(worker_sockets[i]);
 447             worker_sockets[i] = NULL;
 448         }
 449     }
 450 }
 451
 452 static void wakeup_listener(void)
 453 {
 454     listener_may_exit = 1;
 455     if (!listener_os_thread) {
 456         /* XXX there is an obscure path that this doesn't handle perfectly:
 457          *     right after listener thread is created but before
 458          *     listener_os_thread is set, the first worker thread hits an
 459          *     error and starts graceful termination
 460          */
 461         return;
 462     }
 463
 464     /* unblock the listener if it's waiting for a worker */
 465     ap_queue_info_term(worker_queue_info);
 466
 467     /*
 468      * we should just be able to "kill(ap_my_pid, LISTENER_SIGNAL)" on all
 469      * platforms and wake up the listener thread since it is the only thread
 470      * with SIGHUP unblocked, but that doesn't work on Linux
 471      */
 472 #ifdef HAVE_PTHREAD_KILL
 473     pthread_kill(*listener_os_thread, LISTENER_SIGNAL);
 474 #else
 475     kill(ap_my_pid, LISTENER_SIGNAL);
 476 #endif
 477 }
 478
 479 #define ST_INIT              0
 480 #define ST_GRACEFUL          1
 481 #define ST_UNGRACEFUL        2
 482
 483 static int terminate_mode = ST_INIT;
 484
 485 static void signal_threads(int mode)
 486 {
 487     if (terminate_mode == mode) {
 488         return;
 489     }
 490     terminate_mode = mode;
 491     mpm_state = AP_MPMQ_STOPPING;
 492
 493     /* in case we weren't called from the listener thread, wake up the
 494      * listener thread
 495      */
 496     wakeup_listener();
 497
 498     /* for ungraceful termination, let the workers exit now;
 499      * for graceful termination, the listener thread will notify the
 500      * workers to exit once it has stopped accepting new connections
 501      */
 502     if (mode == ST_UNGRACEFUL) {
 503         workers_may_exit = 1;
 504         ap_queue_interrupt_all(worker_queue);
 505         close_worker_sockets(); /* forcefully kill all current connections */
 506     }
 507 }
 508
 509 static int event_query(int query_code, int *result, apr_status_t *rv)
 510 {
 511     *rv = APR_SUCCESS;
 512     switch (query_code) {
 513     case AP_MPMQ_MAX_DAEMON_USED:
 514         *result = retained->max_daemons_limit;
 515         break;
 516     case AP_MPMQ_IS_THREADED:
 517         *result = AP_MPMQ_STATIC;
 518         break;
 519     case AP_MPMQ_IS_FORKED:
 520         *result = AP_MPMQ_DYNAMIC;
 521         break;
 522     case AP_MPMQ_IS_ASYNC:
 523         *result = 1;
 524         break;
 525     case AP_MPMQ_HAS_SERF:
 526         *result = 1;
 527         break;
 528     case AP_MPMQ_HARD_LIMIT_DAEMONS:
 529         *result = server_limit;
 530         break;
 531     case AP_MPMQ_HARD_LIMIT_THREADS:
 532         *result = thread_limit;
 533         break;
 534     case AP_MPMQ_MAX_THREADS:
 535         *result = threads_per_child;
 536         break;
 537     case AP_MPMQ_MIN_SPARE_DAEMONS:
 538         *result = 0;
 539         break;
 540     case AP_MPMQ_MIN_SPARE_THREADS:
 541         *result = min_spare_threads;
 542         break;
 543     case AP_MPMQ_MAX_SPARE_DAEMONS:
 544         *result = 0;
 545         break;
 546     case AP_MPMQ_MAX_SPARE_THREADS:
 547         *result = max_spare_threads;
 548         break;
 549     case AP_MPMQ_MAX_REQUESTS_DAEMON:
 550         *result = ap_max_requests_per_child;
 551         break;
 552     case AP_MPMQ_MAX_DAEMONS:
 553         *result = ap_daemons_limit;
 554         break;
 555     case AP_MPMQ_MPM_STATE:
 556         *result = mpm_state;
 557         break;
 558     case AP_MPMQ_GENERATION:
 559         *result = retained->my_generation;
 560         break;
 561     case AP_MPMQ_CAN_SUSPEND:
 562         *result = 1;
 563         break;
 564     default:
 565         *rv = APR_ENOTIMPL;
 566         break;
 567     }
 568     return OK;
 569 }
 570
 571 static void event_note_child_killed(int childnum, pid_t pid, ap_generation_t gen)
 572 {
 573     if (childnum != -1) { /* child had a scoreboard slot? */
 574         ap_run_child_status(ap_server_conf,
 575                             ap_scoreboard_image->parent[childnum].pid,
 576                             ap_scoreboard_image->parent[childnum].generation,
 577                             childnum, MPM_CHILD_EXITED);
 578         ap_scoreboard_image->parent[childnum].pid = 0;
 579     }
 580     else {
 581         ap_run_child_status(ap_server_conf, pid, gen, -1, MPM_CHILD_EXITED);
 582     }
 583 }
 584
 585 static void event_note_child_started(int slot, pid_t pid)
 586 {
 587     ap_scoreboard_image->parent[slot].pid = pid;
 588     ap_run_child_status(ap_server_conf,
 589                         ap_scoreboard_image->parent[slot].pid,
 590                         retained->my_generation, slot, MPM_CHILD_STARTED);
 591 }
 592
 593 static void event_note_child_lost_slot(int slot, pid_t newpid)
 594 {
 595     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00458)
 596                  "pid %" APR_PID_T_FMT " taking over scoreboard slot from "
 597                  "%" APR_PID_T_FMT "%s",
 598                  newpid,
 599                  ap_scoreboard_image->parent[slot].pid,
 600                  ap_scoreboard_image->parent[slot].quiescing ?
 601                  " (quiescing)" : "");
 602     ap_run_child_status(ap_server_conf,
 603                         ap_scoreboard_image->parent[slot].pid,
 604                         ap_scoreboard_image->parent[slot].generation,
 605                         slot, MPM_CHILD_LOST_SLOT);
 606     /* Don't forget about this exiting child process, or we
 607      * won't be able to kill it if it doesn't exit by the
 608      * time the server is shut down.
 609      */
 610     ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid,
 611                                   ap_scoreboard_image->parent[slot].generation);
 612 }
 613
 614 static const char *event_get_name(void)
 615 {
 616     return "event";
 617 }
 618
 619 /* a clean exit from a child with proper cleanup */
 620 static void clean_child_exit(int code) __attribute__ ((noreturn));
 621 static void clean_child_exit(int code)
 622 {
 623     mpm_state = AP_MPMQ_STOPPING;
 624     if (pchild) {
 625         apr_pool_destroy(pchild);
 626     }
 627
 628     if (one_process) {
 629         event_note_child_killed(/* slot */ 0, 0, 0);
 630     }
 631
 632     exit(code);
 633 }
 634
 635 static void just_die(int sig)
 636 {
 637     clean_child_exit(0);
 638 }
 639
 640 /*****************************************************************
 641  * Connection structures and accounting...
 642  */
 643
 644 static int child_fatal;
 645
 646 /* volatile because they're updated from a signal handler */
 647 static int volatile shutdown_pending;
 648 static int volatile restart_pending;
 649
 650 static apr_status_t decrement_connection_count(void *cs_)
 651 {
 652     event_conn_state_t *cs = cs_;
 653     switch (cs->pub.state) {
 654         case CONN_STATE_LINGER_NORMAL:
 655         case CONN_STATE_LINGER_SHORT:
 656             apr_atomic_dec32(&lingering_count);
 657             break;
 658         case CONN_STATE_SUSPENDED:
 659             apr_atomic_dec32(&suspended_count);
 660             break;
 661         default:
 662             break;
 663     }
 664     apr_atomic_dec32(&connection_count);
 665     return APR_SUCCESS;
 666 }
 667
 668 /*
 669  * ap_start_shutdown() and ap_start_restart(), below, are a first stab at
 670  * functions to initiate shutdown or restart without relying on signals.
 671  * Previously this was initiated in sig_term() and restart() signal handlers,
 672  * but we want to be able to start a shutdown/restart from other sources --
 673  * e.g. on Win32, from the service manager. Now the service manager can
 674  * call ap_start_shutdown() or ap_start_restart() as appropriate.  Note that
 675  * these functions can also be called by the child processes, since global
 676  * variables are no longer used to pass on the required action to the parent.
 677  *
 678  * These should only be called from the parent process itself, since the
 679  * parent process will use the shutdown_pending and restart_pending variables
 680  * to determine whether to shutdown or restart. The child process should
 681  * call signal_parent() directly to tell the parent to die -- this will
 682  * cause neither of those variable to be set, which the parent will
 683  * assume means something serious is wrong (which it will be, for the
 684  * child to force an exit) and so do an exit anyway.
 685  */
 686
 687 static void ap_start_shutdown(int graceful)
 688 {
 689     mpm_state = AP_MPMQ_STOPPING;
 690     if (shutdown_pending == 1) {
 691         /* Um, is this _probably_ not an error, if the user has
 692          * tried to do a shutdown twice quickly, so we won't
 693          * worry about reporting it.
 694          */
 695         return;
 696     }
 697     shutdown_pending = 1;
 698     retained->is_graceful = graceful;
 699 }
 700
 701 /* do a graceful restart if graceful == 1 */
 702 static void ap_start_restart(int graceful)
 703 {
 704     mpm_state = AP_MPMQ_STOPPING;
 705     if (restart_pending == 1) {
 706         /* Probably not an error - don't bother reporting it */
 707         return;
 708     }
 709     restart_pending = 1;
 710     retained->is_graceful = graceful;
 711 }
 712
 713 static void sig_term(int sig)
 714 {
 715     ap_start_shutdown(sig == AP_SIG_GRACEFUL_STOP);
 716 }
 717
 718 static void restart(int sig)
 719 {
 720     ap_start_restart(sig == AP_SIG_GRACEFUL);
 721 }
 722
 723 static void set_signals(void)
 724 {
 725 #ifndef NO_USE_SIGACTION
 726     struct sigaction sa;
 727 #endif
 728
 729     if (!one_process) {
 730         ap_fatal_signal_setup(ap_server_conf, pconf);
 731     }
 732
 733 #ifndef NO_USE_SIGACTION
 734     sigemptyset(&sa.sa_mask);
 735     sa.sa_flags = 0;
 736
 737     sa.sa_handler = sig_term;
 738     if (sigaction(SIGTERM, &sa, NULL) < 0)
 739         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00459)
 740                      "sigaction(SIGTERM)");
 741 #ifdef AP_SIG_GRACEFUL_STOP
 742     if (sigaction(AP_SIG_GRACEFUL_STOP, &sa, NULL) < 0)
 743         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00460)
 744                      "sigaction(" AP_SIG_GRACEFUL_STOP_STRING ")");
 745 #endif
 746 #ifdef SIGINT
 747     if (sigaction(SIGINT, &sa, NULL) < 0)
 748         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00461)
 749                      "sigaction(SIGINT)");
 750 #endif
 751 #ifdef SIGXCPU
 752     sa.sa_handler = SIG_DFL;
 753     if (sigaction(SIGXCPU, &sa, NULL) < 0)
 754         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00462)
 755                      "sigaction(SIGXCPU)");
 756 #endif
 757 #ifdef SIGXFSZ
 758     /* For systems following the LFS standard, ignoring SIGXFSZ allows
 759      * a write() beyond the 2GB limit to fail gracefully with E2BIG
 760      * rather than terminate the process. */
 761     sa.sa_handler = SIG_IGN;
 762     if (sigaction(SIGXFSZ, &sa, NULL) < 0)
 763         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00463)
 764                      "sigaction(SIGXFSZ)");
 765 #endif
 766 #ifdef SIGPIPE
 767     sa.sa_handler = SIG_IGN;
 768     if (sigaction(SIGPIPE, &sa, NULL) < 0)
 769         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00464)
 770                      "sigaction(SIGPIPE)");
 771 #endif
 772
 773     /* we want to ignore HUPs and AP_SIG_GRACEFUL while we're busy
 774      * processing one */
 775     sigaddset(&sa.sa_mask, SIGHUP);
 776     sigaddset(&sa.sa_mask, AP_SIG_GRACEFUL);
 777     sa.sa_handler = restart;
 778     if (sigaction(SIGHUP, &sa, NULL) < 0)
 779         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00465)
 780                      "sigaction(SIGHUP)");
 781     if (sigaction(AP_SIG_GRACEFUL, &sa, NULL) < 0)
 782         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00466)
 783                      "sigaction(" AP_SIG_GRACEFUL_STRING ")");
 784 #else
 785     if (!one_process) {
 786 #ifdef SIGXCPU
 787         apr_signal(SIGXCPU, SIG_DFL);
 788 #endif /* SIGXCPU */
 789 #ifdef SIGXFSZ
 790         apr_signal(SIGXFSZ, SIG_IGN);
 791 #endif /* SIGXFSZ */
 792     }
 793
 794     apr_signal(SIGTERM, sig_term);
 795 #ifdef SIGHUP
 796     apr_signal(SIGHUP, restart);
 797 #endif /* SIGHUP */
 798 #ifdef AP_SIG_GRACEFUL
 799     apr_signal(AP_SIG_GRACEFUL, restart);
 800 #endif /* AP_SIG_GRACEFUL */
 801 #ifdef AP_SIG_GRACEFUL_STOP
 802      apr_signal(AP_SIG_GRACEFUL_STOP, sig_term);
 803 #endif /* AP_SIG_GRACEFUL_STOP */
 804 #ifdef SIGPIPE
 805     apr_signal(SIGPIPE, SIG_IGN);
 806 #endif /* SIGPIPE */
 807
 808 #endif
 809 }
 810
 811 static int start_lingering_close_common(event_conn_state_t *cs)
 812 {
 813     apr_status_t rv;
 814     struct timeout_queue *q;
 815     apr_socket_t *csd = cs->pfd.desc.s;
 816 #ifdef AP_DEBUG
 817     {
 818         rv = apr_socket_timeout_set(csd, 0);
 819         AP_DEBUG_ASSERT(rv == APR_SUCCESS);
 820     }
 821 #else
 822     apr_socket_timeout_set(csd, 0);
 823 #endif
 824     /*
 825      * If some module requested a shortened waiting period, only wait for
 826      * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain
 827      * DoS attacks.
 828      */
 829     if (apr_table_get(cs->c->notes, "short-lingering-close")) {
 830         cs->expiration_time =
 831             apr_time_now() + apr_time_from_sec(SECONDS_TO_LINGER);
 832         q = &short_linger_q;
 833         cs->pub.state = CONN_STATE_LINGER_SHORT;
 834     }
 835     else {
 836         cs->expiration_time =
 837             apr_time_now() + apr_time_from_sec(MAX_SECS_TO_LINGER);
 838         q = &linger_q;
 839         cs->pub.state = CONN_STATE_LINGER_NORMAL;
 840     }
 841     apr_atomic_inc32(&lingering_count);
 842     apr_thread_mutex_lock(timeout_mutex);
 843     TO_QUEUE_APPEND(*q, cs);
 844     cs->pfd.reqevents = (
 845             cs->pub.sense == CONN_SENSE_WANT_WRITE ? APR_POLLOUT :
 846                     APR_POLLIN) | APR_POLLHUP | APR_POLLERR;
 847     cs->pub.sense = CONN_SENSE_DEFAULT;
 848     rv = apr_pollset_add(event_pollset, &cs->pfd);
 849     apr_thread_mutex_unlock(timeout_mutex);
 850     if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
 851         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
 852                      "start_lingering_close: apr_pollset_add failure");
 853         apr_thread_mutex_lock(timeout_mutex);
 854         TO_QUEUE_REMOVE(*q, cs);
 855         apr_thread_mutex_unlock(timeout_mutex);
 856         apr_socket_close(cs->pfd.desc.s);
 857         apr_pool_clear(cs->p);
 858         ap_push_pool(worker_queue_info, cs->p);
 859         return 0;
 860     }
 861     return 1;
 862 }
 863
 864 /*
 865  * Close our side of the connection, flushing data to the client first.
 866  * Pre-condition: cs is not in any timeout queue and not in the pollset,
 867  *                timeout_mutex is not locked
 868  * return: 0 if connection is fully closed,
 869  *         1 if connection is lingering
 870  * May only be called by worker thread.
 871  */
 872 static int start_lingering_close_blocking(event_conn_state_t *cs)
 873 {
 874     if (ap_start_lingering_close(cs->c)) {
 875         apr_pool_clear(cs->p);
 876         ap_push_pool(worker_queue_info, cs->p);
 877         return 0;
 878     }
 879     return start_lingering_close_common(cs);
 880 }
 881
 882 /*
 883  * Close our side of the connection, NOT flushing data to the client.
 884  * This should only be called if there has been an error or if we know
 885  * that our send buffers are empty.
 886  * Pre-condition: cs is not in any timeout queue and not in the pollset,
 887  *                timeout_mutex is not locked
 888  * return: 0 if connection is fully closed,
 889  *         1 if connection is lingering
 890  * may be called by listener thread
 891  */
 892 static int start_lingering_close_nonblocking(event_conn_state_t *cs)
 893 {
 894     conn_rec *c = cs->c;
 895     apr_socket_t *csd = cs->pfd.desc.s;
 896
 897     if (c->aborted
 898         || ap_shutdown_conn(c, 0) != APR_SUCCESS || c->aborted
 899         || apr_socket_shutdown(csd, APR_SHUTDOWN_WRITE) != APR_SUCCESS) {
 900         apr_socket_close(csd);
 901         apr_pool_clear(cs->p);
 902         ap_push_pool(worker_queue_info, cs->p);
 903         return 0;
 904     }
 905     return start_lingering_close_common(cs);
 906 }
 907
 908 /*
 909  * forcibly close a lingering connection after the lingering period has
 910  * expired
 911  * Pre-condition: cs is not in any timeout queue and not in the pollset
 912  * return: irrelevant (need same prototype as start_lingering_close)
 913  */
 914 static int stop_lingering_close(event_conn_state_t *cs)
 915 {
 916     apr_status_t rv;
 917     apr_socket_t *csd = ap_get_conn_socket(cs->c);
 918     ap_log_error(APLOG_MARK, APLOG_TRACE4, 0, ap_server_conf,
 919                  "socket reached timeout in lingering-close state");
 920     rv = apr_socket_close(csd);
 921     if (rv != APR_SUCCESS) {
 922         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(00468) "error closing socket");
 923         AP_DEBUG_ASSERT(0);
 924     }
 925     apr_pool_clear(cs->p);
 926     ap_push_pool(worker_queue_info, cs->p);
 927     return 0;
 928 }
 929
 930 static void notify_suspend(event_conn_state_t *cs)
 931 {
 932     ap_run_suspend_connection(cs->c, cs->r);
 933     cs->suspended = 1;
 934 }
 935
 936 static void notify_resume(event_conn_state_t *cs)
 937 {
 938     cs->suspended = 0;
 939     ap_run_resume_connection(cs->c, cs->r);
 940 }
 941
 942 /*
 943  * This runs before any non-MPM cleanup code on the connection;
 944  * if the connection is currently suspended as far as modules
 945  * know, provide notification of resumption.
 946  */
 947 static apr_status_t ptrans_pre_cleanup(void *dummy)
 948 {
 949     event_conn_state_t *cs = dummy;
 950
 951     if (cs->suspended) {
 952         notify_resume(cs);
 953     }
 954     return APR_SUCCESS;
 955 }
 956
 957 /*
 958  * event_pre_read_request() and event_request_cleanup() track the
 959  * current r for a given connection.
 960  */
 961 static apr_status_t event_request_cleanup(void *dummy)
 962 {
 963     conn_rec *c = dummy;
 964     event_conn_state_t *cs = ap_get_module_config(c->conn_config,
 965                                                   &mpm_event_module);
 966
 967     cs->r = NULL;
 968     return APR_SUCCESS;
 969 }
 970
 971 static void event_pre_read_request(request_rec *r, conn_rec *c)
 972 {
 973     event_conn_state_t *cs = ap_get_module_config(c->conn_config,
 974                                                   &mpm_event_module);
 975
 976     cs->r = r;
 977     apr_pool_cleanup_register(r->pool, c, event_request_cleanup,
 978                               apr_pool_cleanup_null);
 979 }
 980
 981 /*
 982  * process one connection in the worker
 983  */
 984 static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock,
 985                           event_conn_state_t * cs, int my_child_num,
 986                           int my_thread_num)
 987 {
 988     conn_rec *c;
 989     long conn_id = ID_FROM_CHILD_THREAD(my_child_num, my_thread_num);
 990     int rc;
 991     ap_sb_handle_t *sbh;
 992
 993     /* XXX: This will cause unbounded mem usage for long lasting connections */
 994     ap_create_sb_handle(&sbh, p, my_child_num, my_thread_num);
 995
 996     if (cs == NULL) {           /* This is a new connection */
 997         listener_poll_type *pt = apr_pcalloc(p, sizeof(*pt));
 998         cs = apr_pcalloc(p, sizeof(event_conn_state_t));
 999         cs->bucket_alloc = apr_bucket_alloc_create(p);
1000         c = ap_run_create_connection(p, ap_server_conf, sock,
1001                                      conn_id, sbh, cs->bucket_alloc);
1002         if (!c) {
1003             apr_bucket_alloc_destroy(cs->bucket_alloc);
1004             apr_pool_clear(p);
1005             ap_push_pool(worker_queue_info, p);
1006             return;
1007         }
1008         apr_atomic_inc32(&connection_count);
1009         apr_pool_cleanup_register(c->pool, cs, decrement_connection_count,
1010                                   apr_pool_cleanup_null);
1011         ap_set_module_config(c->conn_config, &mpm_event_module, cs);
1012         c->current_thread = thd;
1013         cs->c = c;
1014         c->cs = &(cs->pub);
1015         cs->p = p;
1016         cs->pfd.desc_type = APR_POLL_SOCKET;
1017         cs->pfd.reqevents = APR_POLLIN;
1018         cs->pfd.desc.s = sock;
1019         pt->type = PT_CSD;
1020         pt->baton = cs;
1021         cs->pfd.client_data = pt;
1022         apr_pool_pre_cleanup_register(p, cs, ptrans_pre_cleanup);
1023         TO_QUEUE_ELEM_INIT(cs);
1024
1025         ap_update_vhost_given_ip(c);
1026
1027         rc = ap_run_pre_connection(c, sock);
1028         if (rc != OK && rc != DONE) {
1029             ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c, APLOGNO(00469)
1030                           "process_socket: connection aborted");
1031             c->aborted = 1;
1032         }
1033
1034         /**
1035          * XXX If the platform does not have a usable way of bundling
1036          * accept() with a socket readability check, like Win32,
1037          * and there are measurable delays before the
1038          * socket is readable due to the first data packet arriving,
1039          * it might be better to create the cs on the listener thread
1040          * with the state set to CONN_STATE_CHECK_REQUEST_LINE_READABLE
1041          *
1042          * FreeBSD users will want to enable the HTTP accept filter
1043          * module in their kernel for the highest performance
1044          * When the accept filter is active, sockets are kept in the
1045          * kernel until a HTTP request is received.
1046          */
1047         cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
1048
1049         cs->pub.sense = CONN_SENSE_DEFAULT;
1050     }
1051     else {
1052         c = cs->c;
1053         c->sbh = sbh;
1054         notify_resume(cs);
1055         c->current_thread = thd;
1056     }
1057
1058     if (c->clogging_input_filters && !c->aborted) {
1059         /* Since we have an input filter which 'clogs' the input stream,
1060          * like mod_ssl used to, lets just do the normal read from input
1061          * filters, like the Worker MPM does. Filters that need to write
1062          * where they would otherwise read, or read where they would
1063          * otherwise write, should set the sense appropriately.
1064          */
1065         apr_atomic_inc32(&clogged_count);
1066         ap_run_process_connection(c);
1067         if (cs->pub.state != CONN_STATE_SUSPENDED) {
1068             cs->pub.state = CONN_STATE_LINGER;
1069         }
1070         apr_atomic_dec32(&clogged_count);
1071     }
1072
1073 read_request:
1074     if (cs->pub.state == CONN_STATE_READ_REQUEST_LINE) {
1075         if (!c->aborted) {
1076             ap_run_process_connection(c);
1077
1078             /* state will be updated upon return
1079              * fall thru to either wait for readability/timeout or
1080              * do lingering close
1081              */
1082         }
1083         else {
1084             cs->pub.state = CONN_STATE_LINGER;
1085         }
1086     }
1087
1088     if (cs->pub.state == CONN_STATE_WRITE_COMPLETION) {
1089         ap_filter_t *output_filter = c->output_filters;
1090         apr_status_t rv;
1091         ap_update_child_status_from_conn(sbh, SERVER_BUSY_WRITE, c);
1092         while (output_filter->next != NULL) {
1093             output_filter = output_filter->next;
1094         }
1095         rv = output_filter->frec->filter_func.out_func(output_filter, NULL);
1096         if (rv != APR_SUCCESS) {
1097             ap_log_cerror(APLOG_MARK, APLOG_DEBUG, rv, c, APLOGNO(00470)
1098                           "network write failure in core output filter");
1099             cs->pub.state = CONN_STATE_LINGER;
1100         }
1101         else if (c->data_in_output_filters) {
1102             /* Still in WRITE_COMPLETION_STATE:
1103              * Set a write timeout for this connection, and let the
1104              * event thread poll for writeability.
1105              */
1106             cs->expiration_time = ap_server_conf->timeout + apr_time_now();
1107             c->sbh = NULL;
1108             notify_suspend(cs);
1109             apr_thread_mutex_lock(timeout_mutex);
1110             TO_QUEUE_APPEND(write_completion_q, cs);
1111             cs->pfd.reqevents = (
1112                     cs->pub.sense == CONN_SENSE_WANT_READ ? APR_POLLIN :
1113                             APR_POLLOUT) | APR_POLLHUP | APR_POLLERR;
1114             cs->pub.sense = CONN_SENSE_DEFAULT;
1115             rc = apr_pollset_add(event_pollset, &cs->pfd);
1116             apr_thread_mutex_unlock(timeout_mutex);
1117             return;
1118         }
1119         else if (c->keepalive != AP_CONN_KEEPALIVE || c->aborted ||
1120             listener_may_exit) {
1121             cs->pub.state = CONN_STATE_LINGER;
1122         }
1123         else if (c->data_in_input_filters) {
1124             cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
1125             goto read_request;
1126         }
1127         else {
1128             cs->pub.state = CONN_STATE_CHECK_REQUEST_LINE_READABLE;
1129         }
1130     }
1131
1132     if (cs->pub.state == CONN_STATE_LINGER) {
1133         if (!start_lingering_close_blocking(cs)) {
1134             c->sbh = NULL;
1135             notify_suspend(cs);
1136             return;
1137         }
1138     }
1139     else if (cs->pub.state == CONN_STATE_CHECK_REQUEST_LINE_READABLE) {
1140         /* It greatly simplifies the logic to use a single timeout value here
1141          * because the new element can just be added to the end of the list and
1142          * it will stay sorted in expiration time sequence.  If brand new
1143          * sockets are sent to the event thread for a readability check, this
1144          * will be a slight behavior change - they use the non-keepalive
1145          * timeout today.  With a normal client, the socket will be readable in
1146          * a few milliseconds anyway.
1147          */
1148         cs->expiration_time = ap_server_conf->keep_alive_timeout +
1149                               apr_time_now();
1150         c->sbh = NULL;
1151         notify_suspend(cs);
1152         apr_thread_mutex_lock(timeout_mutex);
1153         TO_QUEUE_APPEND(keepalive_q, cs);
1154
1155         /* Add work to pollset. */
1156         cs->pfd.reqevents = APR_POLLIN;
1157         rc = apr_pollset_add(event_pollset, &cs->pfd);
1158         apr_thread_mutex_unlock(timeout_mutex);
1159
1160         if (rc != APR_SUCCESS) {
1161             ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1162                          "process_socket: apr_pollset_add failure");
1163             AP_DEBUG_ASSERT(rc == APR_SUCCESS);
1164         }
1165         return;
1166     }
1167     else if (cs->pub.state == CONN_STATE_SUSPENDED) {
1168         cs->c->suspended_baton = cs;
1169         apr_atomic_inc32(&suspended_count);
1170     }
1171     /*
1172      * Prevent this connection from writing to our connection state after it
1173      * is no longer associated with this thread. This would happen if the EOR
1174      * bucket is destroyed from the listener thread due to a connection abort
1175      * or timeout.
1176      */
1177     c->sbh = NULL;
1178     notify_suspend(cs);
1179     return;
1180 }
1181
1182 /* Put a SUSPENDED connection back into a queue. */
1183 static apr_status_t event_resume_suspended (conn_rec *c) {
1184     event_conn_state_t* cs = (event_conn_state_t*) c->suspended_baton;
1185     if (cs == NULL) {
1186         ap_log_cerror (APLOG_MARK, LOG_WARNING, 0, c, APLOGNO(02615)
1187                 "event_resume_suspended: suspended_baton is NULL");
1188         return APR_EGENERAL;
1189     } else if (!cs->suspended) {
1190         ap_log_cerror (APLOG_MARK, LOG_WARNING, 0, c, APLOGNO(02616)
1191                 "event_resume_suspended: Thread isn't suspended");
1192         return APR_EGENERAL;
1193     }
1194     apr_atomic_dec32(&suspended_count);
1195     c->suspended_baton = NULL;
1196
1197     apr_thread_mutex_lock(timeout_mutex);
1198     TO_QUEUE_APPEND(write_completion_q, cs);
1199     cs->pfd.reqevents = (
1200             cs->pub.sense == CONN_SENSE_WANT_READ ? APR_POLLIN :
1201                     APR_POLLOUT) | APR_POLLHUP | APR_POLLERR;
1202     cs->pub.sense = CONN_SENSE_DEFAULT;
1203     apr_pollset_add(event_pollset, &cs->pfd);
1204     apr_thread_mutex_unlock(timeout_mutex);
1205
1206     return OK;
1207 }
1208
1209 /* conns_this_child has gone to zero or below.  See if the admin coded
1210    "MaxConnectionsPerChild 0", and keep going in that case.  Doing it this way
1211    simplifies the hot path in worker_thread */
1212 static void check_infinite_requests(void)
1213 {
1214     if (ap_max_requests_per_child) {
1215         ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1216                      "Stopping process due to MaxConnectionsPerChild");
1217         signal_threads(ST_GRACEFUL);
1218     }
1219     else {
1220         /* keep going */
1221         conns_this_child = APR_INT32_MAX;
1222     }
1223 }
1224
1225 static void close_listeners(int process_slot, int *closed) {
1226     if (!*closed) {
1227         int i;
1228         disable_listensocks(process_slot);
1229         ap_close_listeners();
1230         *closed = 1;
1231         dying = 1;
1232         ap_scoreboard_image->parent[process_slot].quiescing = 1;
1233         for (i = 0; i < threads_per_child; ++i) {
1234             ap_update_child_status_from_indexes(process_slot, i,
1235                                                 SERVER_GRACEFUL, NULL);
1236         }
1237         /* wake up the main thread */
1238         kill(ap_my_pid, SIGTERM);
1239     }
1240 }
1241
1242 static void unblock_signal(int sig)
1243 {
1244     sigset_t sig_mask;
1245
1246     sigemptyset(&sig_mask);
1247     sigaddset(&sig_mask, sig);
1248 #if defined(SIGPROCMASK_SETS_THREAD_MASK)
1249     sigprocmask(SIG_UNBLOCK, &sig_mask, NULL);
1250 #else
1251     pthread_sigmask(SIG_UNBLOCK, &sig_mask, NULL);
1252 #endif
1253 }
1254
/* Signal handler that intentionally does nothing.
 *
 * XXX If specifying SIG_IGN is guaranteed to unblock a syscall,
 *     then we don't need this goofy function.
 */
static void dummy_signal_handler(int sig)
{
    (void) sig;
}
1261
1262
1263 #if HAVE_SERF
1264 static apr_status_t s_socket_add(void *user_baton,
1265                                  apr_pollfd_t *pfd,
1266                                  void *serf_baton)
1267 {
1268     s_baton_t *s = (s_baton_t*)user_baton;
1269     /* XXXXX: recycle listener_poll_types */
1270     listener_poll_type *pt = ap_malloc(sizeof(*pt));
1271     pt->type = PT_SERF;
1272     pt->baton = serf_baton;
1273     pfd->client_data = pt;
1274     return apr_pollset_add(s->pollset, pfd);
1275 }
1276
1277 static apr_status_t s_socket_remove(void *user_baton,
1278                                     apr_pollfd_t *pfd,
1279                                     void *serf_baton)
1280 {
1281     s_baton_t *s = (s_baton_t*)user_baton;
1282     listener_poll_type *pt = pfd->client_data;
1283     free(pt);
1284     return apr_pollset_remove(s->pollset, pfd);
1285 }
1286 #endif
1287
1288 static apr_status_t init_pollset(apr_pool_t *p)
1289 {
1290 #if HAVE_SERF
1291     s_baton_t *baton = NULL;
1292 #endif
1293     ap_listen_rec *lr;
1294     listener_poll_type *pt;
1295     int i = 0;
1296
1297     TO_QUEUE_INIT(write_completion_q);
1298     TO_QUEUE_INIT(keepalive_q);
1299     TO_QUEUE_INIT(linger_q);
1300     TO_QUEUE_INIT(short_linger_q);
1301
1302     listener_pollfd = apr_palloc(p, sizeof(apr_pollfd_t) * num_listensocks);
1303     for (lr = child_listen; lr != NULL; lr = lr->next, i++) {
1304         apr_pollfd_t *pfd;
1305         AP_DEBUG_ASSERT(i < num_listensocks);
1306         pfd = &listener_pollfd[i];
1307         pt = apr_pcalloc(p, sizeof(*pt));
1308         pfd->desc_type = APR_POLL_SOCKET;
1309         pfd->desc.s = lr->sd;
1310         pfd->reqevents = APR_POLLIN;
1311
1312         pt->type = PT_ACCEPT;
1313         pt->baton = lr;
1314
1315         pfd->client_data = pt;
1316
1317         apr_socket_opt_set(pfd->desc.s, APR_SO_NONBLOCK, 1);
1318         apr_pollset_add(event_pollset, pfd);
1319
1320         lr->accept_func = ap_unixd_accept;
1321     }
1322
1323 #if HAVE_SERF
1324     baton = apr_pcalloc(p, sizeof(*baton));
1325     baton->pollset = event_pollset;
1326     /* TODO: subpools, threads, reuse, etc.  -- currently use malloc() inside :( */
1327     baton->pool = p;
1328
1329     g_serf = serf_context_create_ex(baton,
1330                                     s_socket_add,
1331                                     s_socket_remove, p);
1332
1333     ap_register_provider(p, "mpm_serf",
1334                          "instance", "0", g_serf);
1335
1336 #endif
1337
1338     return APR_SUCCESS;
1339 }
1340
1341 static apr_status_t push_timer2worker(timer_event_t* te)
1342 {
1343     return ap_queue_push_timer(worker_queue, te);
1344 }
1345
1346 /*
1347  * Pre-condition: pfd->cs is neither in pollset nor timeout queue
1348  * this function may only be called by the listener
1349  */
1350 static apr_status_t push2worker(const apr_pollfd_t * pfd,
1351                                 apr_pollset_t * pollset)
1352 {
1353     listener_poll_type *pt = (listener_poll_type *) pfd->client_data;
1354     event_conn_state_t *cs = (event_conn_state_t *) pt->baton;
1355     apr_status_t rc;
1356
1357     rc = ap_queue_push(worker_queue, cs->pfd.desc.s, cs, cs->p);
1358     if (rc != APR_SUCCESS) {
1359         /* trash the connection; we couldn't queue the connected
1360          * socket to a worker
1361          */
1362         apr_bucket_alloc_destroy(cs->bucket_alloc);
1363         apr_socket_close(cs->pfd.desc.s);
1364         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1365                      ap_server_conf, APLOGNO(00471) "push2worker: ap_queue_push failed");
1366         apr_pool_clear(cs->p);
1367         ap_push_pool(worker_queue_info, cs->p);
1368     }
1369
1370     return rc;
1371 }
1372
1373 /* get_worker:
1374  *     If *have_idle_worker_p == 0, reserve a worker thread, and set
1375  *     *have_idle_worker_p = 1.
1376  *     If *have_idle_worker_p is already 1, will do nothing.
1377  *     If blocking == 1, block if all workers are currently busy.
1378  *     If no worker was available immediately, will set *all_busy to 1.
1379  *     XXX: If there are no workers, we should not block immediately but
1380  *     XXX: close all keep-alive connections first.
1381  */
1382 static void get_worker(int *have_idle_worker_p, int blocking, int *all_busy)
1383 {
1384     apr_status_t rc;
1385
1386     if (*have_idle_worker_p) {
1387         /* already reserved a worker thread - must have hit a
1388          * transient error on a previous pass
1389          */
1390         return;
1391     }
1392
1393     if (blocking)
1394         rc = ap_queue_info_wait_for_idler(worker_queue_info, all_busy);
1395     else
1396         rc = ap_queue_info_try_get_idler(worker_queue_info);
1397
1398     if (rc == APR_SUCCESS) {
1399         *have_idle_worker_p = 1;
1400     }
1401     else if (!blocking && rc == APR_EAGAIN) {
1402         *all_busy = 1;
1403     }
1404     else if (!APR_STATUS_IS_EOF(rc)) {
1405         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf, APLOGNO(00472)
1406                      "ap_queue_info_wait_for_idler failed.  "
1407                      "Attempting to shutdown process gracefully");
1408         signal_threads(ST_GRACEFUL);
1409     }
1410 }
1411
/* Structures to reuse */
/* Ring of spare timer_event_t entries: event_get_timer_event() takes an
 * entry from here, when one is available, instead of allocating a new one.
 */
static APR_RING_HEAD(timer_free_ring_t, timer_event_t) timer_free_ring;

/* Skiplist of timer events: event_get_timer_event() inserts events into it
 * and also allocates fresh timer_event_t entries from it.
 */
static apr_skiplist *timer_skiplist;
1416
1417 static int indexing_comp(void *a, void *b)
1418 {
1419     apr_time_t t1 = (apr_time_t) (((timer_event_t *) a)->when);
1420     apr_time_t t2 = (apr_time_t) (((timer_event_t *) b)->when);
1421     AP_DEBUG_ASSERT(t1);
1422     AP_DEBUG_ASSERT(t2);
1423     return ((t1 < t2) ? -1 : ((t1 > t2) ? 1 : 0));
1424 }
1425
1426 static int indexing_compk(void *ac, void *b)
1427 {
1428     apr_time_t *t1 = (apr_time_t *) ac;
1429     apr_time_t t2 = (apr_time_t) (((timer_event_t *) b)->when);
1430     AP_DEBUG_ASSERT(t2);
1431     return ((*t1 < t2) ? -1 : ((*t1 > t2) ? 1 : 0));
1432 }
1433
/* Mutex that event_get_timer_event() holds while it touches
 * timer_free_ring and timer_skiplist.
 */
static apr_thread_mutex_t *g_timer_skiplist_mtx;
1435
1436 static timer_event_t * event_get_timer_event(apr_time_t t,
1437                                              ap_mpm_callback_fn_t *cbfn,
1438                                              void *baton,
1439                                              int insert,
1440                                              apr_pollfd_t **remove)
1441 {
1442     timer_event_t *te;
1443     /* oh yeah, and make locking smarter/fine grained. */
1444
1445     apr_thread_mutex_lock(g_timer_skiplist_mtx);
1446
1447     if (!APR_RING_EMPTY(&timer_free_ring, timer_event_t, link)) {
1448         te = APR_RING_FIRST(&timer_free_ring);
1449         APR_RING_REMOVE(te, link);
1450     }
1451     else {
1452         te = apr_skiplist_alloc(timer_skiplist, sizeof(timer_event_t));
1453         APR_RING_ELEM_INIT(te, link);
1454     }
1455
1456     te->cbfunc = cbfn;
1457     te->baton = baton;
1458     te->canceled = 0;
1459     te->when = t;
1460     te->remove = remove;
1461
1462     if (insert) {
1463         /* Okay, insert sorted by when.. */
1464         apr_skiplist_insert(timer_skiplist, (void *)te);
1465     }
1466     apr_thread_mutex_unlock(g_timer_skiplist_mtx);
1467
1468     return te;
1469 }
1470
1471 static apr_status_t event_register_timed_callback_ex(apr_time_t t,
1472                                                   ap_mpm_callback_fn_t *cbfn,
1473                                                   void *baton,
1474                                                   apr_pollfd_t **remove)
1475 {
1476     event_get_timer_event(t + apr_time_now(), cbfn, baton, 1, remove);
1477     return APR_SUCCESS;
1478 }
1479
1480 static apr_status_t event_register_timed_callback(apr_time_t t,
1481                                                   ap_mpm_callback_fn_t *cbfn,
1482                                                   void *baton)
1483 {
1484     event_register_timed_callback_ex(t, cbfn, baton, NULL);
1485     return APR_SUCCESS;
1486 }
1487
1488 static apr_status_t event_register_socket_callback_ex(apr_socket_t **s,
1489                                                   apr_pool_t *p,
1490                                                   int for_read,
1491                                                   ap_mpm_socket_callback_fn_t *cbfn,
1492                                                   ap_mpm_callback_fn_t *tofn,
1493                                                   void *baton,
1494                                                   apr_time_t timeout)
1495 {
1496     apr_status_t rc, final_rc= APR_SUCCESS;
1497     int i = 0, nsock;
1498     socket_callback_baton_t *scb = apr_pcalloc(p, sizeof(*scb));
1499     listener_poll_type *pt = apr_palloc(p, sizeof(*pt));
1500     apr_pollfd_t **pfds = NULL;
1501
1502     while(s[i] != NULL) {
1503         i++;
1504     }
1505     nsock = i;
1506
1507     pfds = apr_pcalloc(p, (nsock+1) * sizeof(apr_pollfd_t*));
1508
1509     pt->type = PT_USER;
1510     pt->baton = scb;
1511
1512     scb->cbfunc = cbfn;
1513     scb->user_baton = baton;
1514     scb->nsock = nsock;
1515     scb->pfds = pfds;
1516
1517     for (i = 0; i<nsock; i++) {
1518         pfds[i] = apr_pcalloc(p, sizeof(apr_pollfd_t));
1519         pfds[i]->desc_type = APR_POLL_SOCKET;
1520         pfds[i]->reqevents = (for_read ? APR_POLLIN : APR_POLLOUT) | APR_POLLERR | APR_POLLHUP;
1521         pfds[i]->desc.s = s[i];
1522         pfds[i]->p = p;
1523         pfds[i]->client_data = pt;
1524     }
1525
1526     if (timeout > 0) {
1527         /* XXX:  This cancel timer event count fire before the pollset is updated */
1528         scb->cancel_event = event_get_timer_event(timeout + apr_time_now(), tofn, baton, 1, pfds);
1529     }
1530     for (i = 0; i<nsock; i++) {
1531         rc = apr_pollset_add(event_pollset, pfds[i]);
1532         if (rc != APR_SUCCESS) final_rc = rc;
1533     }
1534     return final_rc;
1535 }
1536 static apr_status_t event_register_socket_callback(apr_socket_t **s,
1537                                                   apr_pool_t *p,
1538                                                   int for_read,
1539                                                   ap_mpm_socket_callback_fn_t *cbfn,
1540                                                   void *baton)
1541 {
1542     return event_register_socket_callback_ex(s, p, for_read,
1543                                              cbfn,
1544                                              NULL, /* no timeout function */
1545                                              baton,
1546                                              0     /* no timeout */);
1547 }
1548 static apr_status_t event_unregister_socket_callback(apr_socket_t **s, apr_pool_t *p)
1549 {
1550     int i = 0, nsock;
1551     apr_status_t final_rc = APR_SUCCESS;
1552     apr_pollfd_t **pfds = NULL;
1553
1554     while(s[i] != NULL) {
1555         i++;
1556     }
1557     nsock = i;
1558
1559     pfds = apr_palloc(p, nsock * sizeof(apr_pollfd_t*));
1560
1561     for (i = 0; i<nsock; i++) {
1562         apr_status_t rc;
1563         pfds[i] = apr_pcalloc(p, sizeof(apr_pollfd_t));
1564         pfds[i]->desc_type = APR_POLL_SOCKET;
1565         pfds[i]->reqevents = APR_POLLERR | APR_POLLHUP;
1566         pfds[i]->desc.s = s[i];
1567         pfds[i]->client_data = NULL;
1568         rc = apr_pollset_remove(event_pollset, pfds[i]);
1569         if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) final_rc = APR_SUCCESS;
1570     }
1571
1572     return final_rc;
1573 }
1574
1575 /*
1576  * Close socket and clean up if remote closed its end while we were in
1577  * lingering close.
1578  * Only to be called in the listener thread;
1579  * Pre-condition: cs is in one of the linger queues and in the pollset
1580  */
1581 static void process_lingering_close(event_conn_state_t *cs, const apr_pollfd_t *pfd)
1582 {
1583     apr_socket_t *csd = ap_get_conn_socket(cs->c);
1584     char dummybuf[2048];
1585     apr_size_t nbytes;
1586     apr_status_t rv;
1587     struct timeout_queue *q;
1588     q = (cs->pub.state == CONN_STATE_LINGER_SHORT) ?  &short_linger_q : &linger_q;
1589
1590     /* socket is already in non-blocking state */
1591     do {
1592         nbytes = sizeof(dummybuf);
1593         rv = apr_socket_recv(csd, dummybuf, &nbytes);
1594     } while (rv == APR_SUCCESS);
1595
1596     if (APR_STATUS_IS_EAGAIN(rv)) {
1597         return;
1598     }
1599
1600     apr_thread_mutex_lock(timeout_mutex);
1601     rv = apr_pollset_remove(event_pollset, pfd);
1602     AP_DEBUG_ASSERT(rv == APR_SUCCESS);
1603
1604     rv = apr_socket_close(csd);
1605     AP_DEBUG_ASSERT(rv == APR_SUCCESS);
1606
1607     TO_QUEUE_REMOVE(*q, cs);
1608     apr_thread_mutex_unlock(timeout_mutex);
1609     TO_QUEUE_ELEM_INIT(cs);
1610
1611     apr_pool_clear(cs->p);
1612     ap_push_pool(worker_queue_info, cs->p);
1613 }
1614
1615 /* call 'func' for all elements of 'q' with timeout less than 'timeout_time'.
1616  * Pre-condition: timeout_mutex must already be locked
1617  * Post-condition: timeout_mutex will be locked again
1618  */
1619 static void process_timeout_queue(struct timeout_queue *q,
1620                                   apr_time_t timeout_time,
1621                                   int (*func)(event_conn_state_t *))
1622 {
1623     int count = 0;
1624     event_conn_state_t *first, *cs, *last;
1625     apr_status_t rv;
1626     if (!q->count) {
1627         return;
1628     }
1629     AP_DEBUG_ASSERT(!APR_RING_EMPTY(&q->head, event_conn_state_t, timeout_list));
1630
1631     cs = first = APR_RING_FIRST(&q->head);
1632     while (cs != APR_RING_SENTINEL(&q->head, event_conn_state_t, timeout_list)
1633            && cs->expiration_time < timeout_time) {
1634         last = cs;
1635         rv = apr_pollset_remove(event_pollset, &cs->pfd);
1636         if (rv != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rv)) {
1637             ap_log_cerror(APLOG_MARK, APLOG_ERR, rv, cs->c, APLOGNO(00473)
1638                           "apr_pollset_remove failed");
1639         }
1640         cs = APR_RING_NEXT(cs, timeout_list);
1641         count++;
1642     }
1643     if (!count)
1644         return;
1645
1646     APR_RING_UNSPLICE(first, last, timeout_list);
1647     AP_DEBUG_ASSERT(q->count >= count);
1648     q->count -= count;
1649     apr_thread_mutex_unlock(timeout_mutex);
1650     while (count) {
1651         cs = APR_RING_NEXT(first, timeout_list);
1652         TO_QUEUE_ELEM_INIT(first);
1653         func(first);
1654         first = cs;
1655         count--;
1656     }
1657     apr_thread_mutex_lock(timeout_mutex);
1658 }
1659
1660 static void socket_callback_wrapper(void *baton){
1661     const apr_pollfd_t *out_pfd = (const apr_pollfd_t *)baton;
1662     socket_callback_baton_t *scb_baton = (socket_callback_baton_t *) ((listener_poll_type *) out_pfd->client_data)->baton;
1663     scb_baton->cbfunc(scb_baton->user_baton, out_pfd);
1664 }
1665
1666 static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
1667 {
1668     timer_event_t *ep;
1669     timer_event_t *te;
1670     apr_status_t rc;
1671     proc_info *ti = dummy;
1672     int process_slot = ti->pid;
1673     apr_pool_t *tpool = apr_thread_pool_get(thd);
1674     void *csd = NULL;
1675     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1676     ap_listen_rec *lr;
1677     int have_idle_worker = 0;
1678     event_conn_state_t *cs;
1679     const apr_pollfd_t *out_pfd;
1680     apr_int32_t num = 0;
1681     apr_interval_time_t timeout_interval;
1682     apr_time_t timeout_time = 0, now, last_log;
1683     listener_poll_type *pt;
1684     int closed = 0, listeners_disabled = 0;
1685
1686     last_log = apr_time_now();
1687     free(ti);
1688
1689     /* the following times out events that are really close in the future
1690      *   to prevent extra poll calls
1691      *
1692      * current value is .1 second
1693      */
1694 #define TIMEOUT_FUDGE_FACTOR 100000
1695 #define EVENT_FUDGE_FACTOR 10000
1696
1697     rc = init_pollset(tpool);
1698     if (rc != APR_SUCCESS) {
1699         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1700                      "failed to initialize pollset, "
1701                      "attempting to shutdown process gracefully");
1702         signal_threads(ST_GRACEFUL);
1703         return NULL;
1704     }
1705
1706     /* Unblock the signal used to wake this thread up, and set a handler for
1707      * it.
1708      */
1709     unblock_signal(LISTENER_SIGNAL);
1710     apr_signal(LISTENER_SIGNAL, dummy_signal_handler);
1711
1712     for (;;) {
1713         int workers_were_busy = 0;
1714         if (listener_may_exit) {
1715             close_listeners(process_slot, &closed);
1716             if (terminate_mode == ST_UNGRACEFUL
1717                 || apr_atomic_read32(&connection_count) == 0)
1718                 break;
1719         }
1720
1721         if (conns_this_child <= 0)
1722             check_infinite_requests();
1723
1724         now = apr_time_now();
1725         if (APLOGtrace6(ap_server_conf)) {
1726             /* trace log status every second */
1727             if (now - last_log > apr_time_from_msec(1000)) {
1728                 last_log = now;
1729                 apr_thread_mutex_lock(timeout_mutex);
1730                 ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
1731                              "connections: %u (clogged: %u write-completion: %d "
1732                              "keep-alive: %d lingering: %d suspended: %u)",
1733                              apr_atomic_read32(&connection_count),
1734                              apr_atomic_read32(&clogged_count),
1735                              write_completion_q.count,
1736                              keepalive_q.count,
1737                              apr_atomic_read32(&lingering_count),
1738                              apr_atomic_read32(&suspended_count));
1739                 apr_thread_mutex_unlock(timeout_mutex);
1740             }
1741         }
1742
1743         apr_thread_mutex_lock(g_timer_skiplist_mtx);
1744         te = apr_skiplist_peek(timer_skiplist);
1745         if (te) {
1746             if (te->when > now) {
1747                 timeout_interval = te->when - now;
1748             }
1749             else {
1750                 timeout_interval = 1;
1751             }
1752         }
1753         else {
1754             timeout_interval = apr_time_from_msec(100);
1755         }
1756         apr_thread_mutex_unlock(g_timer_skiplist_mtx);
1757
1758 #if HAVE_SERF
1759         rc = serf_context_prerun(g_serf);
1760         if (rc != APR_SUCCESS) {
1761             /* TOOD: what should do here? ugh. */
1762         }
1763 #endif
1764         now = apr_time_now();
1765         apr_thread_mutex_lock(g_timer_skiplist_mtx);
1766         ep = apr_skiplist_peek(timer_skiplist);
1767         while (ep) {
1768             if (ep->when < now + EVENT_FUDGE_FACTOR) {
1769                 apr_skiplist_pop(timer_skiplist, NULL);
1770                 if (!ep->canceled) {
1771                     if (ep->remove != NULL) {
1772                         for (apr_pollfd_t **pfds = (ep->remove); *pfds != NULL; pfds++) {
1773                             apr_pollset_remove(event_pollset, *pfds);
1774                         }
1775                     }
1776                 }
1777                 push_timer2worker(ep);
1778             }
1779             else {
1780                 break;
1781             }
1782             ep = apr_skiplist_peek(timer_skiplist);
1783         }
1784         apr_thread_mutex_unlock(g_timer_skiplist_mtx);
1785
1786         rc = apr_pollset_poll(event_pollset, timeout_interval, &num, &out_pfd);
1787         if (rc != APR_SUCCESS) {
1788             if (APR_STATUS_IS_EINTR(rc)) {
1789                 continue;
1790             }
1791             if (!APR_STATUS_IS_TIMEUP(rc)) {
1792                 ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf,
1793                              "apr_pollset_poll failed.  Attempting to "
1794                              "shutdown process gracefully");
1795                 signal_threads(ST_GRACEFUL);
1796             }
1797         }
1798
1799         if (listener_may_exit) {
1800             close_listeners(process_slot, &closed);
1801             if (terminate_mode == ST_UNGRACEFUL
1802                 || apr_atomic_read32(&connection_count) == 0)
1803                 break;
1804         }
1805
1806         while (num) {
1807             pt = (listener_poll_type *) out_pfd->client_data;
1808             if (pt->type == PT_CSD) {
1809                 /* one of the sockets is readable */
1810                 struct timeout_queue *remove_from_q = &write_completion_q;
1811                 int blocking = 1;
1812                 cs = (event_conn_state_t *) pt->baton;
1813                 switch (cs->pub.state) {
1814                 case CONN_STATE_CHECK_REQUEST_LINE_READABLE:
1815                     cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
1816                     remove_from_q = &keepalive_q;
1817                     /* don't wait for a worker for a keepalive request */
1818                     blocking = 0;
1819                     /* FALL THROUGH */
1820                 case CONN_STATE_WRITE_COMPLETION:
1821                     get_worker(&have_idle_worker, blocking,
1822                                &workers_were_busy);
1823                     apr_thread_mutex_lock(timeout_mutex);
1824                     TO_QUEUE_REMOVE(*remove_from_q, cs);
1825                     rc = apr_pollset_remove(event_pollset, &cs->pfd);
1826
1827                     /*
1828                      * Some of the pollset backends, like KQueue or Epoll
1829                      * automagically remove the FD if the socket is closed,
1830                      * therefore, we can accept _SUCCESS or _NOTFOUND,
1831                      * and we still want to keep going
1832                      */
1833                     if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
1834                         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1835                                      "pollset remove failed");
1836                         apr_thread_mutex_unlock(timeout_mutex);
1837                         start_lingering_close_nonblocking(cs);
1838                         break;
1839                     }
1840
1841                     apr_thread_mutex_unlock(timeout_mutex);
1842                     TO_QUEUE_ELEM_INIT(cs);
1843                     /* If we didn't get a worker immediately for a keep-alive
1844                      * request, we close the connection, so that the client can
1845                      * re-connect to a different process.
1846                      */
1847                     if (!have_idle_worker) {
1848                         start_lingering_close_nonblocking(cs);
1849                         break;
1850                     }
1851                     rc = push2worker(out_pfd, event_pollset);
1852                     if (rc != APR_SUCCESS) {
1853                         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1854                                      ap_server_conf, "push2worker failed");
1855                     }
1856                     else {
1857                         have_idle_worker = 0;
1858                     }
1859                     break;
1860                 case CONN_STATE_LINGER_NORMAL:
1861                 case CONN_STATE_LINGER_SHORT:
1862                     process_lingering_close(cs, out_pfd);
1863                     break;
1864                 default:
1865                     ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1866                                  ap_server_conf,
1867                                  "event_loop: unexpected state %d",
1868                                  cs->pub.state);
1869                     ap_assert(0);
1870                 }
1871             }
1872             else if (pt->type == PT_ACCEPT) {
1873                 /* A Listener Socket is ready for an accept() */
1874                 if (workers_were_busy) {
1875                     if (!listeners_disabled)
1876                         disable_listensocks(process_slot);
1877                     listeners_disabled = 1;
1878                     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1879                                  "All workers busy, not accepting new conns "
1880                                  "in this process");
1881                 }
1882                 else if (  (int)apr_atomic_read32(&connection_count)
1883                            - (int)apr_atomic_read32(&lingering_count)
1884                          > threads_per_child
1885                            + ap_queue_info_get_idlers(worker_queue_info) *
1886                              worker_factor / WORKER_FACTOR_SCALE)
1887                 {
1888                     if (!listeners_disabled)
1889                         disable_listensocks(process_slot);
1890                     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1891                                  "Too many open connections (%u), "
1892                                  "not accepting new conns in this process",
1893                                  apr_atomic_read32(&connection_count));
1894                     ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1895                                  "Idle workers: %u",
1896                                  ap_queue_info_get_idlers(worker_queue_info));
1897                     listeners_disabled = 1;
1898                 }
1899                 else if (listeners_disabled) {
1900                     listeners_disabled = 0;
1901                     enable_listensocks(process_slot);
1902                 }
1903                 if (!listeners_disabled) {
1904                     lr = (ap_listen_rec *) pt->baton;
1905                     ap_pop_pool(&ptrans, worker_queue_info);
1906
1907                     if (ptrans == NULL) {
1908                         /* create a new transaction pool for each accepted socket */
1909                         apr_allocator_t *allocator;
1910
1911                         apr_allocator_create(&allocator);
1912                         apr_allocator_max_free_set(allocator,
1913                                                    ap_max_mem_free);
1914                         apr_pool_create_ex(&ptrans, pconf, NULL, allocator);
1915                         apr_allocator_owner_set(allocator, ptrans);
1916                         if (ptrans == NULL) {
1917                             ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1918                                          ap_server_conf,
1919                                          "Failed to create transaction pool");
1920                             signal_threads(ST_GRACEFUL);
1921                             return NULL;
1922                         }
1923                     }
1924                     apr_pool_tag(ptrans, "transaction");
1925
1926                     get_worker(&have_idle_worker, 1, &workers_were_busy);
1927                     rc = lr->accept_func(&csd, lr, ptrans);
1928
1929                     /* later we trash rv and rely on csd to indicate
1930                      * success/failure
1931                      */
1932                     AP_DEBUG_ASSERT(rc == APR_SUCCESS || !csd);
1933
1934                     if (rc == APR_EGENERAL) {
1935                         /* E[NM]FILE, ENOMEM, etc */
1936                         resource_shortage = 1;
1937                         signal_threads(ST_GRACEFUL);
1938                     }
1939
1940                     if (csd != NULL) {
1941                         conns_this_child--;
1942                         rc = ap_queue_push(worker_queue, csd, NULL, ptrans);
1943                         if (rc != APR_SUCCESS) {
1944                             /* trash the connection; we couldn't queue the connected
1945                              * socket to a worker
1946                              */
1947                             apr_socket_close(csd);
1948                             ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1949                                          ap_server_conf,
1950                                          "ap_queue_push failed");
1951                             apr_pool_clear(ptrans);
1952                             ap_push_pool(worker_queue_info, ptrans);
1953                         }
1954                         else {
1955                             have_idle_worker = 0;
1956                         }
1957                     }
1958                     else {
1959                         apr_pool_clear(ptrans);
1960                         ap_push_pool(worker_queue_info, ptrans);
1961                     }
1962                 }
1963             }               /* if:else on pt->type */
1964 #if HAVE_SERF
1965             else if (pt->type == PT_SERF) {
1966                 /* send socket to serf. */
1967                 /* XXXX: this doesn't require get_worker() */
1968                 serf_event_trigger(g_serf, pt->baton, out_pfd);
1969             }
1970
1971 #endif
1972             else if (pt->type == PT_USER) {
1973                 /* masquerade as a timer event that is firing */
1974                 timer_event_t *te;
1975                 int i = 0;
1976                 socket_callback_baton_t *baton = (socket_callback_baton_t *) pt->baton;
1977                 if (baton->cancel_event) {
1978                     baton->cancel_event->canceled = 1;
1979                 }
1980
1981                 /* We only signal once per N sockets with this baton */
1982                 if (!(baton->signaled)) {
1983                     baton->signaled = 1;
1984                     te = event_get_timer_event(-1 /* fake timer */,
1985                                                socket_callback_wrapper,
1986                                                (apr_pollfd_t *)out_pfd,
1987                                                0, /* don't insert it */
1988                                                NULL /* no associated socket callback */);
1989                     /* remove other sockets in my set */
1990                     for (i = 0; i < baton->nsock; i++) {
1991                         apr_pollset_remove(event_pollset, baton->pfds[i]);
1992                     }
1993
1994                     push_timer2worker(te);
1995                 }
1996                 apr_pollset_remove(event_pollset, out_pfd);
1997             }
1998             out_pfd++;
1999             num--;
2000         }                   /* while for processing poll */
2001
2002         /* XXX possible optimization: stash the current time for use as
2003          * r->request_time for new requests
2004          */
2005         now = apr_time_now();
2006         /* we only do this once per 0.1s (TIMEOUT_FUDGE_FACTOR) */
2007         if (now > timeout_time) {
2008             struct process_score *ps;
2009             timeout_time = now + TIMEOUT_FUDGE_FACTOR;
2010
2011             /* handle timed out sockets */
2012             apr_thread_mutex_lock(timeout_mutex);
2013
2014             /* Step 1: keepalive timeouts */
2015             /* If all workers are busy, we kill older keep-alive connections so that they
2016              * may connect to another process.
2017              */
2018             if (workers_were_busy && keepalive_q.count) {
2019                 ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
2020                              "All workers are busy, will close %d keep-alive "
2021                              "connections",
2022                              keepalive_q.count);
2023                 process_timeout_queue(&keepalive_q,
2024                                       timeout_time + ap_server_conf->keep_alive_timeout,
2025                                       start_lingering_close_nonblocking);
2026             }
2027             else {
2028                 process_timeout_queue(&keepalive_q, timeout_time,
2029                                       start_lingering_close_nonblocking);
2030             }
2031             /* Step 2: write completion timeouts */
2032             process_timeout_queue(&write_completion_q, timeout_time,
2033                                   start_lingering_close_nonblocking);
2034             /* Step 3: (normal) lingering close completion timeouts */
2035             process_timeout_queue(&linger_q, timeout_time, stop_lingering_close);
2036             /* Step 4: (short) lingering close completion timeouts */
2037             process_timeout_queue(&short_linger_q, timeout_time, stop_lingering_close);
2038
2039             ps = ap_get_scoreboard_process(process_slot);
2040             ps->write_completion = write_completion_q.count;
2041             ps->keep_alive = keepalive_q.count;
2042             apr_thread_mutex_unlock(timeout_mutex);
2043
2044             ps->connections = apr_atomic_read32(&connection_count);
2045             ps->suspended = apr_atomic_read32(&suspended_count);
2046             ps->lingering_close = apr_atomic_read32(&lingering_count);
2047         }
2048         if (listeners_disabled && !workers_were_busy
2049             && (int)apr_atomic_read32(&connection_count)
2050                - (int)apr_atomic_read32(&lingering_count)
2051                < ((int)ap_queue_info_get_idlers(worker_queue_info) - 1)
2052                  * worker_factor / WORKER_FACTOR_SCALE + threads_per_child)
2053         {
2054             listeners_disabled = 0;
2055             enable_listensocks(process_slot);
2056         }
2057         /*
2058          * XXX: do we need to set some timeout that re-enables the listensocks
2059          * XXX: in case no other event occurs?
2060          */
2061     }     /* listener main loop */
2062
2063     close_listeners(process_slot, &closed);
2064     ap_queue_term(worker_queue);
2065
2066     apr_thread_exit(thd, APR_SUCCESS);
2067     return NULL;
2068 }
2069
2070 /* XXX For ungraceful termination/restart, we definitely don't want to
2071  *     wait for active connections to finish but we may want to wait
2072  *     for idle workers to get out of the queue code and release mutexes,
2073  *     since those mutexes are cleaned up pretty soon and some systems
2074  *     may not react favorably (i.e., segfault) if operations are attempted
2075  *     on cleaned-up mutexes.
2076  */
2077 static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
2078 {
2079     proc_info *ti = dummy;
2080     int process_slot = ti->pid;
2081     int thread_slot = ti->tid;
2082     apr_socket_t *csd = NULL;
2083     event_conn_state_t *cs;
2084     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
2085     apr_status_t rv;
2086     int is_idle = 0;
2087     timer_event_t *te = NULL;
2088
2089     free(ti);
2090
2091     ap_scoreboard_image->servers[process_slot][thread_slot].pid = ap_my_pid;
2092     ap_scoreboard_image->servers[process_slot][thread_slot].tid = apr_os_thread_current();
2093     ap_scoreboard_image->servers[process_slot][thread_slot].generation = retained->my_generation;
2094     ap_update_child_status_from_indexes(process_slot, thread_slot,
2095                                         SERVER_STARTING, NULL);
2096
2097     while (!workers_may_exit) {
2098         if (!is_idle) {
2099             rv = ap_queue_info_set_idle(worker_queue_info, NULL);
2100             if (rv != APR_SUCCESS) {
2101                 ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
2102                              "ap_queue_info_set_idle failed. Attempting to "
2103                              "shutdown process gracefully.");
2104                 signal_threads(ST_GRACEFUL);
2105                 break;
2106             }
2107             is_idle = 1;
2108         }
2109
2110         ap_update_child_status_from_indexes(process_slot, thread_slot,
2111                                             dying ? SERVER_GRACEFUL : SERVER_READY, NULL);
2112       worker_pop:
2113         if (workers_may_exit) {
2114             break;
2115         }
2116
2117         te = NULL;
2118         rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te);
2119
2120         if (rv != APR_SUCCESS) {
2121             /* We get APR_EOF during a graceful shutdown once all the
2122              * connections accepted by this server process have been handled.
2123              */
2124             if (APR_STATUS_IS_EOF(rv)) {
2125                 break;
2126             }
2127             /* We get APR_EINTR whenever ap_queue_pop() has been interrupted
2128              * from an explicit call to ap_queue_interrupt_all(). This allows
2129              * us to unblock threads stuck in ap_queue_pop() when a shutdown
2130              * is pending.
2131              *
2132              * If workers_may_exit is set and this is ungraceful termination/
2133              * restart, we are bound to get an error on some systems (e.g.,
2134              * AIX, which sanity-checks mutex operations) since the queue
2135              * may have already been cleaned up.  Don't log the "error" if
2136              * workers_may_exit is set.
2137              */
2138             else if (APR_STATUS_IS_EINTR(rv)) {
2139                 goto worker_pop;
2140             }
2141             /* We got some other error. */
2142             else if (!workers_may_exit) {
2143                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
2144                              "ap_queue_pop failed");
2145             }
2146             continue;
2147         }
2148         if (te != NULL) {
2149             te->cbfunc(te->baton);
2150             {
2151                 apr_thread_mutex_lock(g_timer_skiplist_mtx);
2152                 APR_RING_INSERT_TAIL(&timer_free_ring, te, timer_event_t, link);
2153                 apr_thread_mutex_unlock(g_timer_skiplist_mtx);
2154             }
2155         }
2156         else {
2157             is_idle = 0;
2158             worker_sockets[thread_slot] = csd;
2159             process_socket(thd, ptrans, csd, cs, process_slot, thread_slot);
2160             worker_sockets[thread_slot] = NULL;
2161         }
2162     }
2163
2164     ap_update_child_status_from_indexes(process_slot, thread_slot,
2165                                         dying ? SERVER_DEAD :
2166                                         SERVER_GRACEFUL,
2167                                         (request_rec *) NULL);
2168
2169     apr_thread_exit(thd, APR_SUCCESS);
2170     return NULL;
2171 }
2172
/*
 * Report whether `signum` is one of the signals this check recognizes.
 *
 * Returns 1 for SIGTERM and SIGINT, 0 for every other value.
 */
static int check_signal(int signum)
{
    if (signum == SIGTERM || signum == SIGINT) {
        return 1;
    }
    return 0;
}
2182
2183
2184
2185 static void create_listener_thread(thread_starter * ts)
2186 {
2187     int my_child_num = ts->child_num_arg;
2188     apr_threadattr_t *thread_attr = ts->threadattr;
2189     proc_info *my_info;
2190     apr_status_t rv;
2191
2192     my_info = (proc_info *) ap_malloc(sizeof(proc_info));
2193     my_info->pid = my_child_num;
2194     my_info->tid = -1;          /* listener thread doesn't have a thread slot */
2195     my_info->sd = 0;
2196     rv = apr_thread_create(&ts->listener, thread_attr, listener_thread,
2197                            my_info, pchild);
2198     if (rv != APR_SUCCESS) {
2199         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(00474)
2200                      "apr_thread_create: unable to create listener thread");
2201         /* let the parent decide how bad this really is */
2202         clean_child_exit(APEXIT_CHILDSICK);
2203     }
2204     apr_os_thread_get(&listener_os_thread, ts->listener);
2205 }
2206
2207 /* XXX under some circumstances not understood, children can get stuck
2208  *     in start_threads forever trying to take over slots which will
2209  *     never be cleaned up; for now there is an APLOG_DEBUG message issued
2210  *     every so often when this condition occurs
2211  */
2212 static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
2213 {
2214     thread_starter *ts = dummy;
2215     apr_thread_t **threads = ts->threads;
2216     apr_threadattr_t *thread_attr = ts->threadattr;
2217     int child_num_arg = ts->child_num_arg;
2218     int my_child_num = child_num_arg;
2219     proc_info *my_info;
2220     apr_status_t rv;
2221     int i;
2222     int threads_created = 0;
2223     int listener_started = 0;
2224     int loops;
2225     int prev_threads_created;
2226     int max_recycled_pools = -1;
2227     int good_methods[] = {APR_POLLSET_KQUEUE, APR_POLLSET_PORT, APR_POLLSET_EPOLL};
2228
2229     /* We must create the fd queues before we start up the listener
2230      * and worker threads. */
2231     worker_queue = apr_pcalloc(pchild, sizeof(*worker_queue));
2232     rv = ap_queue_init(worker_queue, threads_per_child, pchild);
2233     if (rv != APR_SUCCESS) {
2234         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
2235                      "ap_queue_init() failed");
2236         clean_child_exit(APEXIT_CHILDFATAL);
2237     }
2238
2239     if (ap_max_mem_free != APR_ALLOCATOR_MAX_FREE_UNLIMITED) {
2240         /* If we want to conserve memory, let's not keep an unlimited number of
2241          * pools & allocators.
2242          * XXX: This should probably be a separate config directive
2243          */
2244         max_recycled_pools = threads_per_child * 3 / 4 ;
2245     }
2246     rv = ap_queue_info_create(&worker_queue_info, pchild,
2247                               threads_per_child, max_recycled_pools);
2248     if (rv != APR_SUCCESS) {
2249         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
2250                      "ap_queue_info_create() failed");
2251         clean_child_exit(APEXIT_CHILDFATAL);
2252     }
2253
2254     /* Create the timeout mutex and main pollset before the listener
2255      * thread starts.
2256      */
2257     rv = apr_thread_mutex_create(&timeout_mutex, APR_THREAD_MUTEX_DEFAULT,
2258                                  pchild);
2259     if (rv != APR_SUCCESS) {
2260         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
2261                      "creation of the timeout mutex failed.");
2262         clean_child_exit(APEXIT_CHILDFATAL);
2263     }
2264
2265     /* Create the main pollset */
2266     for (i = 0; i < sizeof(good_methods) / sizeof(void*); i++) {
2267         rv = apr_pollset_create_ex(&event_pollset,
2268                             threads_per_child*2, /* XXX don't we need more, to handle
2269                                                 * connections in K-A or lingering
2270                                                 * close?
2271                                                 */
2272                             pchild, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY | APR_POLLSET_NODEFAULT,
2273                             good_methods[i]);
2274         if (rv == APR_SUCCESS) {
2275             break;
2276         }
2277     }
2278     if (rv != APR_SUCCESS) {
2279         rv = apr_pollset_create(&event_pollset,
2280                                threads_per_child*2, /* XXX don't we need more, to handle
2281                                                      * connections in K-A or lingering
2282                                                      * close?
2283                                                      */
2284                                pchild, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
2285     }
2286     if (rv != APR_SUCCESS) {
2287         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
2288                      "apr_pollset_create with Thread Safety failed.");
2289         clean_child_exit(APEXIT_CHILDFATAL);
2290     }
2291
2292     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(02471)
2293                  "start_threads: Using %s", apr_pollset_method_name(event_pollset));
2294     worker_sockets = apr_pcalloc(pchild, threads_per_child
2295                                  * sizeof(apr_socket_t *));
2296
2297     loops = prev_threads_created = 0;
2298     while (1) {
2299         /* threads_per_child does not include the listener thread */
2300         for (i = 0; i < threads_per_child; i++) {
2301             int status =
2302                 ap_scoreboard_image->servers[child_num_arg][i].status;
2303
2304             if (status != SERVER_GRACEFUL && status != SERVER_DEAD) {
2305                 continue;
2306             }
2307
2308             my_info = (proc_info *) ap_malloc(sizeof(proc_info));
2309             my_info->pid = my_child_num;
2310             my_info->tid = i;
2311             my_info->sd = 0;
2312
2313             /* We are creating threads right now */
2314             ap_update_child_status_from_indexes(my_child_num, i,
2315                                                 SERVER_STARTING, NULL);
2316             /* We let each thread update its own scoreboard entry.  This is
2317              * done because it lets us deal with tid better.
2318              */
2319             rv = apr_thread_create(&threads[i], thread_attr,
2320                                    worker_thread, my_info, pchild);
2321             if (rv != APR_SUCCESS) {
2322                 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
2323                              "apr_thread_create: unable to create worker thread");
2324                 /* let the parent decide how bad this really is */
2325                 clean_child_exit(APEXIT_CHILDSICK);
2326             }
2327             threads_created++;
2328         }
2329
2330         /* Start the listener only when there are workers available */
2331         if (!listener_started && threads_created) {
2332             create_listener_thread(ts);
2333             listener_started = 1;
2334         }
2335
2336
2337         if (start_thread_may_exit || threads_created == threads_per_child) {
2338             break;
2339         }
2340         /* wait for previous generation to clean up an entry */
2341         apr_sleep(apr_time_from_sec(1));
2342         ++loops;
2343         if (loops % 120 == 0) { /* every couple of minutes */
2344             if (prev_threads_created == threads_created) {
2345                 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
2346                              "child %" APR_PID_T_FMT " isn't taking over "
2347                              "slots very quickly (%d of %d)",
2348                              ap_my_pid, threads_created,
2349                              threads_per_child);
2350             }
2351             prev_threads_created = threads_created;
2352         }
2353     }
2354
2355     /* What state should this child_main process be listed as in the
2356      * scoreboard...?
2357      *  ap_update_child_status_from_indexes(my_child_num, i, SERVER_STARTING,
2358      *                                      (request_rec *) NULL);
2359      *
2360      *  This state should be listed separately in the scoreboard, in some kind
2361      *  of process_status, not mixed in with the worker threads' status.
2362      *  "life_status" is almost right, but it's in the worker's structure, and
2363      *  the name could be clearer.   gla
2364      */
2365     apr_thread_exit(thd, APR_SUCCESS);
2366     return NULL;
2367 }
2368
2369 static void join_workers(apr_thread_t * listener, apr_thread_t ** threads)
2370 {
2371     int i;
2372     apr_status_t rv, thread_rv;
2373
2374     if (listener) {
2375         int iter;
2376
2377         /* deal with a rare timing window which affects waking up the
2378          * listener thread...  if the signal sent to the listener thread
2379          * is delivered between the time it verifies that the
2380          * listener_may_exit flag is clear and the time it enters a
2381          * blocking syscall, the signal didn't do any good...  work around
2382          * that by sleeping briefly and sending it again
2383          */
2384
2385         iter = 0;
2386         while (iter < 10 && !dying) {
2387             /* listener has not stopped accepting yet */
2388             apr_sleep(apr_time_make(0, 500000));
2389             wakeup_listener();
2390             ++iter;
2391         }
2392         if (iter >= 10) {
2393             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00475)
2394                          "the listener thread didn't stop accepting");
2395         }
2396         else {
2397             rv = apr_thread_join(&thread_rv, listener);
2398             if (rv != APR_SUCCESS) {
2399                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00476)
2400                              "apr_thread_join: unable to join listener thread");
2401             }
2402         }
2403     }
2404
2405     for (i = 0; i < threads_per_child; i++) {
2406         if (threads[i]) {       /* if we ever created this thread */
2407             rv = apr_thread_join(&thread_rv, threads[i]);
2408             if (rv != APR_SUCCESS) {
2409                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00477)
2410                              "apr_thread_join: unable to join worker "
2411                              "thread %d", i);
2412             }
2413         }
2414     }
2415 }
2416
2417 static void join_start_thread(apr_thread_t * start_thread_id)
2418 {
2419     apr_status_t rv, thread_rv;
2420
2421     start_thread_may_exit = 1;  /* tell it to give up in case it is still
2422                                  * trying to take over slots from a
2423                                  * previous generation
2424                                  */
2425     rv = apr_thread_join(&thread_rv, start_thread_id);
2426     if (rv != APR_SUCCESS) {
2427         ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf, APLOGNO(00478)
2428                      "apr_thread_join: unable to join the start " "thread");
2429     }
2430 }
2431
2432 static void child_main(int child_num_arg)
2433 {
2434     apr_thread_t **threads;
2435     apr_status_t rv;
2436     thread_starter *ts;
2437     apr_threadattr_t *thread_attr;
2438     apr_thread_t *start_thread_id;
2439     apr_pool_t *pskip;
2440     int i;
2441     ap_listen_rec *lr;
2442
2443     mpm_state = AP_MPMQ_STARTING;       /* for benefit of any hooks that run as this
2444                                          * child initializes
2445                                          */
2446     ap_my_pid = getpid();
2447     ap_fatal_signal_child_setup(ap_server_conf);
2448     apr_pool_create(&pchild, pconf);
2449
2450     /* close unused listeners and pods */
2451     for (i = 0; i < num_buckets; i++) {
2452         if (i != bucket[child_num_arg]) {
2453             lr = mpm_listen[i];
2454             while(lr) {
2455                 apr_socket_close(lr->sd);
2456                 lr->active = 0;
2457                 lr = lr->next;
2458             }
2459             ap_mpm_podx_close(pod[i]);
2460         }
2461     }
2462
2463     /*stuff to do before we switch id's, so we have permissions. */
2464     ap_reopen_scoreboard(pchild, NULL, 0);
2465
2466     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
2467         clean_child_exit(APEXIT_CHILDFATAL);
2468     }
2469
2470     apr_thread_mutex_create(&g_timer_skiplist_mtx, APR_THREAD_MUTEX_DEFAULT, pchild);
2471     APR_RING_INIT(&timer_free_ring, timer_event_t, link);
2472     apr_pool_create(&pskip, pchild);
2473     apr_skiplist_init(&timer_skiplist, pskip);
2474     apr_skiplist_set_compare(timer_skiplist, indexing_comp, indexing_compk);
2475     ap_run_child_init(pchild, ap_server_conf);
2476
2477     /* done with init critical section */
2478
2479     /* Just use the standard apr_setup_signal_thread to block all signals
2480      * from being received.  The child processes no longer use signals for
2481      * any communication with the parent process.
2482      */
2483     rv = apr_setup_signal_thread();
2484     if (rv != APR_SUCCESS) {
2485         ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO(00479)
2486                      "Couldn't initialize signal thread");
2487         clean_child_exit(APEXIT_CHILDFATAL);
2488     }
2489
2490     if (ap_max_requests_per_child) {
2491         conns_this_child = ap_max_requests_per_child;
2492     }
2493     else {
2494         /* coding a value of zero means infinity */
2495         conns_this_child = APR_INT32_MAX;
2496     }
2497
2498     /* Setup worker threads */
2499
2500     /* clear the storage; we may not create all our threads immediately,
2501      * and we want a 0 entry to indicate a thread which was not created
2502      */
2503     threads = ap_calloc(threads_per_child, sizeof(apr_thread_t *));
2504     ts = apr_palloc(pchild, sizeof(*ts));
2505
2506     apr_threadattr_create(&thread_attr, pchild);
2507     /* 0 means PTHREAD_CREATE_JOINABLE */
2508     apr_threadattr_detach_set(thread_attr, 0);
2509
2510     if (ap_thread_stacksize != 0) {
2511         rv = apr_threadattr_stacksize_set(thread_attr, ap_thread_stacksize);
2512         if (rv != APR_SUCCESS && rv != APR_ENOTIMPL) {
2513             ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(02436)
2514                          "WARNING: ThreadStackSize of %" APR_SIZE_T_FMT " is "
2515                          "inappropriate, using default",
2516                          ap_thread_stacksize);
2517         }
2518     }
2519
2520     ts->threads = threads;
2521     ts->listener = NULL;
2522     ts->child_num_arg = child_num_arg;
2523     ts->threadattr = thread_attr;
2524
2525     rv = apr_thread_create(&start_thread_id, thread_attr, start_threads,
2526                            ts, pchild);
2527     if (rv != APR_SUCCESS) {
2528         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf, APLOGNO(00480)
2529                      "apr_thread_create: unable to create worker thread");
2530         /* let the parent decide how bad this really is */
2531         clean_child_exit(APEXIT_CHILDSICK);
2532     }
2533
2534     mpm_state = AP_MPMQ_RUNNING;
2535
2536     /* If we are only running in one_process mode, we will want to
2537      * still handle signals. */
2538     if (one_process) {
2539         /* Block until we get a terminating signal. */
2540         apr_signal_thread(check_signal);
2541         /* make sure the start thread has finished; signal_threads()
2542          * and join_workers() depend on that
2543          */
2544         /* XXX join_start_thread() won't be awakened if one of our
2545          *     threads encounters a critical error and attempts to
2546          *     shutdown this child
2547          */
2548         join_start_thread(start_thread_id);
2549
2550         /* helps us terminate a little more quickly than the dispatch of the
2551          * signal thread; beats the Pipe of Death and the browsers
2552          */
2553         signal_threads(ST_UNGRACEFUL);
2554
2555         /* A terminating signal was received. Now join each of the
2556          * workers to clean them up.
2557          *   If the worker already exited, then the join frees
2558          *   their resources and returns.
2559          *   If the worker hasn't exited, then this blocks until
2560          *   they have (then cleans up).
2561          */
2562         join_workers(ts->listener, threads);
2563     }
2564     else {                      /* !one_process */
2565         /* remove SIGTERM from the set of blocked signals...  if one of
2566          * the other threads in the process needs to take us down
2567          * (e.g., for MaxConnectionsPerChild) it will send us SIGTERM
2568          */
2569         unblock_signal(SIGTERM);
2570         apr_signal(SIGTERM, dummy_signal_handler);
2571         /* Watch for any messages from the parent over the POD */
2572         while (1) {
2573             rv = ap_mpm_podx_check(child_pod);
2574             if (rv == AP_MPM_PODX_NORESTART) {
2575                 /* see if termination was triggered while we slept */
2576                 switch (terminate_mode) {
2577                 case ST_GRACEFUL:
2578                     rv = AP_MPM_PODX_GRACEFUL;
2579                     break;
2580                 case ST_UNGRACEFUL:
2581                     rv = AP_MPM_PODX_RESTART;
2582                     break;
2583                 }
2584             }
2585             if (rv == AP_MPM_PODX_GRACEFUL || rv == AP_MPM_PODX_RESTART) {
2586                 /* make sure the start thread has finished;
2587                  * signal_threads() and join_workers depend on that
2588                  */
2589                 join_start_thread(start_thread_id);
2590                 signal_threads(rv ==
2591                                AP_MPM_PODX_GRACEFUL ? ST_GRACEFUL : ST_UNGRACEFUL);
2592                 break;
2593             }
2594         }
2595
2596         /* A terminating signal was received. Now join each of the
2597          * workers to clean them up.
2598          *   If the worker already exited, then the join frees
2599          *   their resources and returns.
2600          *   If the worker hasn't exited, then this blocks until
2601          *   they have (then cleans up).
2602          */
2603         join_workers(ts->listener, threads);
2604     }
2605
2606     free(threads);
2607
2608     clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
2609 }
2610
2611 static int make_child(server_rec * s, int slot)
2612 {
2613     int pid;
2614
2615     if (slot + 1 > retained->max_daemons_limit) {
2616         retained->max_daemons_limit = slot + 1;
2617     }
2618
2619     child_listen = mpm_listen[bucket[slot]];
2620     child_pod = pod[bucket[slot]];
2621
2622     if (one_process) {
2623         set_signals();
2624         event_note_child_started(slot, getpid());
2625         child_main(slot);
2626         /* NOTREACHED */
2627     }
2628
2629     if ((pid = fork()) == -1) {
2630         ap_log_error(APLOG_MARK, APLOG_ERR, errno, s, APLOGNO(00481)
2631                      "fork: Unable to fork new process");
2632
2633         /* fork didn't succeed.  There's no need to touch the scoreboard;
2634          * if we were trying to replace a failed child process, then
2635          * server_main_loop() marked its workers SERVER_DEAD, and if
2636          * we were trying to replace a child process that exited normally,
2637          * its worker_thread()s left SERVER_DEAD or SERVER_GRACEFUL behind.
2638          */
2639
2640         /* In case system resources are maxxed out, we don't want
2641            Apache running away with the CPU trying to fork over and
2642            over and over again. */
2643         apr_sleep(apr_time_from_sec(10));
2644
2645         return -1;
2646     }
2647
2648     if (!pid) {
2649 #ifdef HAVE_BINDPROCESSOR
2650         /* By default, AIX binds to a single processor.  This bit unbinds
2651          * children which will then bind to another CPU.
2652          */
2653         int status = bindprocessor(BINDPROCESS, (int) getpid(),
2654                                    PROCESSOR_CLASS_ANY);
2655         if (status != OK)
2656             ap_log_error(APLOG_MARK, APLOG_DEBUG, errno,
2657                          ap_server_conf, APLOGNO(00482)
2658                          "processor unbind failed");
2659 #endif
2660         RAISE_SIGSTOP(MAKE_CHILD);
2661
2662         apr_signal(SIGTERM, just_die);
2663         child_main(slot);
2664         /* NOTREACHED */
2665     }
2666     /* else */
2667     if (ap_scoreboard_image->parent[slot].pid != 0) {
2668         /* This new child process is squatting on the scoreboard
2669          * entry owned by an exiting child process, which cannot
2670          * exit until all active requests complete.
2671          */
2672         event_note_child_lost_slot(slot, pid);
2673     }
2674     ap_scoreboard_image->parent[slot].quiescing = 0;
2675     ap_scoreboard_image->parent[slot].not_accepting = 0;
2676     event_note_child_started(slot, pid);
2677     return 0;
2678 }
2679
2680 /* start up a bunch of children */
2681 static void startup_children(int number_to_start)
2682 {
2683     int i;
2684
2685     for (i = 0; number_to_start && i < ap_daemons_limit; ++i) {
2686         if (ap_scoreboard_image->parent[i].pid != 0) {
2687             continue;
2688         }
2689         bucket[i] = i % num_buckets;
2690         if (make_child(ap_server_conf, i) < 0) {
2691             break;
2692         }
2693         --number_to_start;
2694     }
2695 }
2696
2697 static void perform_idle_server_maintenance(int child_bucket)
2698 {
2699     int i, j;
2700     int idle_thread_count;
2701     worker_score *ws;
2702     process_score *ps;
2703     int free_length;
2704     int totally_free_length = 0;
2705     int free_slots[MAX_SPAWN_RATE];
2706     int last_non_dead;
2707     int total_non_dead;
2708     int active_thread_count = 0;
2709
2710     /* initialize the free_list */
2711     free_length = 0;
2712
2713     idle_thread_count = 0;
2714     last_non_dead = -1;
2715     total_non_dead = 0;
2716
2717     for (i = 0; i < ap_daemons_limit; ++i) {
2718         /* Initialization to satisfy the compiler. It doesn't know
2719          * that threads_per_child is always > 0 */
2720         int status = SERVER_DEAD;
2721         int any_dying_threads = 0;
2722         int any_dead_threads = 0;
2723         int all_dead_threads = 1;
2724         int child_threads_active = 0;
2725
2726         if (i >= retained->max_daemons_limit
2727             && totally_free_length == retained->idle_spawn_rate[child_bucket])
2728             /* short cut if all active processes have been examined and
2729              * enough empty scoreboard slots have been found
2730              */
2731
2732             break;
2733         ps = &ap_scoreboard_image->parent[i];
2734         for (j = 0; j < threads_per_child; j++) {
2735             ws = &ap_scoreboard_image->servers[i][j];
2736             status = ws->status;
2737
2738             /* XXX any_dying_threads is probably no longer needed    GLA */
2739             any_dying_threads = any_dying_threads ||
2740                 (status == SERVER_GRACEFUL);
2741             any_dead_threads = any_dead_threads || (status == SERVER_DEAD);
2742             all_dead_threads = all_dead_threads &&
2743                 (status == SERVER_DEAD || status == SERVER_GRACEFUL);
2744
2745             /* We consider a starting server as idle because we started it
2746              * at least a cycle ago, and if it still hasn't finished starting
2747              * then we're just going to swamp things worse by forking more.
2748              * So we hopefully won't need to fork more if we count it.
2749              * This depends on the ordering of SERVER_READY and SERVER_STARTING.
2750              */
2751             if (ps->pid != 0) { /* XXX just set all_dead_threads in outer
2752                                    for loop if no pid?  not much else matters */
2753                 if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting
2754                     && ps->generation == retained->my_generation &&
2755                     bucket[i] == child_bucket)
2756                 {
2757                     ++idle_thread_count;
2758                 }
2759                 if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
2760                     ++child_threads_active;
2761                 }
2762             }
2763         }
2764         active_thread_count += child_threads_active;
2765         if (any_dead_threads
2766             && totally_free_length < retained->idle_spawn_rate[child_bucket]
2767             && free_length < MAX_SPAWN_RATE/num_buckets
2768             && (!ps->pid      /* no process in the slot */
2769                   || ps->quiescing)) {  /* or at least one is going away */
2770             if (all_dead_threads) {
2771                 /* great! we prefer these, because the new process can
2772                  * start more threads sooner.  So prioritize this slot
2773                  * by putting it ahead of any slots with active threads.
2774                  *
2775                  * first, make room by moving a slot that's potentially still
2776                  * in use to the end of the array
2777                  */
2778                 free_slots[free_length] = free_slots[totally_free_length];
2779                 free_slots[totally_free_length++] = i;
2780             }
2781             else {
2782                 /* slot is still in use - back of the bus
2783                  */
2784                 free_slots[free_length] = i;
2785             }
2786             ++free_length;
2787         }
2788         else if (child_threads_active == threads_per_child) {
2789             had_healthy_child = 1;
2790         }
2791         /* XXX if (!ps->quiescing)     is probably more reliable  GLA */
2792         if (!any_dying_threads) {
2793             last_non_dead = i;
2794             ++total_non_dead;
2795         }
2796     }
2797
2798     if (retained->sick_child_detected) {
2799         if (had_healthy_child) {
2800             /* Assume this is a transient error, even though it may not be.  Leave
2801              * the server up in case it is able to serve some requests or the
2802              * problem will be resolved.
2803              */
2804             retained->sick_child_detected = 0;
2805         }
2806         else {
2807             /* looks like a basket case, as no child ever fully initialized; give up.
2808              */
2809             shutdown_pending = 1;
2810             child_fatal = 1;
2811             ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
2812                          ap_server_conf, APLOGNO(02324)
2813                          "A resource shortage or other unrecoverable failure "
2814                          "was encountered before any child process initialized "
2815                          "successfully... httpd is exiting!");
2816             /* the child already logged the failure details */
2817             return;
2818         }
2819     }
2820
2821     retained->max_daemons_limit = last_non_dead + 1;
2822
2823     if (idle_thread_count > max_spare_threads/num_buckets) {
2824         /* Kill off one child */
2825         ap_mpm_podx_signal(pod[child_bucket], AP_MPM_PODX_GRACEFUL);
2826         retained->idle_spawn_rate[child_bucket] = 1;
2827     }
2828     else if (idle_thread_count < min_spare_threads/num_buckets) {
2829         /* terminate the free list */
2830         if (free_length == 0) { /* scoreboard is full, can't fork */
2831
2832             if (active_thread_count >= ap_daemons_limit * threads_per_child) {
2833                 if (!retained->maxclients_reported) {
2834                     /* only report this condition once */
2835                     ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484)
2836                                  "server reached MaxRequestWorkers setting, "
2837                                  "consider raising the MaxRequestWorkers "
2838                                  "setting");
2839                     retained->maxclients_reported = 1;
2840                 }
2841             }
2842             else {
2843                 ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00485)
2844                              "scoreboard is full, not at MaxRequestWorkers");
2845             }
2846             retained->idle_spawn_rate[child_bucket] = 1;
2847         }
2848         else {
2849             if (free_length > retained->idle_spawn_rate[child_bucket]) {
2850                 free_length = retained->idle_spawn_rate[child_bucket];
2851             }
2852             if (retained->idle_spawn_rate[child_bucket] >= 8) {
2853                 ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00486)
2854                              "server seems busy, (you may need "
2855                              "to increase StartServers, ThreadsPerChild "
2856                              "or Min/MaxSpareThreads), "
2857                              "spawning %d children, there are around %d idle "
2858                              "threads, and %d total children", free_length,
2859                              idle_thread_count, total_non_dead);
2860             }
2861             for (i = 0; i < free_length; ++i) {
2862                 bucket[free_slots[i]] = child_bucket;
2863                 make_child(ap_server_conf, free_slots[i]);
2864             }
2865             /* the next time around we want to spawn twice as many if this
2866              * wasn't good enough, but not if we've just done a graceful
2867              */
2868             if (retained->hold_off_on_exponential_spawning) {
2869                 --retained->hold_off_on_exponential_spawning;
2870             }
2871             else if (retained->idle_spawn_rate[child_bucket] < MAX_SPAWN_RATE/num_buckets) {
2872                 retained->idle_spawn_rate[child_bucket] *= 2;
2873             }
2874         }
2875     }
2876     else {
2877         retained->idle_spawn_rate[child_bucket] = 1;
2878     }
2879 }
2880
2881 static void server_main_loop(int remaining_children_to_start)
2882 {
2883     ap_generation_t old_gen;
2884     int child_slot;
2885     apr_exit_why_e exitwhy;
2886     int status, processed_status;
2887     apr_proc_t pid;
2888     int i;
2889
2890     while (!restart_pending && !shutdown_pending) {
2891         ap_wait_or_timeout(&exitwhy, &status, &pid, pconf, ap_server_conf);
2892
2893         if (pid.pid != -1) {
2894             processed_status = ap_process_child_status(&pid, exitwhy, status);
2895             child_slot = ap_find_child_by_pid(&pid);
2896             if (processed_status == APEXIT_CHILDFATAL) {
2897                 /* fix race condition found in PR 39311
2898                  * A child created at the same time as a graceful happens
2899                  * can find the lock missing and create a fatal error.
2900                  * It is not fatal for the last generation to be in this state.
2901                  */
2902                 if (child_slot < 0
2903                     || ap_get_scoreboard_process(child_slot)->generation
2904                        == retained->my_generation) {
2905                     shutdown_pending = 1;
2906                     child_fatal = 1;
2907                     return;
2908                 }
2909                 else {
2910                     ap_log_error(APLOG_MARK, APLOG_WARNING, 0, ap_server_conf, APLOGNO(00487)
2911                                  "Ignoring fatal error in child of previous "
2912                                  "generation (pid %ld).",
2913                                  (long)pid.pid);
2914                     retained->sick_child_detected = 1;
2915                 }
2916             }
2917             else if (processed_status == APEXIT_CHILDSICK) {
2918                 /* tell perform_idle_server_maintenance to check into this
2919                  * on the next timer pop
2920                  */
2921                 retained->sick_child_detected = 1;
2922             }
2923             /* non-fatal death... note that it's gone in the scoreboard. */
2924             if (child_slot >= 0) {
2925                 for (i = 0; i < threads_per_child; i++)
2926                     ap_update_child_status_from_indexes(child_slot, i,
2927                                                         SERVER_DEAD,
2928                                                         (request_rec *) NULL);
2929
2930                 event_note_child_killed(child_slot, 0, 0);
2931                 ap_scoreboard_image->parent[child_slot].quiescing = 0;
2932                 if (processed_status == APEXIT_CHILDSICK) {
2933                     /* resource shortage, minimize the fork rate */
2934                     retained->idle_spawn_rate[bucket[child_slot]] = 1;
2935                 }
2936                 else if (remaining_children_to_start
2937                          && child_slot < ap_daemons_limit) {
2938                     /* we're still doing a 1-for-1 replacement of dead
2939                      * children with new children
2940                      */
2941                     make_child(ap_server_conf, child_slot);
2942                     --remaining_children_to_start;
2943                 }
2944             }
2945             else if (ap_unregister_extra_mpm_process(pid.pid, &old_gen) == 1) {
2946
2947                 event_note_child_killed(-1, /* already out of the scoreboard */
2948                                         pid.pid, old_gen);
2949                 if (processed_status == APEXIT_CHILDSICK
2950                     && old_gen == retained->my_generation) {
2951                     /* resource shortage, minimize the fork rate */
2952                     for (i = 0; i < num_buckets; i++) {
2953                         retained->idle_spawn_rate[i] = 1;
2954                     }
2955                 }
2956 #if APR_HAS_OTHER_CHILD
2957             }
2958             else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
2959                                                 status) == 0) {
2960                 /* handled */
2961 #endif
2962             }
2963             else if (retained->is_graceful) {
2964                 /* Great, we've probably just lost a slot in the
2965                  * scoreboard.  Somehow we don't know about this child.
2966                  */
2967                 ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
2968                              ap_server_conf, APLOGNO(00488)
2969                              "long lost child came home! (pid %ld)",
2970                              (long) pid.pid);
2971             }
2972             /* Don't perform idle maintenance when a child dies,
2973              * only do it when there's a timeout.  Remember only a
2974              * finite number of children can die, and it's pretty
2975              * pathological for a lot to die suddenly.
2976              */
2977             continue;
2978         }
2979         else if (remaining_children_to_start) {
2980             /* we hit a 1 second timeout in which none of the previous
2981              * generation of children needed to be reaped... so assume
2982              * they're all done, and pick up the slack if any is left.
2983              */
2984             startup_children(remaining_children_to_start);
2985             remaining_children_to_start = 0;
2986             /* In any event we really shouldn't do the code below because
2987              * few of the servers we just started are in the IDLE state
2988              * yet, so we'd mistakenly create an extra server.
2989              */
2990             continue;
2991         }
2992
2993         for (i = 0; i < num_buckets; i++) {
2994             perform_idle_server_maintenance(i);
2995         }
2996     }
2997 }
2998
2999 static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
3000 {
3001     int remaining_children_to_start;
3002
3003     int i;
3004
3005     ap_log_pid(pconf, ap_pid_fname);
3006
3007     if (!retained->is_graceful) {
3008         if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
3009             mpm_state = AP_MPMQ_STOPPING;
3010             return DONE;
3011         }
3012         /* fix the generation number in the global score; we just got a new,
3013          * cleared scoreboard
3014          */
3015         ap_scoreboard_image->global->running_generation = retained->my_generation;
3016     }
3017
3018     bucket = apr_palloc(_pconf, sizeof(int) *  ap_daemons_limit);
3019
3020     restart_pending = shutdown_pending = 0;
3021     set_signals();
3022     /* Don't thrash... */
3023     if (max_spare_threads < min_spare_threads + threads_per_child * num_buckets)
3024         max_spare_threads = min_spare_threads + threads_per_child * num_buckets;
3025
3026     /* If we're doing a graceful_restart then we're going to see a lot
3027      * of children exiting immediately when we get into the main loop
3028      * below (because we just sent them AP_SIG_GRACEFUL).  This happens pretty
3029      * rapidly... and for each one that exits we may start a new one, until
3030      * there are at least min_spare_threads idle threads, counting across
3031      * all children.  But we may be permitted to start more children than
3032      * that, so we'll just keep track of how many we're
3033      * supposed to start up without the 1 second penalty between each fork.
3034      */
3035     remaining_children_to_start = ap_daemons_to_start;
3036     if (remaining_children_to_start > ap_daemons_limit) {
3037         remaining_children_to_start = ap_daemons_limit;
3038     }
3039     if (!retained->is_graceful) {
3040         startup_children(remaining_children_to_start);
3041         remaining_children_to_start = 0;
3042     }
3043     else {
3044         /* give the system some time to recover before kicking into
3045          * exponential mode */
3046         retained->hold_off_on_exponential_spawning = 10;
3047     }
3048
3049     ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00489)
3050                  "%s configured -- resuming normal operations",
3051                  ap_get_server_description());
3052     ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00490)
3053                  "Server built: %s", ap_get_server_built());
3054     ap_log_command_line(plog, s);
3055     ap_log_common(s);
3056
3057     mpm_state = AP_MPMQ_RUNNING;
3058
3059     server_main_loop(remaining_children_to_start);
3060     mpm_state = AP_MPMQ_STOPPING;
3061
3062     if (shutdown_pending && !retained->is_graceful) {
3063         /* Time to shut down:
3064          * Kill child processes, tell them to call child_exit, etc...
3065          */
3066         for (i = 0; i < num_buckets; i++) {
3067             ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
3068         }
3069         ap_reclaim_child_processes(1, /* Start with SIGTERM */
3070                                    event_note_child_killed);
3071
3072         if (!child_fatal) {
3073             /* cleanup pid file on normal shutdown */
3074             ap_remove_pid(pconf, ap_pid_fname);
3075             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0,
3076                          ap_server_conf, APLOGNO(00491) "caught SIGTERM, shutting down");
3077         }
3078         return DONE;
3079     } else if (shutdown_pending) {
3080         /* Time to gracefully shut down:
3081          * Kill child processes, tell them to call child_exit, etc...
3082          */
3083         int active_children;
3084         int index;
3085         apr_time_t cutoff = 0;
3086
3087         /* Close our listeners, and then ask our children to do same */
3088         ap_close_listeners();
3089         for (i = 0; i < num_buckets; i++) {
3090             ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
3091         }
3092         ap_relieve_child_processes(event_note_child_killed);
3093
3094         if (!child_fatal) {
3095             /* cleanup pid file on normal shutdown */
3096             ap_remove_pid(pconf, ap_pid_fname);
3097             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00492)
3098                          "caught " AP_SIG_GRACEFUL_STOP_STRING
3099                          ", shutting down gracefully");
3100         }
3101
3102         if (ap_graceful_shutdown_timeout) {
3103             cutoff = apr_time_now() +
3104                      apr_time_from_sec(ap_graceful_shutdown_timeout);
3105         }
3106
3107         /* Don't really exit until each child has finished */
3108         shutdown_pending = 0;
3109         do {
3110             /* Pause for a second */
3111             apr_sleep(apr_time_from_sec(1));
3112
3113             /* Relieve any children which have now exited */
3114             ap_relieve_child_processes(event_note_child_killed);
3115
3116             active_children = 0;
3117             for (index = 0; index < ap_daemons_limit; ++index) {
3118                 if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
3119                     active_children = 1;
3120                     /* Having just one child is enough to stay around */
3121                     break;
3122                 }
3123             }
3124         } while (!shutdown_pending && active_children &&
3125                  (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));
3126
3127         /* We might be here because we received SIGTERM, either
3128          * way, try and make sure that all of our processes are
3129          * really dead.
3130          */
3131         for (i = 0; i < num_buckets; i++) {
3132             ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
3133         }
3134         ap_reclaim_child_processes(1, event_note_child_killed);
3135
3136         return DONE;
3137     }
3138
3139     /* we've been told to restart */
3140     apr_signal(SIGHUP, SIG_IGN);
3141
3142     if (one_process) {
3143         /* not worth thinking about */
3144         return DONE;
3145     }
3146
3147     /* advance to the next generation */
3148     /* XXX: we really need to make sure this new generation number isn't in
3149      * use by any of the children.
3150      */
3151     ++retained->my_generation;
3152     ap_scoreboard_image->global->running_generation = retained->my_generation;
3153
3154     if (retained->is_graceful) {
3155         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00493)
3156                      AP_SIG_GRACEFUL_STRING
3157                      " received.  Doing graceful restart");
3158         /* wake up the children...time to die.  But we'll have more soon */
3159         for (i = 0; i < num_buckets; i++) {
3160             ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
3161         }
3162
3163         /* This is mostly for debugging... so that we know what is still
3164          * gracefully dealing with existing request.
3165          */
3166
3167     }
3168     else {
3169         /* Kill 'em all.  Since the child acts the same on the parents SIGTERM
3170          * and a SIGHUP, we may as well use the same signal, because some user
3171          * pthreads are stealing signals from us left and right.
3172          */
3173         for (i = 0; i < num_buckets; i++) {
3174             ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
3175         }
3176
3177         ap_reclaim_child_processes(1,  /* Start with SIGTERM */
3178                                    event_note_child_killed);
3179         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00494)
3180                      "SIGHUP received.  Attempting to restart");
3181     }
3182
3183     return OK;
3184 }
3185
3186 /* This really should be a post_config hook, but the error log is already
3187  * redirected by that point, so we need to do this in the open_logs phase.
3188  */
3189 static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
3190                            apr_pool_t * ptemp, server_rec * s)
3191 {
3192     int startup = 0;
3193     int level_flags = 0;
3194     apr_status_t rv;
3195     int i;
3196     int num_of_cores = 0;
3197
3198     pconf = p;
3199
3200     /* the reverse of pre_config, we want this only the first time around */
3201     if (retained->module_loads == 1) {
3202         startup = 1;
3203         level_flags |= APLOG_STARTUP;
3204     }
3205
3206     enable_default_listener = 0;
3207
3208     if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
3209         ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
3210                      (startup ? NULL : s),
3211                      "no listening sockets available, shutting down");
3212         return DONE;
3213     }
3214
3215     enable_default_listener = 1;
3216     if (have_so_reuseport) {
3217 #ifdef _SC_NPROCESSORS_ONLN
3218         num_of_cores = sysconf(_SC_NPROCESSORS_ONLN);
3219 #else
3220         num_of_cores = 1;
3221 #endif
3222         if (num_of_cores > 8) {
3223             num_buckets = num_of_cores/8;
3224         }
3225         else {
3226             num_buckets = 1;
3227         }
3228     }
3229     else {
3230         num_buckets = 1;
3231     }
3232
3233     ap_duplicate_listeners(ap_server_conf, pconf, num_buckets);
3234
3235     pod = apr_palloc(pconf, sizeof(ap_pod_t *) * num_buckets);
3236
3237     if (!one_process) {
3238         for (i = 0; i < num_buckets; i++) {
3239             if ((rv = ap_mpm_podx_open(pconf, &pod[i]))) {
3240                 ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
3241                              (startup ? NULL : s),
3242                              "could not open pipe-of-death");
3243                 return DONE;
3244             }
3245         }
3246     }
3247     /* for skiplist */
3248     srand((unsigned int)apr_time_now());
3249     return OK;
3250 }
3251
3252 static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
3253                             apr_pool_t * ptemp)
3254 {
3255     int no_detach, debug, foreground;
3256     apr_status_t rv;
3257     const char *userdata_key = "mpm_event_module";
3258     int i;
3259
3260     mpm_state = AP_MPMQ_STARTING;
3261
3262     debug = ap_exists_config_define("DEBUG");
3263
3264     if (debug) {
3265         foreground = one_process = 1;
3266         no_detach = 0;
3267     }
3268     else {
3269         one_process = ap_exists_config_define("ONE_PROCESS");
3270         no_detach = ap_exists_config_define("NO_DETACH");
3271         foreground = ap_exists_config_define("FOREGROUND");
3272     }
3273
3274     /* sigh, want this only the second time around */
3275     retained = ap_retained_data_get(userdata_key);
3276     if (!retained) {
3277         retained = ap_retained_data_create(userdata_key, sizeof(*retained));
3278         retained->max_daemons_limit = -1;
3279     }
3280     ++retained->module_loads;
3281     if (retained->module_loads == 2) {
3282         /* test for correct operation of fdqueue */
3283         static apr_uint32_t foo1, foo2;
3284
3285         apr_atomic_set32(&foo1, 100);
3286         foo2 = apr_atomic_add32(&foo1, -10);
3287         if (foo2 != 100 || foo1 != 90) {
3288             ap_log_error(APLOG_MARK, APLOG_CRIT, 0, NULL, APLOGNO(02405)
3289                          "atomics not working as expected - add32 of negative number");
3290             return HTTP_INTERNAL_SERVER_ERROR;
3291         }
3292         retained->idle_spawn_rate = apr_palloc(pconf, sizeof(int) * num_buckets);
3293         for (i = 0; i< num_buckets; i++) {
3294             retained->idle_spawn_rate[i] = 1;
3295         }
3296         rv = apr_pollset_create(&event_pollset, 1, plog,
3297                                 APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
3298         if (rv != APR_SUCCESS) {
3299             ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, APLOGNO(00495)
3300                          "Couldn't create a Thread Safe Pollset. "
3301                          "Is it supported on your platform?"
3302                          "Also check system or user limits!");
3303             return HTTP_INTERNAL_SERVER_ERROR;
3304         }
3305         apr_pollset_destroy(event_pollset);
3306
3307         if (!one_process && !foreground) {
3308             /* before we detach, setup crash handlers to log to errorlog */
3309             ap_fatal_signal_setup(ap_server_conf, pconf);
3310             rv = apr_proc_detach(no_detach ? APR_PROC_DETACH_FOREGROUND
3311                                  : APR_PROC_DETACH_DAEMONIZE);
3312             if (rv != APR_SUCCESS) {
3313                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL, APLOGNO(00496)
3314                              "apr_proc_detach failed");
3315                 return HTTP_INTERNAL_SERVER_ERROR;
3316             }
3317         }
3318     }
3319
3320     parent_pid = ap_my_pid = getpid();
3321
3322     ap_listen_pre_config();
3323     ap_daemons_to_start = DEFAULT_START_DAEMON;
3324     min_spare_threads = DEFAULT_MIN_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
3325     max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
3326     server_limit = DEFAULT_SERVER_LIMIT;
3327     thread_limit = DEFAULT_THREAD_LIMIT;
3328     ap_daemons_limit = server_limit;
3329     threads_per_child = DEFAULT_THREADS_PER_CHILD;
3330     max_workers = ap_daemons_limit * threads_per_child;
3331     had_healthy_child = 0;
3332     ap_extended_status = 0;
3333
3334     return OK;
3335 }
3336
3337 static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
3338                               apr_pool_t *ptemp, server_rec *s)
3339 {
3340     int startup = 0;
3341
3342     /* the reverse of pre_config, we want this only the first time around */
3343     if (retained->module_loads == 1) {
3344         startup = 1;
3345     }
3346
3347     if (server_limit > MAX_SERVER_LIMIT) {
3348         if (startup) {
3349             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00497)
3350                          "WARNING: ServerLimit of %d exceeds compile-time "
3351                          "limit of", server_limit);
3352             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3353                          " %d servers, decreasing to %d.",
3354                          MAX_SERVER_LIMIT, MAX_SERVER_LIMIT);
3355         } else {
3356             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00498)
3357                          "ServerLimit of %d exceeds compile-time limit "
3358                          "of %d, decreasing to match",
3359                          server_limit, MAX_SERVER_LIMIT);
3360         }
3361         server_limit = MAX_SERVER_LIMIT;
3362     }
3363     else if (server_limit < 1) {
3364         if (startup) {
3365             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00499)
3366                          "WARNING: ServerLimit of %d not allowed, "
3367                          "increasing to 1.", server_limit);
3368         } else {
3369             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00500)
3370                          "ServerLimit of %d not allowed, increasing to 1",
3371                          server_limit);
3372         }
3373         server_limit = 1;
3374     }
3375
3376     /* you cannot change ServerLimit across a restart; ignore
3377      * any such attempts
3378      */
3379     if (!retained->first_server_limit) {
3380         retained->first_server_limit = server_limit;
3381     }
3382     else if (server_limit != retained->first_server_limit) {
3383         /* don't need a startup console version here */
3384         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00501)
3385                      "changing ServerLimit to %d from original value of %d "
3386                      "not allowed during restart",
3387                      server_limit, retained->first_server_limit);
3388         server_limit = retained->first_server_limit;
3389     }
3390
3391     if (thread_limit > MAX_THREAD_LIMIT) {
3392         if (startup) {
3393             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00502)
3394                          "WARNING: ThreadLimit of %d exceeds compile-time "
3395                          "limit of", thread_limit);
3396             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3397                          " %d threads, decreasing to %d.",
3398                          MAX_THREAD_LIMIT, MAX_THREAD_LIMIT);
3399         } else {
3400             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00503)
3401                          "ThreadLimit of %d exceeds compile-time limit "
3402                          "of %d, decreasing to match",
3403                          thread_limit, MAX_THREAD_LIMIT);
3404         }
3405         thread_limit = MAX_THREAD_LIMIT;
3406     }
3407     else if (thread_limit < 1) {
3408         if (startup) {
3409             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00504)
3410                          "WARNING: ThreadLimit of %d not allowed, "
3411                          "increasing to 1.", thread_limit);
3412         } else {
3413             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00505)
3414                          "ThreadLimit of %d not allowed, increasing to 1",
3415                          thread_limit);
3416         }
3417         thread_limit = 1;
3418     }
3419
3420     /* you cannot change ThreadLimit across a restart; ignore
3421      * any such attempts
3422      */
3423     if (!retained->first_thread_limit) {
3424         retained->first_thread_limit = thread_limit;
3425     }
3426     else if (thread_limit != retained->first_thread_limit) {
3427         /* don't need a startup console version here */
3428         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00506)
3429                      "changing ThreadLimit to %d from original value of %d "
3430                      "not allowed during restart",
3431                      thread_limit, retained->first_thread_limit);
3432         thread_limit = retained->first_thread_limit;
3433     }
3434
3435     if (threads_per_child > thread_limit) {
3436         if (startup) {
3437             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00507)
3438                          "WARNING: ThreadsPerChild of %d exceeds ThreadLimit "
3439                          "of", threads_per_child);
3440             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3441                          " %d threads, decreasing to %d.",
3442                          thread_limit, thread_limit);
3443             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3444                          " To increase, please see the ThreadLimit "
3445                          "directive.");
3446         } else {
3447             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00508)
3448                          "ThreadsPerChild of %d exceeds ThreadLimit "
3449                          "of %d, decreasing to match",
3450                          threads_per_child, thread_limit);
3451         }
3452         threads_per_child = thread_limit;
3453     }
3454     else if (threads_per_child < 1) {
3455         if (startup) {
3456             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00509)
3457                          "WARNING: ThreadsPerChild of %d not allowed, "
3458                          "increasing to 1.", threads_per_child);
3459         } else {
3460             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00510)
3461                          "ThreadsPerChild of %d not allowed, increasing to 1",
3462                          threads_per_child);
3463         }
3464         threads_per_child = 1;
3465     }
3466
3467     if (max_workers < threads_per_child) {
3468         if (startup) {
3469             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00511)
3470                          "WARNING: MaxRequestWorkers of %d is less than "
3471                          "ThreadsPerChild of", max_workers);
3472             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3473                          " %d, increasing to %d.  MaxRequestWorkers must be at "
3474                          "least as large",
3475                          threads_per_child, threads_per_child);
3476             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3477                          " as the number of threads in a single server.");
3478         } else {
3479             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00512)
3480                          "MaxRequestWorkers of %d is less than ThreadsPerChild "
3481                          "of %d, increasing to match",
3482                          max_workers, threads_per_child);
3483         }
3484         max_workers = threads_per_child;
3485     }
3486
3487     ap_daemons_limit = max_workers / threads_per_child;
3488
3489     if (max_workers % threads_per_child) {
3490         int tmp_max_workers = ap_daemons_limit * threads_per_child;
3491
3492         if (startup) {
3493             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00513)
3494                          "WARNING: MaxRequestWorkers of %d is not an integer "
3495                          "multiple of", max_workers);
3496             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3497                          " ThreadsPerChild of %d, decreasing to nearest "
3498                          "multiple %d,", threads_per_child,
3499                          tmp_max_workers);
3500             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3501                          " for a maximum of %d servers.",
3502                          ap_daemons_limit);
3503         } else {
3504             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00514)
3505                          "MaxRequestWorkers of %d is not an integer multiple "
3506                          "of ThreadsPerChild of %d, decreasing to nearest "
3507                          "multiple %d", max_workers, threads_per_child,
3508                          tmp_max_workers);
3509         }
3510         max_workers = tmp_max_workers;
3511     }
3512
3513     if (ap_daemons_limit > server_limit) {
3514         if (startup) {
3515             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00515)
3516                          "WARNING: MaxRequestWorkers of %d would require %d "
3517                          "servers and ", max_workers, ap_daemons_limit);
3518             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3519                          " would exceed ServerLimit of %d, decreasing to %d.",
3520                          server_limit, server_limit * threads_per_child);
3521             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3522                          " To increase, please see the ServerLimit "
3523                          "directive.");
3524         } else {
3525             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00516)
3526                          "MaxRequestWorkers of %d would require %d servers and "
3527                          "exceed ServerLimit of %d, decreasing to %d",
3528                          max_workers, ap_daemons_limit, server_limit,
3529                          server_limit * threads_per_child);
3530         }
3531         ap_daemons_limit = server_limit;
3532     }
3533
3534     /* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */
3535     if (ap_daemons_to_start < 0) {
3536         if (startup) {
3537             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00517)
3538                          "WARNING: StartServers of %d not allowed, "
3539                          "increasing to 1.", ap_daemons_to_start);
3540         } else {
3541             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00518)
3542                          "StartServers of %d not allowed, increasing to 1",
3543                          ap_daemons_to_start);
3544         }
3545         ap_daemons_to_start = 1;
3546     }
3547
3548     if (min_spare_threads < 1) {
3549         if (startup) {
3550             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00519)
3551                          "WARNING: MinSpareThreads of %d not allowed, "
3552                          "increasing to 1", min_spare_threads);
3553             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3554                          " to avoid almost certain server failure.");
3555             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3556                          " Please read the documentation.");
3557         } else {
3558             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00520)
3559                          "MinSpareThreads of %d not allowed, increasing to 1",
3560                          min_spare_threads);
3561         }
3562         min_spare_threads = 1;
3563     }
3564
3565     /* max_spare_threads < min_spare_threads + threads_per_child
3566      * checked in ap_mpm_run()
3567      */
3568
3569     return OK;
3570 }
3571
3572 static void event_hooks(apr_pool_t * p)
3573 {
3574     /* Our open_logs hook function must run before the core's, or stderr
3575      * will be redirected to a file, and the messages won't print to the
3576      * console.
3577      */
3578     static const char *const aszSucc[] = { "core.c", NULL };
3579     one_process = 0;
3580     ap_force_set_tz(p);
3581
3582     ap_hook_open_logs(event_open_logs, NULL, aszSucc, APR_HOOK_REALLY_FIRST);
3583     /* we need to set the MPM state before other pre-config hooks use MPM query
3584      * to retrieve it, so register as REALLY_FIRST
3585      */
3586     ap_hook_pre_config(event_pre_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
3587     ap_hook_check_config(event_check_config, NULL, NULL, APR_HOOK_MIDDLE);
3588     ap_hook_mpm(event_run, NULL, NULL, APR_HOOK_MIDDLE);
3589     ap_hook_mpm_query(event_query, NULL, NULL, APR_HOOK_MIDDLE);
3590     ap_hook_mpm_register_timed_callback(event_register_timed_callback, NULL, NULL,
3591                                         APR_HOOK_MIDDLE);
3592     ap_hook_mpm_register_socket_callback(event_register_socket_callback, NULL, NULL,
3593                                         APR_HOOK_MIDDLE);
3594     ap_hook_mpm_register_socket_callback_timeout(event_register_socket_callback_ex, NULL, NULL,
3595                                         APR_HOOK_MIDDLE);
3596     ap_hook_mpm_unregister_socket_callback(event_unregister_socket_callback, NULL, NULL,
3597                                         APR_HOOK_MIDDLE);
3598     ap_hook_pre_read_request(event_pre_read_request, NULL, NULL, APR_HOOK_MIDDLE);
3599     ap_hook_mpm_get_name(event_get_name, NULL, NULL, APR_HOOK_MIDDLE);
3600     ap_hook_mpm_resume_suspended(event_resume_suspended, NULL, NULL, APR_HOOK_MIDDLE);
3601 }
3602
3603 static const char *set_daemons_to_start(cmd_parms *cmd, void *dummy,
3604                                         const char *arg)
3605 {
3606     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3607     if (err != NULL) {
3608         return err;
3609     }
3610
3611     ap_daemons_to_start = atoi(arg);
3612     return NULL;
3613 }
3614
3615 static const char *set_min_spare_threads(cmd_parms * cmd, void *dummy,
3616                                          const char *arg)
3617 {
3618     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3619     if (err != NULL) {
3620         return err;
3621     }
3622
3623     min_spare_threads = atoi(arg);
3624     return NULL;
3625 }
3626
3627 static const char *set_max_spare_threads(cmd_parms * cmd, void *dummy,
3628                                          const char *arg)
3629 {
3630     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3631     if (err != NULL) {
3632         return err;
3633     }
3634
3635     max_spare_threads = atoi(arg);
3636     return NULL;
3637 }
3638
3639 static const char *set_max_workers(cmd_parms * cmd, void *dummy,
3640                                    const char *arg)
3641 {
3642     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3643     if (err != NULL) {
3644         return err;
3645     }
3646     if (!strcasecmp(cmd->cmd->name, "MaxClients")) {
3647         ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL, APLOGNO(00521)
3648                      "MaxClients is deprecated, use MaxRequestWorkers "
3649                      "instead.");
3650     }
3651     max_workers = atoi(arg);
3652     return NULL;
3653 }
3654
3655 static const char *set_threads_per_child(cmd_parms * cmd, void *dummy,
3656                                          const char *arg)
3657 {
3658     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3659     if (err != NULL) {
3660         return err;
3661     }
3662
3663     threads_per_child = atoi(arg);
3664     return NULL;
3665 }
3666 static const char *set_server_limit (cmd_parms *cmd, void *dummy, const char *arg)
3667 {
3668     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3669     if (err != NULL) {
3670         return err;
3671     }
3672
3673     server_limit = atoi(arg);
3674     return NULL;
3675 }
3676
3677 static const char *set_thread_limit(cmd_parms * cmd, void *dummy,
3678                                     const char *arg)
3679 {
3680     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3681     if (err != NULL) {
3682         return err;
3683     }
3684
3685     thread_limit = atoi(arg);
3686     return NULL;
3687 }
3688
3689 static const char *set_worker_factor(cmd_parms * cmd, void *dummy,
3690                                      const char *arg)
3691 {
3692     double val;
3693     char *endptr;
3694     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3695     if (err != NULL) {
3696         return err;
3697     }
3698
3699     val = strtod(arg, &endptr);
3700     if (*endptr)
3701         return "error parsing value";
3702
3703     if (val <= 0)
3704         return "AsyncRequestWorkerFactor argument must be a positive number";
3705
3706     worker_factor = val * WORKER_FACTOR_SCALE;
3707     if (worker_factor == 0)
3708         worker_factor = 1;
3709     return NULL;
3710 }
3711
3712
3713 static const command_rec event_cmds[] = {
3714     LISTEN_COMMANDS,
3715     AP_INIT_TAKE1("StartServers", set_daemons_to_start, NULL, RSRC_CONF,
3716                   "Number of child processes launched at server startup"),
3717     AP_INIT_TAKE1("ServerLimit", set_server_limit, NULL, RSRC_CONF,
3718                   "Maximum number of child processes for this run of Apache"),
3719     AP_INIT_TAKE1("MinSpareThreads", set_min_spare_threads, NULL, RSRC_CONF,
3720                   "Minimum number of idle threads, to handle request spikes"),
3721     AP_INIT_TAKE1("MaxSpareThreads", set_max_spare_threads, NULL, RSRC_CONF,
3722                   "Maximum number of idle threads"),
3723     AP_INIT_TAKE1("MaxClients", set_max_workers, NULL, RSRC_CONF,
3724                   "Deprecated name of MaxRequestWorkers"),
3725     AP_INIT_TAKE1("MaxRequestWorkers", set_max_workers, NULL, RSRC_CONF,
3726                   "Maximum number of threads alive at the same time"),
3727     AP_INIT_TAKE1("ThreadsPerChild", set_threads_per_child, NULL, RSRC_CONF,
3728                   "Number of threads each child creates"),
3729     AP_INIT_TAKE1("ThreadLimit", set_thread_limit, NULL, RSRC_CONF,
3730                   "Maximum number of worker threads per child process for this "
3731                   "run of Apache - Upper limit for ThreadsPerChild"),
3732     AP_INIT_TAKE1("AsyncRequestWorkerFactor", set_worker_factor, NULL, RSRC_CONF,
3733                   "How many additional connects will be accepted per idle "
3734                   "worker thread"),
3735     AP_GRACEFUL_SHUTDOWN_TIMEOUT_COMMAND,
3736     {NULL}
3737 };
3738
3739 AP_DECLARE_MODULE(mpm_event) = {
3740     MPM20_MODULE_STUFF,
3741     NULL,                       /* hook to run before apache parses args */
3742     NULL,                       /* create per-directory config structure */
3743     NULL,                       /* merge per-directory config structures */
3744     NULL,                       /* create per-server config structure */
3745     NULL,                       /* merge per-server config structures */
3746     event_cmds,                 /* command apr_table_t */
3747     event_hooks                 /* register_hooks */
3748 };