granicus.if.org Git - apache/blob - server/mpm/event/event.c

   1 /* Licensed to the Apache Software Foundation (ASF) under one or more
   2  * contributor license agreements.  See the NOTICE file distributed with
   3  * this work for additional information regarding copyright ownership.
   4  * The ASF licenses this file to You under the Apache License, Version 2.0
   5  * (the "License"); you may not use this file except in compliance with
   6  * the License.  You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * This MPM tries to fix the 'keep alive problem' in HTTP.
  19  *
  20  * After a client completes the first request, the client can keep the
  21  * connection open to send more requests with the same socket.  This can save
  22  * significant overhead in creating TCP connections.  However, the major
  23  * disadvantage is that Apache traditionally keeps an entire child
  24  * process/thread waiting for data from the client.  To solve this problem,
  25  * this MPM has a dedicated thread for handling both the Listening sockets,
  26  * and all sockets that are in a Keep Alive status.
  27  *
  28  * The MPM assumes the underlying apr_pollset implementation is somewhat
  29  * threadsafe.  This currently is only compatible with KQueue and EPoll.  This
  30  * enables the MPM to avoid extra high level locking or having to wake up the
  31  * listener thread when a keep-alive socket needs to be sent to it.
  32  *
  33  * This MPM does not perform well on older platforms that do not have very good
  34  * threading, like Linux with a 2.4 kernel, but this does not matter, since we
  35  * require EPoll or KQueue.
  36  *
  37  * For FreeBSD, use 5.3.  It is possible to run this MPM on FreeBSD 5.2.1, if
  38  * you use libkse (see `man libmap.conf`).
  39  *
  40  * For NetBSD, use at least 2.0.
  41  *
  42  * For Linux, you should use a 2.6 kernel, and make sure your glibc has epoll
  43  * support compiled in.
  44  *
  45  */
  46
  47 #include "apr.h"
  48 #include "apr_portable.h"
  49 #include "apr_strings.h"
  50 #include "apr_file_io.h"
  51 #include "apr_thread_proc.h"
  52 #include "apr_signal.h"
  53 #include "apr_thread_mutex.h"
  54 #include "apr_poll.h"
  55 #include "apr_ring.h"
  56 #include "apr_queue.h"
  57 #include "apr_atomic.h"
  58 #define APR_WANT_STRFUNC
  59 #include "apr_want.h"
  60 #include "apr_version.h"
  61
  62 #if APR_HAVE_UNISTD_H
  63 #include <unistd.h>
  64 #endif
  65 #if APR_HAVE_SYS_SOCKET_H
  66 #include <sys/socket.h>
  67 #endif
  68 #if APR_HAVE_SYS_WAIT_H
  69 #include <sys/wait.h>
  70 #endif
  71 #ifdef HAVE_SYS_PROCESSOR_H
  72 #include <sys/processor.h>      /* for bindprocessor() */
  73 #endif
  74
  75 #if !APR_HAS_THREADS
  76 #error The Event MPM requires APR threads, but they are unavailable.
  77 #endif
  78
  79 #include "ap_config.h"
  80 #include "httpd.h"
  81 #include "http_main.h"
  82 #include "http_log.h"
  83 #include "http_config.h"        /* for read_config */
  84 #include "http_core.h"          /* for get_remote_host */
  85 #include "http_connection.h"
  86 #include "ap_mpm.h"
  87 #include "pod.h"
  88 #include "mpm_common.h"
  89 #include "ap_listen.h"
  90 #include "scoreboard.h"
  91 #include "fdqueue.h"
  92 #include "mpm_default.h"
  93 #include "http_vhost.h"
  94 #include "unixd.h"
  95
  96 #include <signal.h>
  97 #include <limits.h>             /* for INT_MAX */
  98
  99
 100 #include "equeue.h"
 101
 102 #if HAVE_SERF
 103 #include "mod_serf.h"
 104 #include "serf.h"
 105 #endif
 106
 107 /* Limit on the total --- clients will be locked out if more servers than
 108  * this are needed.  It is intended solely to keep the server from crashing
 109  * when things get out of hand.
 110  *
 111  * We keep a hard maximum number of servers, for two reasons --- first off,
 112  * in case something goes seriously wrong, we want to stop the fork bomb
 113  * short of actually crashing the machine we're running on by filling some
 114  * kernel table.  Secondly, it keeps the size of the scoreboard file small
 115  * enough that we can read the whole thing without worrying too much about
 116  * the overhead.
 117  */
 118 #ifndef DEFAULT_SERVER_LIMIT
 119 #define DEFAULT_SERVER_LIMIT 16
 120 #endif
 121
 122 /* Admin can't tune ServerLimit beyond MAX_SERVER_LIMIT.  We want
 123  * some sort of compile-time limit to help catch typos.
 124  */
 125 #ifndef MAX_SERVER_LIMIT
 126 #define MAX_SERVER_LIMIT 20000
 127 #endif
 128
 129 /* Limit on the threads per process.  Clients will be locked out if more than
 130  * this are needed.
 131  *
 132  * We keep this for one reason: it keeps the size of the scoreboard file small
 133  * enough that we can read the whole thing without worrying too much about
 134  * the overhead.
 135  */
 136 #ifndef DEFAULT_THREAD_LIMIT
 137 #define DEFAULT_THREAD_LIMIT 64
 138 #endif
 139
 140 /* Admin can't tune ThreadLimit beyond MAX_THREAD_LIMIT.  We want
 141  * some sort of compile-time limit to help catch typos.
 142  */
 143 #ifndef MAX_THREAD_LIMIT
 144 #define MAX_THREAD_LIMIT 100000
 145 #endif
 146
 147 #define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
 148
 149 #if !APR_VERSION_AT_LEAST(1,4,0)
 150 #define apr_time_from_msec(x) (x * 1000)
 151 #endif
 152
 153 #ifndef MAX_SECS_TO_LINGER
 154 #define MAX_SECS_TO_LINGER 30
 155 #endif
 156 #define SECONDS_TO_LINGER  2
 157
 158 /*
 159  * Actual definitions of config globals
 160  */
 161
 162 #ifndef DEFAULT_WORKER_FACTOR
 163 #define DEFAULT_WORKER_FACTOR 2
 164 #endif
 165 #define WORKER_FACTOR_SCALE   16  /* scale factor to allow fractional values */
 166 static unsigned int worker_factor = DEFAULT_WORKER_FACTOR * WORKER_FACTOR_SCALE;
 167
static int threads_per_child = 0;   /* Worker threads per child */
static int ap_daemons_to_start = 0;
static int min_spare_threads = 0;   /* reported for AP_MPMQ_MIN_SPARE_THREADS */
static int max_spare_threads = 0;   /* reported for AP_MPMQ_MAX_SPARE_THREADS */
static int ap_daemons_limit = 0;    /* reported for AP_MPMQ_MAX_DAEMONS */
static int max_workers = 0;
static int server_limit = 0;        /* reported for AP_MPMQ_HARD_LIMIT_DAEMONS */
static int thread_limit = 0;        /* reported for AP_MPMQ_HARD_LIMIT_THREADS;
                                     * also the per-child multiplier in
                                     * ID_FROM_CHILD_THREAD */
static int dying = 0;
static int workers_may_exit = 0;    /* set by signal_threads(ST_UNGRACEFUL) */
static int start_thread_may_exit = 0;
static int listener_may_exit = 0;   /* set by wakeup_listener() */
static int requests_this_child;
static int num_listensocks = 0;     /* loop bound when walking listener_pollfd */
static apr_uint32_t connection_count = 0;   /* atomically incremented per new
                                             * connection, decremented by a
                                             * pool cleanup */
static int resource_shortage = 0;
static fd_queue_t *worker_queue;    /* interrupted by signal_threads() on
                                     * ungraceful termination */
static fd_queue_info_t *worker_queue_info;  /* receives recycled connection
                                             * pools via ap_push_pool() */
static int mpm_state = AP_MPMQ_STARTING;    /* reported for AP_MPMQ_MPM_STATE */
 187
/*
 * Selects the timeout queue a connection is appended to when a
 * pollset_op_t is handled by process_pollop().
 */
typedef enum {
    TIMEOUT_WRITE_COMPLETION,   /* append to write_completion_q */
    TIMEOUT_KEEPALIVE,          /* append to keepalive_q */
    TIMEOUT_LINGER,             /* append to linger_q */
    TIMEOUT_SHORT_LINGER        /* append to short_linger_q */
} timeout_type_e;
 194
/*
 * One pending "queue this connection and poll its socket" request, as
 * consumed by process_pollop().
 */
typedef struct pollset_op_t {
    timeout_type_e timeout_type;    /* which timeout queue cs goes onto */
    conn_state_t *cs;               /* connection whose pfd is added to event_pollset */
    const char *tag;                /* caller label, printed if the pollset add fails */
} pollset_op_t;
 200
/* Ring head type for rings of conn_state_t. */
APR_RING_HEAD(timeout_head_t, conn_state_t);
/* A timeout queue: a ring of connections plus an element count and a name. */
struct timeout_queue {
    struct timeout_head_t head;     /* ring linked through conn_state_t.timeout_list */
    int count;                      /* adjusted by TO_QUEUE_APPEND / TO_QUEUE_REMOVE */
    const char *tag;                /* set by TO_QUEUE_INIT to the stringified queue argument */
};
 207 /*
 208  * Several timeout queues that use different timeouts, so that we always can
 209  * simply append to the end.
 210  *   write_completion_q uses TimeOut
 211  *   keepalive_q        uses KeepAliveTimeOut
 212  *   linger_q           uses MAX_SECS_TO_LINGER
 213  *   short_linger_q     uses SECONDS_TO_LINGER
 214  */
 215 static struct timeout_queue write_completion_q, keepalive_q, linger_q,
 216                             short_linger_q;
 217 static apr_pollfd_t *listener_pollfd;
 218
 219 /*
 220  * Macros for accessing struct timeout_queue.
 221  * For TO_QUEUE_APPEND and TO_QUEUE_REMOVE, timeout_mutex must be held.
 222  */
 223 #define TO_QUEUE_APPEND(q, el)                                            \
 224     do {                                                                  \
 225         APR_RING_INSERT_TAIL(&(q).head, el, conn_state_t, timeout_list);  \
 226         (q).count++;                                                      \
 227     } while (0)
 228
 229 #define TO_QUEUE_REMOVE(q, el)             \
 230     do {                                   \
 231         APR_RING_REMOVE(el, timeout_list); \
 232         (q).count--;                       \
 233     } while (0)
 234
 235 #define TO_QUEUE_INIT(q)                                            \
 236     do {                                                            \
 237             APR_RING_INIT(&(q).head, conn_state_t, timeout_list);   \
 238             (q).tag = #q;                                           \
 239     } while (0)
 240
 241 #define TO_QUEUE_ELEM_INIT(el) APR_RING_ELEM_INIT(el, timeout_list)
 242
 243 /*
 244  * The pollset for sockets that are in any of the timeout queues. Currently
 245  * we use the timeout_mutex to make sure that connections are added/removed
 246  * atomically to/from both event_pollset and a timeout queue. Otherwise
 247  * some confusion can happen under high load if timeout queues and pollset
 248  * get out of sync.
 249  * XXX: It should be possible to make the lock unnecessary in many or even all
 250  * XXX: cases.
 251  */
 252 static apr_pollset_t *event_pollset;
 253
#if HAVE_SERF
/*
 * Pollset/pool pair, only compiled when serf support is enabled.
 * NOTE(review): presumably handed to serf as callback baton; its use is
 * not visible in this part of the file -- confirm.
 */
typedef struct {
    apr_pollset_t *pollset;
    apr_pool_t *pool;
} s_baton_t;

/* NOTE(review): presumably the per-process serf context -- confirm. */
static serf_context_t *g_serf;
#endif
 262
/* The structure used to pass unique initialization info to each thread */
typedef struct
{
    int pid;    /* NOTE(review): presumably the child process slot -- confirm */
    int tid;    /* NOTE(review): presumably the thread slot within the child -- confirm */
    int sd;     /* NOTE(review): meaning not visible in this section -- confirm */
} proc_info;
 270
/* Structure used to pass information to the thread responsible for
 * creating the rest of the threads.
 */
typedef struct
{
    apr_thread_t **threads;         /* NOTE(review): presumably the worker thread handles -- confirm */
    apr_thread_t *listener;         /* NOTE(review): presumably the listener thread handle -- confirm */
    int child_num_arg;              /* NOTE(review): presumably this child's slot number -- confirm */
    apr_threadattr_t *threadattr;   /* NOTE(review): presumably attributes used for thread creation -- confirm */
} thread_starter;
 281
/* Discriminator stored in listener_poll_type to say what a pollfd refers to. */
typedef enum
{
    PT_CSD,     /* client connection; process_socket() sets baton to the conn_state_t */
    PT_ACCEPT   /* NOTE(review): presumably a listening socket -- confirm */
#if HAVE_SERF
    , PT_SERF   /* only available when built with serf support */
#endif
} poll_type_e;
 290
/*
 * Tagged pointer attached to a pollfd's client_data (process_socket() does
 * this for client connections).
 */
typedef struct
{
    poll_type_e type;   /* what kind of object baton points to */
    void *baton;        /* for PT_CSD: the connection's conn_state_t */
} listener_poll_type;
 296
/* data retained by event across load/unload of the module
 * allocated on first call to pre-config hook; located on
 * subsequent calls to pre-config hook
 */
typedef struct event_retained_data {
    int first_server_limit;
    int first_thread_limit;
    int module_loads;
    int sick_child_detected;
    ap_generation_t my_generation;  /* reported for AP_MPMQ_GENERATION and
                                     * passed along with MPM_CHILD_STARTED */
    int volatile is_graceful; /* set from signal handler */
    int maxclients_reported;
    /*
     * The max child slot ever assigned, preserved across restarts.  Necessary
     * to deal with MaxRequestWorkers changes across AP_SIG_GRACEFUL restarts.
     * We use this value to optimize routines that have to scan the entire
     * scoreboard.
     */
    int max_daemons_limit;
    /*
     * idle_spawn_rate is the number of children that will be spawned on the
     * next maintenance cycle if there aren't enough idle servers.  It is
     * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by
     * without the need to spawn.
     */
    int idle_spawn_rate;
#ifndef MAX_SPAWN_RATE
#define MAX_SPAWN_RATE        (32)
#endif
    int hold_off_on_exponential_spawning;
} event_retained_data;
/* The retained data instance; event_query() and the shutdown/restart
 * helpers read and write through this pointer. */
static event_retained_data *retained;
 329
 330 #define ID_FROM_CHILD_THREAD(c, t)    ((c * thread_limit) + t)
 331
 332 static ap_event_pod_t *pod;
 333
 334 /* The event MPM respects a couple of runtime flags that can aid
 335  * in debugging. Setting the -DNO_DETACH flag will prevent the root process
 336  * from detaching from its controlling terminal. Additionally, setting
 337  * the -DONE_PROCESS flag (which implies -DNO_DETACH) will get you the
 338  * child_main loop running in the process which originally started up.
 339  * This gives you a pretty nice debugging environment.  (You'll get a SIGHUP
 340  * early in standalone_main; just continue through.  This is the server
 341  * trying to kill off any child processes which it might have lying
 342  * around --- Apache doesn't keep track of their pids, it just sends
 343  * SIGHUP to the process group, ignoring it in the root process.
 344  * Continue through and you'll be fine.).
 345  */
 346
 347 static int one_process = 0;
 348
 349 #ifdef DEBUG_SIGSTOP
 350 int raise_sigstop_flags;
 351 #endif
 352
 353 static apr_pool_t *pconf;       /* Pool for config stuff */
 354 static apr_pool_t *pchild;      /* Pool for httpd child stuff */
 355
 356 static pid_t ap_my_pid;         /* Linux getpid() doesn't work except in main
 357                                    thread. Use this instead */
 358 static pid_t parent_pid;
 359 static apr_os_thread_t *listener_os_thread;
 360
 361 /* The LISTENER_SIGNAL signal will be sent from the main thread to the
 362  * listener thread to wake it up for graceful termination (what a child
 363  * process from an old generation does when the admin does "apachectl
 364  * graceful").  This signal will be blocked in all threads of a child
 365  * process except for the listener thread.
 366  */
 367 #define LISTENER_SIGNAL     SIGHUP
 368
 369 /* An array of socket descriptors in use by each thread used to
 370  * perform a non-graceful (forced) shutdown of the server.
 371  */
 372 static apr_socket_t **worker_sockets;
 373 static ap_equeue_t **worker_equeues;
 374
 375 static void disable_listensocks(int process_slot)
 376 {
 377     int i;
 378     for (i = 0; i < num_listensocks; i++) {
 379         apr_pollset_remove(event_pollset, &listener_pollfd[i]);
 380     }
 381     ap_scoreboard_image->parent[process_slot].not_accepting = 1;
 382 }
 383
 384 static void enable_listensocks(int process_slot)
 385 {
 386     int i;
 387     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
 388                  "Accepting new connections again: "
 389                  "%u active conns, %u idle workers",
 390                  apr_atomic_read32(&connection_count),
 391                  ap_queue_info_get_idlers(worker_queue_info));
 392     for (i = 0; i < num_listensocks; i++)
 393         apr_pollset_add(event_pollset, &listener_pollfd[i]);
 394     /*
 395      * XXX: This is not yet optimal. If many workers suddenly become available,
 396      * XXX: the parent may kill some processes off too soon.
 397      */
 398     ap_scoreboard_image->parent[process_slot].not_accepting = 0;
 399 }
 400
 401 static void close_worker_sockets(void)
 402 {
 403     int i;
 404     for (i = 0; i < threads_per_child; i++) {
 405         if (worker_sockets[i]) {
 406             apr_socket_close(worker_sockets[i]);
 407             worker_sockets[i] = NULL;
 408         }
 409     }
 410 }
 411
 412 static void wakeup_listener(void)
 413 {
 414     listener_may_exit = 1;
 415     if (!listener_os_thread) {
 416         /* XXX there is an obscure path that this doesn't handle perfectly:
 417          *     right after listener thread is created but before
 418          *     listener_os_thread is set, the first worker thread hits an
 419          *     error and starts graceful termination
 420          */
 421         return;
 422     }
 423
 424     /* unblock the listener if it's waiting for a worker */
 425     ap_queue_info_term(worker_queue_info);
 426
 427     /*
 428      * we should just be able to "kill(ap_my_pid, LISTENER_SIGNAL)" on all
 429      * platforms and wake up the listener thread since it is the only thread
 430      * with SIGHUP unblocked, but that doesn't work on Linux
 431      */
 432 #ifdef HAVE_PTHREAD_KILL
 433     pthread_kill(*listener_os_thread, LISTENER_SIGNAL);
 434 #else
 435     kill(ap_my_pid, LISTENER_SIGNAL);
 436 #endif
 437 }
 438
/* Termination modes accepted by signal_threads() and stored in terminate_mode. */
#define ST_INIT              0  /* initial value: no termination requested yet */
#define ST_GRACEFUL          1  /* graceful: listener tells the workers when to exit */
#define ST_UNGRACEFUL        2  /* forced: workers are told to exit and their sockets closed */

/* Last mode passed to signal_threads(); used to ignore repeated requests. */
static int terminate_mode = ST_INIT;
 444
 445 static void signal_threads(int mode)
 446 {
 447     if (terminate_mode == mode) {
 448         return;
 449     }
 450     terminate_mode = mode;
 451     mpm_state = AP_MPMQ_STOPPING;
 452
 453     /* in case we weren't called from the listener thread, wake up the
 454      * listener thread
 455      */
 456     wakeup_listener();
 457
 458     /* for ungraceful termination, let the workers exit now;
 459      * for graceful termination, the listener thread will notify the
 460      * workers to exit once it has stopped accepting new connections
 461      */
 462     if (mode == ST_UNGRACEFUL) {
 463         workers_may_exit = 1;
 464         ap_queue_interrupt_all(worker_queue);
 465         close_worker_sockets(); /* forcefully kill all current connections */
 466     }
 467 }
 468
 469 static int event_query(int query_code, int *result, apr_status_t *rv)
 470 {
 471     *rv = APR_SUCCESS;
 472     switch (query_code) {
 473     case AP_MPMQ_MAX_DAEMON_USED:
 474         *result = retained->max_daemons_limit;
 475         break;
 476     case AP_MPMQ_IS_THREADED:
 477         *result = AP_MPMQ_STATIC;
 478         break;
 479     case AP_MPMQ_IS_FORKED:
 480         *result = AP_MPMQ_DYNAMIC;
 481         break;
 482     case AP_MPMQ_IS_ASYNC:
 483         *result = 1;
 484         break;
 485     case AP_MPMQ_HAS_SERF:
 486         *result = 1;
 487         break;
 488     case AP_MPMQ_HARD_LIMIT_DAEMONS:
 489         *result = server_limit;
 490         break;
 491     case AP_MPMQ_HARD_LIMIT_THREADS:
 492         *result = thread_limit;
 493         break;
 494     case AP_MPMQ_MAX_THREADS:
 495         *result = threads_per_child;
 496         break;
 497     case AP_MPMQ_MIN_SPARE_DAEMONS:
 498         *result = 0;
 499         break;
 500     case AP_MPMQ_MIN_SPARE_THREADS:
 501         *result = min_spare_threads;
 502         break;
 503     case AP_MPMQ_MAX_SPARE_DAEMONS:
 504         *result = 0;
 505         break;
 506     case AP_MPMQ_MAX_SPARE_THREADS:
 507         *result = max_spare_threads;
 508         break;
 509     case AP_MPMQ_MAX_REQUESTS_DAEMON:
 510         *result = ap_max_requests_per_child;
 511         break;
 512     case AP_MPMQ_MAX_DAEMONS:
 513         *result = ap_daemons_limit;
 514         break;
 515     case AP_MPMQ_MPM_STATE:
 516         *result = mpm_state;
 517         break;
 518     case AP_MPMQ_GENERATION:
 519         *result = retained->my_generation;
 520         break;
 521     default:
 522         *rv = APR_ENOTIMPL;
 523         break;
 524     }
 525     return OK;
 526 }
 527
 528 static void event_note_child_killed(int childnum, pid_t pid, ap_generation_t gen)
 529 {
 530     if (childnum != -1) { /* child had a scoreboard slot? */
 531         ap_run_child_status(ap_server_conf,
 532                             ap_scoreboard_image->parent[childnum].pid,
 533                             ap_scoreboard_image->parent[childnum].generation,
 534                             childnum, MPM_CHILD_EXITED);
 535         ap_scoreboard_image->parent[childnum].pid = 0;
 536     }
 537     else {
 538         ap_run_child_status(ap_server_conf, pid, gen, -1, MPM_CHILD_EXITED);
 539     }
 540 }
 541
 542 static void event_note_child_started(int slot, pid_t pid)
 543 {
 544     ap_scoreboard_image->parent[slot].pid = pid;
 545     ap_run_child_status(ap_server_conf,
 546                         ap_scoreboard_image->parent[slot].pid,
 547                         retained->my_generation, slot, MPM_CHILD_STARTED);
 548 }
 549
 550 static void event_note_child_lost_slot(int slot, pid_t newpid)
 551 {
 552     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
 553                  "pid %" APR_PID_T_FMT " taking over scoreboard slot from "
 554                  "%" APR_PID_T_FMT "%s",
 555                  newpid,
 556                  ap_scoreboard_image->parent[slot].pid,
 557                  ap_scoreboard_image->parent[slot].quiescing ?
 558                  " (quiescing)" : "");
 559     ap_run_child_status(ap_server_conf,
 560                         ap_scoreboard_image->parent[slot].pid,
 561                         ap_scoreboard_image->parent[slot].generation,
 562                         slot, MPM_CHILD_LOST_SLOT);
 563     /* Don't forget about this exiting child process, or we
 564      * won't be able to kill it if it doesn't exit by the
 565      * time the server is shut down.
 566      */
 567     ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid,
 568                                   ap_scoreboard_image->parent[slot].generation);
 569 }
 570
 571 static const char *event_get_name(void)
 572 {
 573     return "event";
 574 }
 575
 576 /* a clean exit from a child with proper cleanup */
 577 static void clean_child_exit(int code) __attribute__ ((noreturn));
 578 static void clean_child_exit(int code)
 579 {
 580     mpm_state = AP_MPMQ_STOPPING;
 581     if (pchild) {
 582         apr_pool_destroy(pchild);
 583     }
 584
 585     if (one_process) {
 586         event_note_child_killed(/* slot */ 0, 0, 0);
 587     }
 588
 589     exit(code);
 590 }
 591
 592 static void just_die(int sig)
 593 {
 594     clean_child_exit(0);
 595 }
 596
 597 /*****************************************************************
 598  * Connection structures and accounting...
 599  */
 600
 601 static int child_fatal;
 602
 603 /* volatile because they're updated from a signal handler */
 604 static int volatile shutdown_pending;
 605 static int volatile restart_pending;
 606
 607 static apr_status_t decrement_connection_count(void *dummy) {
 608     apr_atomic_dec32(&connection_count);
 609     return APR_SUCCESS;
 610 }
 611
 612 /*
 613  * ap_start_shutdown() and ap_start_restart(), below, are a first stab at
 614  * functions to initiate shutdown or restart without relying on signals.
 615  * Previously this was initiated in sig_term() and restart() signal handlers,
 616  * but we want to be able to start a shutdown/restart from other sources --
 617  * e.g. on Win32, from the service manager. Now the service manager can
 618  * call ap_start_shutdown() or ap_start_restart() as appropriate.  Note that
 619  * these functions can also be called by the child processes, since global
 620  * variables are no longer used to pass on the required action to the parent.
 621  *
 622  * These should only be called from the parent process itself, since the
 623  * parent process will use the shutdown_pending and restart_pending variables
 624  * to determine whether to shutdown or restart. The child process should
 625  * call signal_parent() directly to tell the parent to die -- this will
 626  * cause neither of those variables to be set, which the parent will
 627  * assume means something serious is wrong (which it will be, for the
 628  * child to force an exit) and so do an exit anyway.
 629  */
 630
 631 static void ap_start_shutdown(int graceful)
 632 {
 633     mpm_state = AP_MPMQ_STOPPING;
 634     if (shutdown_pending == 1) {
 635         /* Um, is this _probably_ not an error, if the user has
 636          * tried to do a shutdown twice quickly, so we won't
 637          * worry about reporting it.
 638          */
 639         return;
 640     }
 641     shutdown_pending = 1;
 642     retained->is_graceful = graceful;
 643 }
 644
 645 /* do a graceful restart if graceful == 1 */
 646 static void ap_start_restart(int graceful)
 647 {
 648     mpm_state = AP_MPMQ_STOPPING;
 649     if (restart_pending == 1) {
 650         /* Probably not an error - don't bother reporting it */
 651         return;
 652     }
 653     restart_pending = 1;
 654     retained->is_graceful = graceful;
 655 }
 656
 657 static void sig_term(int sig)
 658 {
 659     ap_start_shutdown(sig == AP_SIG_GRACEFUL_STOP);
 660 }
 661
 662 static void restart(int sig)
 663 {
 664     ap_start_restart(sig == AP_SIG_GRACEFUL);
 665 }
 666
 667 static void set_signals(void)
 668 {
 669 #ifndef NO_USE_SIGACTION
 670     struct sigaction sa;
 671 #endif
 672
 673     if (!one_process) {
 674         ap_fatal_signal_setup(ap_server_conf, pconf);
 675     }
 676
 677 #ifndef NO_USE_SIGACTION
 678     sigemptyset(&sa.sa_mask);
 679     sa.sa_flags = 0;
 680
 681     sa.sa_handler = sig_term;
 682     if (sigaction(SIGTERM, &sa, NULL) < 0)
 683         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 684                      "sigaction(SIGTERM)");
 685 #ifdef AP_SIG_GRACEFUL_STOP
 686     if (sigaction(AP_SIG_GRACEFUL_STOP, &sa, NULL) < 0)
 687         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 688                      "sigaction(" AP_SIG_GRACEFUL_STOP_STRING ")");
 689 #endif
 690 #ifdef SIGINT
 691     if (sigaction(SIGINT, &sa, NULL) < 0)
 692         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 693                      "sigaction(SIGINT)");
 694 #endif
 695 #ifdef SIGXCPU
 696     sa.sa_handler = SIG_DFL;
 697     if (sigaction(SIGXCPU, &sa, NULL) < 0)
 698         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 699                      "sigaction(SIGXCPU)");
 700 #endif
 701 #ifdef SIGXFSZ
 702     /* For systems following the LFS standard, ignoring SIGXFSZ allows
 703      * a write() beyond the 2GB limit to fail gracefully with E2BIG
 704      * rather than terminate the process. */
 705     sa.sa_handler = SIG_IGN;
 706     if (sigaction(SIGXFSZ, &sa, NULL) < 0)
 707         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 708                      "sigaction(SIGXFSZ)");
 709 #endif
 710 #ifdef SIGPIPE
 711     sa.sa_handler = SIG_IGN;
 712     if (sigaction(SIGPIPE, &sa, NULL) < 0)
 713         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 714                      "sigaction(SIGPIPE)");
 715 #endif
 716
 717     /* we want to ignore HUPs and AP_SIG_GRACEFUL while we're busy
 718      * processing one */
 719     sigaddset(&sa.sa_mask, SIGHUP);
 720     sigaddset(&sa.sa_mask, AP_SIG_GRACEFUL);
 721     sa.sa_handler = restart;
 722     if (sigaction(SIGHUP, &sa, NULL) < 0)
 723         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 724                      "sigaction(SIGHUP)");
 725     if (sigaction(AP_SIG_GRACEFUL, &sa, NULL) < 0)
 726         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 727                      "sigaction(" AP_SIG_GRACEFUL_STRING ")");
 728 #else
 729     if (!one_process) {
 730 #ifdef SIGXCPU
 731         apr_signal(SIGXCPU, SIG_DFL);
 732 #endif /* SIGXCPU */
 733 #ifdef SIGXFSZ
 734         apr_signal(SIGXFSZ, SIG_IGN);
 735 #endif /* SIGXFSZ */
 736     }
 737
 738     apr_signal(SIGTERM, sig_term);
 739 #ifdef SIGHUP
 740     apr_signal(SIGHUP, restart);
 741 #endif /* SIGHUP */
 742 #ifdef AP_SIG_GRACEFUL
 743     apr_signal(AP_SIG_GRACEFUL, restart);
 744 #endif /* AP_SIG_GRACEFUL */
 745 #ifdef AP_SIG_GRACEFUL_STOP
 746      apr_signal(AP_SIG_GRACEFUL_STOP, sig_term);
 747 #endif /* AP_SIG_GRACEFUL_STOP */
 748 #ifdef SIGPIPE
 749     apr_signal(SIGPIPE, SIG_IGN);
 750 #endif /* SIGPIPE */
 751
 752 #endif
 753 }
 754
 755 static void process_pollop(pollset_op_t *op)
 756 {
 757     apr_status_t rv;
 758     conn_state_t *cs = op->cs;
 759
 760     switch (op->timeout_type) {
 761     case TIMEOUT_WRITE_COMPLETION:
 762         TO_QUEUE_APPEND(write_completion_q, cs);
 763         break;
 764     case TIMEOUT_KEEPALIVE:
 765         TO_QUEUE_APPEND(keepalive_q, cs);
 766         break;
 767     case TIMEOUT_LINGER:
 768         TO_QUEUE_APPEND(linger_q, cs);
 769         break;
 770     case TIMEOUT_SHORT_LINGER:
 771         TO_QUEUE_APPEND(short_linger_q, cs);
 772         break;
 773     }
 774
 775     rv = apr_pollset_add(event_pollset, &op->cs->pfd);
 776
 777     if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
 778         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
 779                      "%s: apr_pollset_add failure", op->tag);
 780     }
 781 }
 782
 783 /*
 784  * close our side of the connection
 785  * Pre-condition: cs is not in any timeout queue and not in the pollset,
 786  *                timeout_mutex is not locked
 787  * return: 0 if connection is fully closed,
 788  *         1 if connection is lingering
 789  * may be called by listener or by worker thread.
 790  * the eq may be null if called from the listener thread,
 791  * and the pollset operations are done directly by this function.
 792  */
 793 static int start_lingering_close(conn_state_t *cs, ap_equeue_t *eq)
 794 {
 795     apr_status_t rv;
 796
 797     cs->c->sbh = NULL;  /* prevent scoreboard updates from the listener
 798                          * worker will loop around soon and set SERVER_READY
 799                          */
 800
 801     if (ap_start_lingering_close(cs->c)) {
 802         apr_pool_clear(cs->p);
 803         ap_push_pool(worker_queue_info, cs->p);
 804         return 0;
 805     }
 806     else {
 807         apr_socket_t *csd = ap_get_conn_socket(cs->c);
 808         pollset_op_t localv;
 809         pollset_op_t *v;
 810
 811         if (eq) {
 812             v = ap_equeue_writer_value(eq);
 813         }
 814         else {
 815             v = &localv;
 816         }
 817
 818         rv = apr_socket_timeout_set(csd, 0);
 819         AP_DEBUG_ASSERT(rv == APR_SUCCESS);
 820         /*
 821          * If some module requested a shortened waiting period, only wait for
 822          * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain
 823          * DoS attacks.
 824          */
 825         if (apr_table_get(cs->c->notes, "short-lingering-close")) {
 826             cs->expiration_time =
 827                 apr_time_now() + apr_time_from_sec(SECONDS_TO_LINGER);
 828             v->timeout_type = TIMEOUT_SHORT_LINGER;
 829             v->tag = "start_lingering_close(short)";
 830             cs->state = CONN_STATE_LINGER_SHORT;
 831         }
 832         else {
 833             cs->expiration_time =
 834                 apr_time_now() + apr_time_from_sec(MAX_SECS_TO_LINGER);
 835             v->timeout_type = TIMEOUT_LINGER;
 836             v->tag = "start_lingering_close(normal)";
 837             cs->state = CONN_STATE_LINGER_NORMAL;
 838         }
 839
 840         cs->pfd.reqevents = APR_POLLIN | APR_POLLHUP | APR_POLLERR;
 841         v->cs = cs;
 842         if (eq != NULL) {
 843             ap_equeue_writer_onward(eq);
 844             apr_pollset_wakeup(event_pollset);
 845         }
 846         else {
 847             process_pollop(v);
 848         }
 849     }
 850     return 1;
 851 }
 852
 853 /*
 854  * forcibly close a lingering connection after the lingering period has
 855  * expired
 856  * Pre-condition: cs is not in any timeout queue and not in the pollset
 857  * return: irrelevant (need same prototype as start_lingering_close)
 858  */
 859 static int stop_lingering_close(conn_state_t *cs, ap_equeue_t *eq)
 860 {
 861     apr_status_t rv;
 862     apr_socket_t *csd = ap_get_conn_socket(cs->c);
 863     ap_log_error(APLOG_MARK, APLOG_TRACE4, 0, ap_server_conf,
 864                  "socket reached timeout in lingering-close state");
 865     rv = apr_socket_close(csd);
 866     if (rv != APR_SUCCESS) {
 867         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, "error closing socket");
 868         AP_DEBUG_ASSERT(0);
 869     }
 870     apr_pool_clear(cs->p);
 871     ap_push_pool(worker_queue_info, cs->p);
 872     return 0;
 873 }
 874
 875 /*
 876  * process one connection in the worker
 877  * return: 1 if the connection has been completed,
 878  *         0 if it is still open and waiting for some event
 879  */
 880 static int process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock,
 881                           conn_state_t * cs,
 882                           ap_equeue_t *eq,
 883                           int my_child_num,
 884                           int my_thread_num)
 885 {
 886     conn_rec *c;
 887     long conn_id = ID_FROM_CHILD_THREAD(my_child_num, my_thread_num);
 888     int rc;
 889     ap_sb_handle_t *sbh;
 890
 891     ap_create_sb_handle(&sbh, p, my_child_num, my_thread_num);
 892
 893     if (cs == NULL) {           /* This is a new connection */
 894         listener_poll_type *pt = apr_pcalloc(p, sizeof(*pt));
 895         cs = apr_pcalloc(p, sizeof(conn_state_t));
 896         cs->bucket_alloc = apr_bucket_alloc_create(p);
 897         c = ap_run_create_connection(p, ap_server_conf, sock,
 898                                      conn_id, sbh, cs->bucket_alloc);
 899         if (!c) {
 900             apr_bucket_alloc_destroy(cs->bucket_alloc);
 901             apr_pool_clear(p);
 902             ap_push_pool(worker_queue_info, p);
 903             return 1;
 904         }
 905         apr_atomic_inc32(&connection_count);
 906         apr_pool_cleanup_register(c->pool, NULL, decrement_connection_count, apr_pool_cleanup_null);
 907         c->current_thread = thd;
 908         cs->c = c;
 909         c->cs = cs;
 910         cs->p = p;
 911         cs->pfd.desc_type = APR_POLL_SOCKET;
 912         cs->pfd.reqevents = APR_POLLIN;
 913         cs->pfd.desc.s = sock;
 914         pt->type = PT_CSD;
 915         pt->baton = cs;
 916         cs->pfd.client_data = pt;
 917
 918         ap_update_vhost_given_ip(c);
 919
 920         rc = ap_run_pre_connection(c, sock);
 921         if (rc != OK && rc != DONE) {
 922             ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c,
 923                           "process_socket: connection aborted");
 924             c->aborted = 1;
 925         }
 926
 927         /**
 928          * XXX If the platform does not have a usable way of bundling
 929          * accept() with a socket readability check, like Win32,
 930          * and there are measurable delays before the
 931          * socket is readable due to the first data packet arriving,
 932          * it might be better to create the cs on the listener thread
 933          * with the state set to CONN_STATE_CHECK_REQUEST_LINE_READABLE
 934          *
 935          * FreeBSD users will want to enable the HTTP accept filter
 936          * module in their kernel for the highest performance
 937          * When the accept filter is active, sockets are kept in the
 938          * kernel until a HTTP request is received.
 939          */
 940         cs->state = CONN_STATE_READ_REQUEST_LINE;
 941
 942     }
 943     else {
 944         c = cs->c;
 945         c->sbh = sbh;
 946         c->current_thread = thd;
 947     }
 948
 949     if (c->clogging_input_filters && !c->aborted) {
 950         /* Since we have an input filter which 'cloggs' the input stream,
 951          * like mod_ssl, lets just do the normal read from input filters,
 952          * like the Worker MPM does.
 953          */
 954         ap_run_process_connection(c);
 955         if (cs->state != CONN_STATE_SUSPENDED) {
 956             cs->state = CONN_STATE_LINGER;
 957         }
 958     }
 959
 960 read_request:
 961     if (cs->state == CONN_STATE_READ_REQUEST_LINE) {
 962         if (!c->aborted) {
 963             ap_run_process_connection(c);
 964
 965             /* state will be updated upon return
 966              * fall thru to either wait for readability/timeout or
 967              * do lingering close
 968              */
 969         }
 970         else {
 971             cs->state = CONN_STATE_LINGER;
 972         }
 973     }
 974
 975     if (cs->state == CONN_STATE_WRITE_COMPLETION) {
 976         ap_filter_t *output_filter = c->output_filters;
 977         apr_status_t rv;
 978         ap_update_child_status_from_conn(sbh, SERVER_BUSY_WRITE, c);
 979         while (output_filter->next != NULL) {
 980             output_filter = output_filter->next;
 981         }
 982         rv = output_filter->frec->filter_func.out_func(output_filter, NULL);
 983         if (rv != APR_SUCCESS) {
 984             ap_log_cerror(APLOG_MARK, APLOG_DEBUG, rv, c,
 985                           "network write failure in core output filter");
 986             cs->state = CONN_STATE_LINGER;
 987         }
 988         else if (c->data_in_output_filters) {
 989             /* Still in WRITE_COMPLETION_STATE:
 990              * Set a write timeout for this connection, and let the
 991              * event thread poll for writeability.
 992              */
 993             pollset_op_t *v = ap_equeue_writer_value(eq);
 994
 995             cs->expiration_time = ap_server_conf->timeout + apr_time_now();
 996             cs->pfd.reqevents = APR_POLLOUT | APR_POLLHUP | APR_POLLERR;
 997
 998             v->cs = cs;
 999             v->timeout_type = TIMEOUT_WRITE_COMPLETION;
1000             v->tag = "process_socket(write_completion)";
1001
1002             ap_equeue_writer_onward(eq);
1003             apr_pollset_wakeup(event_pollset);
1004             return 1;
1005         }
1006         else if (c->keepalive != AP_CONN_KEEPALIVE || c->aborted ||
1007             listener_may_exit) {
1008             c->cs->state = CONN_STATE_LINGER;
1009         }
1010         else if (c->data_in_input_filters) {
1011             cs->state = CONN_STATE_READ_REQUEST_LINE;
1012             goto read_request;
1013         }
1014         else {
1015             cs->state = CONN_STATE_CHECK_REQUEST_LINE_READABLE;
1016         }
1017     }
1018
1019     if (cs->state == CONN_STATE_LINGER) {
1020         if (!start_lingering_close(cs, eq)) {
1021             return 0;
1022         }
1023     }
1024     else if (cs->state == CONN_STATE_CHECK_REQUEST_LINE_READABLE) {
1025         pollset_op_t *v;
1026
1027         /* It greatly simplifies the logic to use a single timeout value here
1028          * because the new element can just be added to the end of the list and
1029          * it will stay sorted in expiration time sequence.  If brand new
1030          * sockets are sent to the event thread for a readability check, this
1031          * will be a slight behavior change - they use the non-keepalive
1032          * timeout today.  With a normal client, the socket will be readable in
1033          * a few milliseconds anyway.
1034          */
1035         cs->expiration_time = ap_server_conf->keep_alive_timeout +
1036                               apr_time_now();
1037
1038         /* Add work to pollset. */
1039         v = ap_equeue_writer_value(eq);
1040         v->timeout_type = TIMEOUT_KEEPALIVE;
1041         v->cs = cs;
1042         cs->pfd.reqevents = APR_POLLIN;
1043         v->tag = "process_socket(keepalive)";
1044         ap_equeue_writer_onward(eq);
1045         apr_pollset_wakeup(event_pollset);
1046     }
1047     return 1;
1048 }
1049
1050 /* requests_this_child has gone to zero or below.  See if the admin coded
1051    "MaxConnectionsPerChild 0", and keep going in that case.  Doing it this way
1052    simplifies the hot path in worker_thread */
1053 static void check_infinite_requests(void)
1054 {
1055     if (ap_max_requests_per_child) {
1056         signal_threads(ST_GRACEFUL);
1057     }
1058     else {
1059         requests_this_child = INT_MAX;  /* keep going */
1060     }
1061 }
1062
1063 static void close_listeners(int process_slot, int *closed) {
1064     if (!*closed) {
1065         int i;
1066         disable_listensocks(process_slot);
1067         ap_close_listeners();
1068         *closed = 1;
1069         dying = 1;
1070         ap_scoreboard_image->parent[process_slot].quiescing = 1;
1071         for (i = 0; i < threads_per_child; ++i) {
1072             ap_update_child_status_from_indexes(process_slot, i,
1073                                                 SERVER_GRACEFUL, NULL);
1074         }
1075         /* wake up the main thread */
1076         kill(ap_my_pid, SIGTERM);
1077     }
1078 }
1079
1080 static void unblock_signal(int sig)
1081 {
1082     sigset_t sig_mask;
1083
1084     sigemptyset(&sig_mask);
1085     sigaddset(&sig_mask, sig);
1086 #if defined(SIGPROCMASK_SETS_THREAD_MASK)
1087     sigprocmask(SIG_UNBLOCK, &sig_mask, NULL);
1088 #else
1089     pthread_sigmask(SIG_UNBLOCK, &sig_mask, NULL);
1090 #endif
1091 }
1092
/*
 * Signal handler that intentionally does nothing.
 *
 * XXX If specifying SIG_IGN is guaranteed to unblock a syscall,
 *     then this goofy function is not needed.
 */
static void dummy_signal_handler(int sig)
{
    (void) sig;     /* deliberately ignored */
}
1099
1100
1101 #if HAVE_SERF
1102 static apr_status_t s_socket_add(void *user_baton,
1103                                  apr_pollfd_t *pfd,
1104                                  void *serf_baton)
1105 {
1106     s_baton_t *s = (s_baton_t*)user_baton;
1107     /* XXXXX: recycle listener_poll_types */
1108     listener_poll_type *pt = ap_malloc(sizeof(*pt));
1109     pt->type = PT_SERF;
1110     pt->baton = serf_baton;
1111     pfd->client_data = pt;
1112     return apr_pollset_add(s->pollset, pfd);
1113 }
1114
1115 static apr_status_t s_socket_remove(void *user_baton,
1116                                     apr_pollfd_t *pfd,
1117                                     void *serf_baton)
1118 {
1119     s_baton_t *s = (s_baton_t*)user_baton;
1120     listener_poll_type *pt = pfd->client_data;
1121     free(pt);
1122     return apr_pollset_remove(s->pollset, pfd);
1123 }
1124 #endif
1125
1126 static apr_status_t init_pollset(apr_pool_t *p)
1127 {
1128 #if HAVE_SERF
1129     s_baton_t *baton = NULL;
1130 #endif
1131     ap_listen_rec *lr;
1132     listener_poll_type *pt;
1133     int i = 0;
1134
1135     TO_QUEUE_INIT(write_completion_q);
1136     TO_QUEUE_INIT(keepalive_q);
1137     TO_QUEUE_INIT(linger_q);
1138     TO_QUEUE_INIT(short_linger_q);
1139
1140     listener_pollfd = apr_palloc(p, sizeof(apr_pollfd_t) * num_listensocks);
1141     for (lr = ap_listeners; lr != NULL; lr = lr->next, i++) {
1142         apr_pollfd_t *pfd;
1143         AP_DEBUG_ASSERT(i < num_listensocks);
1144         pfd = &listener_pollfd[i];
1145         pt = apr_pcalloc(p, sizeof(*pt));
1146         pfd->desc_type = APR_POLL_SOCKET;
1147         pfd->desc.s = lr->sd;
1148         pfd->reqevents = APR_POLLIN;
1149
1150         pt->type = PT_ACCEPT;
1151         pt->baton = lr;
1152
1153         pfd->client_data = pt;
1154
1155         apr_socket_opt_set(pfd->desc.s, APR_SO_NONBLOCK, 1);
1156         apr_pollset_add(event_pollset, pfd);
1157
1158         lr->accept_func = ap_unixd_accept;
1159     }
1160
1161 #if HAVE_SERF
1162     baton = apr_pcalloc(p, sizeof(*baton));
1163     baton->pollset = event_pollset;
1164     /* TODO: subpools, threads, reuse, etc.  -- currently use malloc() inside :( */
1165     baton->pool = p;
1166
1167     g_serf = serf_context_create_ex(baton,
1168                                     s_socket_add,
1169                                     s_socket_remove, p);
1170
1171     ap_register_provider(p, "mpm_serf",
1172                          "instance", "0", g_serf);
1173
1174 #endif
1175
1176     return APR_SUCCESS;
1177 }
1178
1179 static apr_status_t push_timer2worker(timer_event_t* te)
1180 {
1181     return ap_queue_push_timer(worker_queue, te);
1182 }
1183
1184 /*
1185  * Pre-condition: pfd->cs is neither in pollset nor timeout queue
1186  * this function may only be called by the listener
1187  */
1188 static apr_status_t push2worker(const apr_pollfd_t * pfd,
1189                                 apr_pollset_t * pollset)
1190 {
1191     listener_poll_type *pt = (listener_poll_type *) pfd->client_data;
1192     conn_state_t *cs = (conn_state_t *) pt->baton;
1193     apr_status_t rc;
1194
1195     rc = ap_queue_push(worker_queue, cs->pfd.desc.s, cs, cs->p);
1196     if (rc != APR_SUCCESS) {
1197         /* trash the connection; we couldn't queue the connected
1198          * socket to a worker
1199          */
1200         apr_bucket_alloc_destroy(cs->bucket_alloc);
1201         apr_socket_close(cs->pfd.desc.s);
1202         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1203                      ap_server_conf, "push2worker: ap_queue_push failed");
1204         apr_pool_clear(cs->p);
1205         ap_push_pool(worker_queue_info, cs->p);
1206     }
1207
1208     return rc;
1209 }
1210
1211 /* get_worker:
1212  *     If *have_idle_worker_p == 0, reserve a worker thread, and set
1213  *     *have_idle_worker_p = 1.
1214  *     If *have_idle_worker_p is already 1, will do nothing.
1215  *     If blocking == 1, block if all workers are currently busy.
1216  *     If no worker was available immediately, will set *all_busy to 1.
1217  *     XXX: If there are no workers, we should not block immediately but
1218  *     XXX: close all keep-alive connections first.
1219  */
1220 static void get_worker(int *have_idle_worker_p, int blocking, int *all_busy)
1221 {
1222     apr_status_t rc;
1223
1224     if (*have_idle_worker_p) {
1225         /* already reserved a worker thread - must have hit a
1226          * transient error on a previous pass
1227          */
1228         return;
1229     }
1230
1231     if (blocking)
1232         rc = ap_queue_info_wait_for_idler(worker_queue_info, all_busy);
1233     else
1234         rc = ap_queue_info_try_get_idler(worker_queue_info);
1235
1236     if (rc == APR_SUCCESS) {
1237         *have_idle_worker_p = 1;
1238     }
1239     else if (!blocking && rc == APR_EAGAIN) {
1240         *all_busy = 1;
1241     }
1242     else if (!APR_STATUS_IS_EOF(rc)) {
1243         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1244                      "ap_queue_info_wait_for_idler failed.  "
1245                      "Attempting to shutdown process gracefully");
1246         signal_threads(ST_GRACEFUL);
1247     }
1248 }
1249
1250 /* XXXXXX: Convert to skiplist or other better data structure
1251  * (yes, this is VERY VERY VERY VERY BAD)
1252  */
1253
1254 /* Structures to reuse */
1255 static APR_RING_HEAD(timer_free_ring_t, timer_event_t) timer_free_ring;
1256 /* Active timers */
1257 static APR_RING_HEAD(timer_ring_t, timer_event_t) timer_ring;
1258
1259 static apr_thread_mutex_t *g_timer_ring_mtx;
1260
1261 static apr_status_t event_register_timed_callback(apr_time_t t,
1262                                                   ap_mpm_callback_fn_t *cbfn,
1263                                                   void *baton)
1264 {
1265     int inserted = 0;
1266     timer_event_t *ep;
1267     timer_event_t *te;
1268     /* oh yeah, and make locking smarter/fine grained. */
1269     apr_thread_mutex_lock(g_timer_ring_mtx);
1270
1271     if (!APR_RING_EMPTY(&timer_free_ring, timer_event_t, link)) {
1272         te = APR_RING_FIRST(&timer_free_ring);
1273         APR_RING_REMOVE(te, link);
1274     }
1275     else {
1276         /* XXXXX: lol, pool allocation without a context from any thread.Yeah. Right. MPMs Suck. */
1277         te = ap_malloc(sizeof(timer_event_t));
1278         APR_RING_ELEM_INIT(te, link);
1279     }
1280
1281     te->cbfunc = cbfn;
1282     te->baton = baton;
1283     /* XXXXX: optimize */
1284     te->when = t + apr_time_now();
1285
1286     /* Okay, insert sorted by when.. */
1287     for (ep = APR_RING_FIRST(&timer_ring);
1288          ep != APR_RING_SENTINEL(&timer_ring,
1289                                  timer_event_t, link);
1290          ep = APR_RING_NEXT(ep, link))
1291     {
1292         if (ep->when > te->when) {
1293             inserted = 1;
1294             APR_RING_INSERT_BEFORE(ep, te, link);
1295             break;
1296         }
1297     }
1298
1299     if (!inserted) {
1300         APR_RING_INSERT_TAIL(&timer_ring, te, timer_event_t, link);
1301     }
1302
1303     apr_thread_mutex_unlock(g_timer_ring_mtx);
1304
1305     return APR_SUCCESS;
1306 }
1307
1308 /*
1309  * Close socket and clean up if remote closed its end while we were in
1310  * lingering close.
1311  * Only to be called in the listener thread;
1312  * Pre-condition: cs is in one of the linger queues and in the pollset
1313  */
1314 static void process_lingering_close(conn_state_t *cs, const apr_pollfd_t *pfd)
1315 {
1316     apr_socket_t *csd = ap_get_conn_socket(cs->c);
1317     char dummybuf[2048];
1318     apr_size_t nbytes;
1319     apr_status_t rv;
1320     struct timeout_queue *q;
1321     q = (cs->state == CONN_STATE_LINGER_SHORT) ?  &short_linger_q : &linger_q;
1322
1323     /* socket is already in non-blocking state */
1324     do {
1325         nbytes = sizeof(dummybuf);
1326         rv = apr_socket_recv(csd, dummybuf, &nbytes);
1327     } while (rv == APR_SUCCESS);
1328
1329     if (!APR_STATUS_IS_EOF(rv)) {
1330         return;
1331     }
1332
1333     rv = apr_pollset_remove(event_pollset, pfd);
1334     AP_DEBUG_ASSERT(rv == APR_SUCCESS);
1335
1336     rv = apr_socket_close(csd);
1337     AP_DEBUG_ASSERT(rv == APR_SUCCESS);
1338
1339     TO_QUEUE_REMOVE(*q, cs);
1340     TO_QUEUE_ELEM_INIT(cs);
1341
1342     apr_pool_clear(cs->p);
1343     ap_push_pool(worker_queue_info, cs->p);
1344 }
1345
1346 /* call 'func' for all elements of 'q' with timeout less than 'timeout_time'.
1347  * Pre-condition: timeout_mutex must already be locked
1348  * Post-condition: timeout_mutex will be locked again
1349  */
1350 static void process_timeout_queue(struct timeout_queue *q,
1351                                   apr_time_t timeout_time,
1352                                   int (*func)(conn_state_t *, ap_equeue_t *eq))
1353 {
1354     int count = 0;
1355     conn_state_t *first, *cs, *last;
1356     apr_status_t rv;
1357     if (!q->count) {
1358         return;
1359     }
1360     AP_DEBUG_ASSERT(!APR_RING_EMPTY(&q->head, conn_state_t, timeout_list));
1361
1362     cs = first = APR_RING_FIRST(&q->head);
1363     while (cs != APR_RING_SENTINEL(&q->head, conn_state_t, timeout_list)
1364            && cs->expiration_time < timeout_time) {
1365         last = cs;
1366         rv = apr_pollset_remove(event_pollset, &cs->pfd);
1367         if (rv != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rv)) {
1368             ap_log_cerror(APLOG_MARK, APLOG_ERR, rv, cs->c,
1369                           "apr_pollset_remove failed");
1370         }
1371         cs = APR_RING_NEXT(cs, timeout_list);
1372         count++;
1373     }
1374     if (!count)
1375         return;
1376
1377     APR_RING_UNSPLICE(first, last, timeout_list);
1378     AP_DEBUG_ASSERT(q->count >= count);
1379     q->count -= count;
1380     while (count) {
1381         cs = APR_RING_NEXT(first, timeout_list);
1382         TO_QUEUE_ELEM_INIT(first);
1383         func(first, NULL);
1384         first = cs;
1385         count--;
1386     }
1387 }
1388
1389 static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
1390 {
1391     timer_event_t *ep;
1392     timer_event_t *te;
1393     apr_status_t rc;
1394     proc_info *ti = dummy;
1395     int process_slot = ti->pid;
1396     apr_pool_t *tpool = apr_thread_pool_get(thd);
1397     void *csd = NULL;
1398     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1399     ap_listen_rec *lr;
1400     int have_idle_worker = 0;
1401     conn_state_t *cs;
1402     const apr_pollfd_t *out_pfd;
1403     apr_int32_t num = 0;
1404     apr_interval_time_t timeout_interval;
1405     apr_time_t timeout_time = 0, now, last_log;
1406     listener_poll_type *pt;
1407     int closed = 0, listeners_disabled = 0;
1408
1409     last_log = apr_time_now();
1410     free(ti);
1411
1412     /* the following times out events that are really close in the future
1413      *   to prevent extra poll calls
1414      *
1415      * current value is .1 second
1416      */
1417 #define TIMEOUT_FUDGE_FACTOR 100000
1418 #define EVENT_FUDGE_FACTOR 10000
1419
1420     rc = init_pollset(tpool);
1421     if (rc != APR_SUCCESS) {
1422         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1423                      "failed to initialize pollset, "
1424                      "attempting to shutdown process gracefully");
1425         signal_threads(ST_GRACEFUL);
1426         return NULL;
1427     }
1428
1429     /* Unblock the signal used to wake this thread up, and set a handler for
1430      * it.
1431      */
1432     unblock_signal(LISTENER_SIGNAL);
1433     apr_signal(LISTENER_SIGNAL, dummy_signal_handler);
1434
1435     for (;;) {
1436         int workers_were_busy = 0;
1437         if (listener_may_exit) {
1438             close_listeners(process_slot, &closed);
1439             if (terminate_mode == ST_UNGRACEFUL
1440                 || apr_atomic_read32(&connection_count) == 0)
1441                 break;
1442         }
1443
1444         if (requests_this_child <= 0) {
1445             check_infinite_requests();
1446         }
1447
1448         now = apr_time_now();
1449         if (APLOGtrace6(ap_server_conf)) {
1450             /* trace log status every second */
1451             if (now - last_log > apr_time_from_msec(1000)) {
1452                 last_log = now;
1453                 ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
1454                              "connections: %d (write-completion: %d "
1455                              "keep-alive: %d lingering: %d)",
1456                              connection_count, write_completion_q.count,
1457                              keepalive_q.count,
1458                              linger_q.count + short_linger_q.count);
1459             }
1460         }
1461
1462         apr_thread_mutex_lock(g_timer_ring_mtx);
1463         if (!APR_RING_EMPTY(&timer_ring, timer_event_t, link)) {
1464             te = APR_RING_FIRST(&timer_ring);
1465             if (te->when > now) {
1466                 timeout_interval = te->when - now;
1467             }
1468             else {
1469                 timeout_interval = 1;
1470             }
1471         }
1472         else {
1473             timeout_interval = apr_time_from_msec(100);
1474         }
1475         apr_thread_mutex_unlock(g_timer_ring_mtx);
1476
1477 #if HAVE_SERF
1478         rc = serf_context_prerun(g_serf);
1479         if (rc != APR_SUCCESS) {
1480             /* TOOD: what should do here? ugh. */
1481         }
1482 #endif
1483         rc = apr_pollset_poll(event_pollset, timeout_interval, &num, &out_pfd);
1484         if (rc != APR_SUCCESS
1485             && !APR_STATUS_IS_EINTR(rc)
1486             && !APR_STATUS_IS_TIMEUP(rc)) {
1487             ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf,
1488                          "apr_pollset_poll failed.  Attempting to "
1489                          "shutdown process gracefully");
1490             signal_threads(ST_GRACEFUL);
1491         }
1492
1493         if (listener_may_exit) {
1494             close_listeners(process_slot, &closed);
1495             if (terminate_mode == ST_UNGRACEFUL
1496                 || apr_atomic_read32(&connection_count) == 0)
1497                 break;
1498         }
1499
1500         now = apr_time_now();
1501         apr_thread_mutex_lock(g_timer_ring_mtx);
1502         for (ep = APR_RING_FIRST(&timer_ring);
1503              ep != APR_RING_SENTINEL(&timer_ring,
1504                                      timer_event_t, link);
1505              ep = APR_RING_FIRST(&timer_ring))
1506         {
1507             if (ep->when < now + EVENT_FUDGE_FACTOR) {
1508                 APR_RING_REMOVE(ep, link);
1509                 push_timer2worker(ep);
1510             }
1511             else {
1512                 break;
1513             }
1514         }
1515         apr_thread_mutex_unlock(g_timer_ring_mtx);
1516
1517         while (num) {
1518             pt = (listener_poll_type *) out_pfd->client_data;
1519             if (pt->type == PT_CSD) {
1520                 /* one of the sockets is readable */
1521                 struct timeout_queue *remove_from_q = &write_completion_q;
1522                 int blocking = 1;
1523                 cs = (conn_state_t *) pt->baton;
1524                 switch (cs->state) {
1525                 case CONN_STATE_CHECK_REQUEST_LINE_READABLE:
1526                     cs->state = CONN_STATE_READ_REQUEST_LINE;
1527                     remove_from_q = &keepalive_q;
1528                     /* don't wait for a worker for a keepalive request */
1529                     blocking = 0;
1530                     /* FALL THROUGH */
1531                 case CONN_STATE_WRITE_COMPLETION:
1532                     get_worker(&have_idle_worker, blocking,
1533                                &workers_were_busy);
1534                     TO_QUEUE_REMOVE(*remove_from_q, cs);
1535                     rc = apr_pollset_remove(event_pollset, &cs->pfd);
1536
1537                     /*
1538                      * Some of the pollset backends, like KQueue or Epoll
1539                      * automagically remove the FD if the socket is closed,
1540                      * therefore, we can accept _SUCCESS or _NOTFOUND,
1541                      * and we still want to keep going
1542                      */
1543                     if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
1544                         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1545                                      "pollset remove failed");
1546                         start_lingering_close(cs, NULL);
1547                         break;
1548                     }
1549
1550                     TO_QUEUE_ELEM_INIT(cs);
1551                     /* If we didn't get a worker immediately for a keep-alive
1552                      * request, we close the connection, so that the client can
1553                      * re-connect to a different process.
1554                      */
1555                     if (!have_idle_worker) {
1556                         start_lingering_close(cs, NULL);
1557                         break;
1558                     }
1559                     rc = push2worker(out_pfd, event_pollset);
1560                     if (rc != APR_SUCCESS) {
1561                         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1562                                      ap_server_conf, "push2worker failed");
1563                     }
1564                     else {
1565                         have_idle_worker = 0;
1566                     }
1567                     break;
1568                 case CONN_STATE_LINGER_NORMAL:
1569                 case CONN_STATE_LINGER_SHORT:
1570                     process_lingering_close(cs, out_pfd);
1571                     break;
1572                 default:
1573                     ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1574                                  ap_server_conf,
1575                                  "event_loop: unexpected state %d",
1576                                  cs->state);
1577                     ap_assert(0);
1578                 }
1579             }
1580             else if (pt->type == PT_ACCEPT) {
1581                 int skip_accept = 0;
1582                 int connection_count_local = connection_count;
1583
1584                 /* A Listener Socket is ready for an accept() */
1585                 if (workers_were_busy) {
1586                     skip_accept = 1;
1587                     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1588                                  "All workers busy, not accepting new conns"
1589                                  "in this process");
1590                 }
1591                 else if (listeners_disabled) {
1592                     listeners_disabled = 0;
1593                     enable_listensocks(process_slot);
1594                 }
1595                 else if (connection_count_local > threads_per_child
1596                          + ap_queue_info_get_idlers(worker_queue_info) *
1597                            worker_factor / WORKER_FACTOR_SCALE)
1598                 {
1599                     skip_accept = 1;
1600                     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1601                                  "Too many open connections (%u), "
1602                                  "not accepting new conns in this process",
1603                                  connection_count_local);
1604                     ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1605                                  "Idle workers: %u",
1606                                  ap_queue_info_get_idlers(worker_queue_info));
1607                 }
1608
1609                 if (skip_accept == 0) {
1610                     lr = (ap_listen_rec *) pt->baton;
1611                     ap_pop_pool(&ptrans, worker_queue_info);
1612
1613                     if (ptrans == NULL) {
1614                         /* create a new transaction pool for each accepted socket */
1615                         apr_allocator_t *allocator;
1616
1617                         apr_allocator_create(&allocator);
1618                         apr_allocator_max_free_set(allocator,
1619                                                    ap_max_mem_free);
1620                         apr_pool_create_ex(&ptrans, pconf, NULL, allocator);
1621                         apr_allocator_owner_set(allocator, ptrans);
1622                         if (ptrans == NULL) {
1623                             ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1624                                          ap_server_conf,
1625                                          "Failed to create transaction pool");
1626                             signal_threads(ST_GRACEFUL);
1627                             return NULL;
1628                         }
1629                     }
1630                     apr_pool_tag(ptrans, "transaction");
1631
1632                     get_worker(&have_idle_worker, 1, &workers_were_busy);
1633                     rc = lr->accept_func(&csd, lr, ptrans);
1634
1635                     /* later we trash rv and rely on csd to indicate
1636                      * success/failure
1637                      */
1638                     AP_DEBUG_ASSERT(rc == APR_SUCCESS || !csd);
1639
1640                     if (rc == APR_EGENERAL) {
1641                         /* E[NM]FILE, ENOMEM, etc */
1642                         resource_shortage = 1;
1643                         signal_threads(ST_GRACEFUL);
1644                     }
1645
1646                     if (csd != NULL) {
1647                         rc = ap_queue_push(worker_queue, csd, NULL, ptrans);
1648                         if (rc != APR_SUCCESS) {
1649                             /* trash the connection; we couldn't queue the connected
1650                              * socket to a worker
1651                              */
1652                             apr_socket_close(csd);
1653                             ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1654                                          ap_server_conf,
1655                                          "ap_queue_push failed");
1656                             apr_pool_clear(ptrans);
1657                             ap_push_pool(worker_queue_info, ptrans);
1658                         }
1659                         else {
1660                             have_idle_worker = 0;
1661                         }
1662                     }
1663                     else {
1664                         apr_pool_clear(ptrans);
1665                         ap_push_pool(worker_queue_info, ptrans);
1666                     }
1667                 }
1668             }               /* if:else on pt->type */
1669 #if HAVE_SERF
1670             else if (pt->type == PT_SERF) {
1671                 /* send socket to serf. */
1672                 /* XXXX: this doesn't require get_worker() */
1673                 serf_event_trigger(g_serf, pt->baton, out_pfd);
1674             }
1675 #endif
1676             out_pfd++;
1677             num--;
1678         }                   /* while for processing poll */
1679
1680         {
1681             /* TODO: break out to separate function */
1682             int i;
1683
1684             for (i = 0; i < threads_per_child; i++) {
1685                 ap_equeue_t *eq = worker_equeues[i];
1686                 pollset_op_t *op = NULL;
1687
1688                 while ((op = ap_equeue_reader_next(eq)) != NULL) {
1689                     process_pollop(op);
1690                 }
1691             }
1692         }
1693
1694         /* XXX possible optimization: stash the current time for use as
1695          * r->request_time for new requests
1696          */
1697         now = apr_time_now();
1698         /* we only do this once per 0.1s (TIMEOUT_FUDGE_FACTOR) */
1699         if (now > timeout_time) {
1700             struct process_score *ps;
1701             timeout_time = now + TIMEOUT_FUDGE_FACTOR;
1702
1703             /* handle timed out sockets */
1704
1705             /* Step 1: keepalive timeouts */
1706             /* If all workers are busy, we kill older keep-alive connections so that they
1707              * may connect to another process.
1708              */
1709             if (workers_were_busy && keepalive_q.count) {
1710                 ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1711                              "All workers are busy, will close %d keep-alive "
1712                              "connections",
1713                              keepalive_q.count);
1714                 process_timeout_queue(&keepalive_q,
1715                                       timeout_time + ap_server_conf->keep_alive_timeout,
1716                                       start_lingering_close);
1717             }
1718             else {
1719                 process_timeout_queue(&keepalive_q, timeout_time,
1720                                       start_lingering_close);
1721             }
1722             /* Step 2: write completion timeouts */
1723             process_timeout_queue(&write_completion_q, timeout_time, start_lingering_close);
1724             /* Step 3: (normal) lingering close completion timeouts */
1725             process_timeout_queue(&linger_q, timeout_time, stop_lingering_close);
1726             /* Step 4: (short) lingering close completion timeouts */
1727             process_timeout_queue(&short_linger_q, timeout_time, stop_lingering_close);
1728
1729             ps = ap_get_scoreboard_process(process_slot);
1730             ps->write_completion = write_completion_q.count;
1731             ps->lingering_close = linger_q.count + short_linger_q.count;
1732             ps->keep_alive = keepalive_q.count;
1733
1734             ps->connections = apr_atomic_read32(&connection_count);
1735             /* XXX: should count CONN_STATE_SUSPENDED and set ps->suspended */
1736         }
1737         if (listeners_disabled && !workers_were_busy &&
1738             (int)apr_atomic_read32(&connection_count) <
1739             ((int)ap_queue_info_get_idlers(worker_queue_info) - 1) *
1740             worker_factor / WORKER_FACTOR_SCALE + threads_per_child)
1741         {
1742             listeners_disabled = 0;
1743             enable_listensocks(process_slot);
1744         }
1745         /*
1746          * XXX: do we need to set some timeout that re-enables the listensocks
1747          * XXX: in case no other event occurs?
1748          */
1749     }     /* listener main loop */
1750
1751     close_listeners(process_slot, &closed);
1752     ap_queue_term(worker_queue);
1753
1754     apr_thread_exit(thd, APR_SUCCESS);
1755     return NULL;
1756 }
1757
1758 /* XXX For ungraceful termination/restart, we definitely don't want to
1759  *     wait for active connections to finish but we may want to wait
1760  *     for idle workers to get out of the queue code and release mutexes,
1761  *     since those mutexes are cleaned up pretty soon and some systems
1762  *     may not react favorably (i.e., segfault) if operations are attempted
1763  *     on cleaned-up mutexes.
1764  */
1765 static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
1766 {
1767     proc_info *ti = dummy;
1768     int process_slot = ti->pid;
1769     int thread_slot = ti->tid;
1770     apr_socket_t *csd = NULL;
1771     conn_state_t *cs;
1772     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1773     apr_status_t rv;
1774     int is_idle = 0;
1775     timer_event_t *te = NULL;
1776     ap_equeue_t *eq = worker_equeues[thread_slot];
1777
1778     free(ti);
1779
1780     ap_scoreboard_image->servers[process_slot][thread_slot].pid = ap_my_pid;
1781     ap_scoreboard_image->servers[process_slot][thread_slot].tid = apr_os_thread_current();
1782     ap_scoreboard_image->servers[process_slot][thread_slot].generation = retained->my_generation;
1783     ap_update_child_status_from_indexes(process_slot, thread_slot,
1784                                         SERVER_STARTING, NULL);
1785
1786     while (!workers_may_exit) {
1787         if (!is_idle) {
1788             rv = ap_queue_info_set_idle(worker_queue_info, NULL);
1789             if (rv != APR_SUCCESS) {
1790                 ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
1791                              "ap_queue_info_set_idle failed. Attempting to "
1792                              "shutdown process gracefully.");
1793                 signal_threads(ST_GRACEFUL);
1794                 break;
1795             }
1796             is_idle = 1;
1797         }
1798
1799         ap_update_child_status_from_indexes(process_slot, thread_slot,
1800                                             dying ? SERVER_GRACEFUL : SERVER_READY, NULL);
1801       worker_pop:
1802         if (workers_may_exit) {
1803             break;
1804         }
1805
1806         te = NULL;
1807         rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te);
1808
1809         if (rv != APR_SUCCESS) {
1810             /* We get APR_EOF during a graceful shutdown once all the
1811              * connections accepted by this server process have been handled.
1812              */
1813             if (APR_STATUS_IS_EOF(rv)) {
1814                 break;
1815             }
1816             /* We get APR_EINTR whenever ap_queue_pop() has been interrupted
1817              * from an explicit call to ap_queue_interrupt_all(). This allows
1818              * us to unblock threads stuck in ap_queue_pop() when a shutdown
1819              * is pending.
1820              *
1821              * If workers_may_exit is set and this is ungraceful termination/
1822              * restart, we are bound to get an error on some systems (e.g.,
1823              * AIX, which sanity-checks mutex operations) since the queue
1824              * may have already been cleaned up.  Don't log the "error" if
1825              * workers_may_exit is set.
1826              */
1827             else if (APR_STATUS_IS_EINTR(rv)) {
1828                 goto worker_pop;
1829             }
1830             /* We got some other error. */
1831             else if (!workers_may_exit) {
1832                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1833                              "ap_queue_pop failed");
1834             }
1835             continue;
1836         }
1837         if (te != NULL) {
1838             te->cbfunc(te->baton);
1839
1840             {
1841                 apr_thread_mutex_lock(g_timer_ring_mtx);
1842                 APR_RING_INSERT_TAIL(&timer_free_ring, te, timer_event_t, link);
1843                 apr_thread_mutex_unlock(g_timer_ring_mtx);
1844             }
1845         }
1846         else {
1847             is_idle = 0;
1848             worker_sockets[thread_slot] = csd;
1849             rv = process_socket(thd, ptrans, csd, cs, eq, process_slot, thread_slot);
1850             if (!rv) {
1851                 requests_this_child--;
1852             }
1853             worker_sockets[thread_slot] = NULL;
1854         }
1855     }
1856
1857     ap_update_child_status_from_indexes(process_slot, thread_slot,
1858                                         dying ? SERVER_DEAD :
1859                                         SERVER_GRACEFUL,
1860                                         (request_rec *) NULL);
1861
1862     apr_thread_exit(thd, APR_SUCCESS);
1863     return NULL;
1864 }
1865
/**
 * Classify a signal number as a termination request or not.
 *
 * @param signum signal number to examine
 * @return 1 when signum is SIGTERM or SIGINT, 0 for every other value
 */
static int check_signal(int signum)
{
    return (signum == SIGTERM || signum == SIGINT) ? 1 : 0;
}
1875
1876
1877
1878 static void create_listener_thread(thread_starter * ts)
1879 {
1880     int my_child_num = ts->child_num_arg;
1881     apr_threadattr_t *thread_attr = ts->threadattr;
1882     proc_info *my_info;
1883     apr_status_t rv;
1884
1885     my_info = (proc_info *) ap_malloc(sizeof(proc_info));
1886     my_info->pid = my_child_num;
1887     my_info->tid = -1;          /* listener thread doesn't have a thread slot */
1888     my_info->sd = 0;
1889     rv = apr_thread_create(&ts->listener, thread_attr, listener_thread,
1890                            my_info, pchild);
1891     if (rv != APR_SUCCESS) {
1892         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1893                      "apr_thread_create: unable to create listener thread");
1894         /* let the parent decide how bad this really is */
1895         clean_child_exit(APEXIT_CHILDSICK);
1896     }
1897     apr_os_thread_get(&listener_os_thread, ts->listener);
1898 }
1899
1900 /* XXX under some circumstances not understood, children can get stuck
1901  *     in start_threads forever trying to take over slots which will
1902  *     never be cleaned up; for now there is an APLOG_DEBUG message issued
1903  *     every so often when this condition occurs
1904  */
1905 static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
1906 {
1907     thread_starter *ts = dummy;
1908     apr_thread_t **threads = ts->threads;
1909     apr_threadattr_t *thread_attr = ts->threadattr;
1910     int child_num_arg = ts->child_num_arg;
1911     int my_child_num = child_num_arg;
1912     proc_info *my_info;
1913     apr_status_t rv;
1914     int i;
1915     int threads_created = 0;
1916     int listener_started = 0;
1917     int loops;
1918     int prev_threads_created;
1919     int max_recycled_pools = -1;
1920
1921     /* We must create the fd queues before we start up the listener
1922      * and worker threads. */
1923     worker_queue = apr_pcalloc(pchild, sizeof(*worker_queue));
1924     rv = ap_queue_init(worker_queue, threads_per_child, pchild);
1925     if (rv != APR_SUCCESS) {
1926         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1927                      "ap_queue_init() failed");
1928         clean_child_exit(APEXIT_CHILDFATAL);
1929     }
1930
1931     if (ap_max_mem_free != APR_ALLOCATOR_MAX_FREE_UNLIMITED) {
1932         /* If we want to conserve memory, let's not keep an unlimited number of
1933          * pools & allocators.
1934          * XXX: This should probably be a separate config directive
1935          */
1936         max_recycled_pools = threads_per_child * 3 / 4 ;
1937     }
1938     rv = ap_queue_info_create(&worker_queue_info, pchild,
1939                               threads_per_child, max_recycled_pools);
1940     if (rv != APR_SUCCESS) {
1941         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1942                      "ap_queue_info_create() failed");
1943         clean_child_exit(APEXIT_CHILDFATAL);
1944     }
1945
1946     /* Create the main pollset */
1947     rv = apr_pollset_create(&event_pollset,
1948                             threads_per_child, /* XXX don't we need more, to handle
1949                                                 * connections in K-A or lingering
1950                                                 * close?
1951                                                 */
1952                             pchild, APR_POLLSET_WAKEABLE|APR_POLLSET_NOCOPY);
1953     if (rv != APR_SUCCESS) {
1954         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
1955                      "apr_pollset_create failed; check system or user limits");
1956         clean_child_exit(APEXIT_CHILDFATAL);
1957     }
1958
1959     worker_sockets = apr_pcalloc(pchild, threads_per_child
1960                                  * sizeof(apr_socket_t *));
1961
1962     worker_equeues = apr_palloc(pchild, threads_per_child * sizeof(ap_equeue_t*));
1963
1964     for (i = 0; i < threads_per_child; i++) {
1965         ap_equeue_t* eq = NULL;
1966         /* TODO: research/test optimal size of queue here */
1967         ap_equeue_create(pchild, 16, sizeof(pollset_op_t), &eq);
1968         /* same as thread ID */
1969         worker_equeues[i] = eq;
1970     }
1971
1972     loops = prev_threads_created = 0;
1973     while (1) {
1974         /* threads_per_child does not include the listener thread */
1975         for (i = 0; i < threads_per_child; i++) {
1976             int status =
1977                 ap_scoreboard_image->servers[child_num_arg][i].status;
1978
1979             if (status != SERVER_GRACEFUL && status != SERVER_DEAD) {
1980                 continue;
1981             }
1982
1983             my_info = (proc_info *) ap_malloc(sizeof(proc_info));
1984             my_info->pid = my_child_num;
1985             my_info->tid = i;
1986             my_info->sd = 0;
1987
1988             /* We are creating threads right now */
1989             ap_update_child_status_from_indexes(my_child_num, i,
1990                                                 SERVER_STARTING, NULL);
1991             /* We let each thread update its own scoreboard entry.  This is
1992              * done because it lets us deal with tid better.
1993              */
1994             rv = apr_thread_create(&threads[i], thread_attr,
1995                                    worker_thread, my_info, pchild);
1996             if (rv != APR_SUCCESS) {
1997                 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1998                              "apr_thread_create: unable to create worker thread");
1999                 /* let the parent decide how bad this really is */
2000                 clean_child_exit(APEXIT_CHILDSICK);
2001             }
2002             threads_created++;
2003         }
2004
2005         /* Start the listener only when there are workers available */
2006         if (!listener_started && threads_created) {
2007             create_listener_thread(ts);
2008             listener_started = 1;
2009         }
2010
2011
2012         if (start_thread_may_exit || threads_created == threads_per_child) {
2013             break;
2014         }
2015         /* wait for previous generation to clean up an entry */
2016         apr_sleep(apr_time_from_sec(1));
2017         ++loops;
2018         if (loops % 120 == 0) { /* every couple of minutes */
2019             if (prev_threads_created == threads_created) {
2020                 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
2021                              "child %" APR_PID_T_FMT " isn't taking over "
2022                              "slots very quickly (%d of %d)",
2023                              ap_my_pid, threads_created,
2024                              threads_per_child);
2025             }
2026             prev_threads_created = threads_created;
2027         }
2028     }
2029
2030     /* What state should this child_main process be listed as in the
2031      * scoreboard...?
2032      *  ap_update_child_status_from_indexes(my_child_num, i, SERVER_STARTING,
2033      *                                      (request_rec *) NULL);
2034      *
2035      *  This state should be listed separately in the scoreboard, in some kind
2036      *  of process_status, not mixed in with the worker threads' status.
2037      *  "life_status" is almost right, but it's in the worker's structure, and
2038      *  the name could be clearer.   gla
2039      */
2040     apr_thread_exit(thd, APR_SUCCESS);
2041     return NULL;
2042 }
2043
2044 static void join_workers(apr_thread_t * listener, apr_thread_t ** threads)
2045 {
2046     int i;
2047     apr_status_t rv, thread_rv;
2048
2049     if (listener) {
2050         int iter;
2051
2052         /* deal with a rare timing window which affects waking up the
2053          * listener thread...  if the signal sent to the listener thread
2054          * is delivered between the time it verifies that the
2055          * listener_may_exit flag is clear and the time it enters a
2056          * blocking syscall, the signal didn't do any good...  work around
2057          * that by sleeping briefly and sending it again
2058          */
2059
2060         iter = 0;
2061         while (iter < 10 && !dying) {
2062             /* listener has not stopped accepting yet */
2063             apr_sleep(apr_time_make(0, 500000));
2064             wakeup_listener();
2065             ++iter;
2066         }
2067         if (iter >= 10) {
2068             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
2069                          "the listener thread didn't stop accepting");
2070         }
2071         else {
2072             rv = apr_thread_join(&thread_rv, listener);
2073             if (rv != APR_SUCCESS) {
2074                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
2075                              "apr_thread_join: unable to join listener thread");
2076             }
2077         }
2078     }
2079
2080     for (i = 0; i < threads_per_child; i++) {
2081         if (threads[i]) {       /* if we ever created this thread */
2082             rv = apr_thread_join(&thread_rv, threads[i]);
2083             if (rv != APR_SUCCESS) {
2084                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
2085                              "apr_thread_join: unable to join worker "
2086                              "thread %d", i);
2087             }
2088         }
2089     }
2090 }
2091
2092 static void join_start_thread(apr_thread_t * start_thread_id)
2093 {
2094     apr_status_t rv, thread_rv;
2095
2096     start_thread_may_exit = 1;  /* tell it to give up in case it is still
2097                                  * trying to take over slots from a
2098                                  * previous generation
2099                                  */
2100     rv = apr_thread_join(&thread_rv, start_thread_id);
2101     if (rv != APR_SUCCESS) {
2102         ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
2103                      "apr_thread_join: unable to join the start " "thread");
2104     }
2105 }
2106
2107 static void child_main(int child_num_arg)
2108 {
2109     apr_thread_t **threads;
2110     apr_status_t rv;
2111     thread_starter *ts;
2112     apr_threadattr_t *thread_attr;
2113     apr_thread_t *start_thread_id;
2114
2115     mpm_state = AP_MPMQ_STARTING;       /* for benefit of any hooks that run as this
2116                                          * child initializes
2117                                          */
2118     ap_my_pid = getpid();
2119     ap_fatal_signal_child_setup(ap_server_conf);
2120     apr_pool_create(&pchild, pconf);
2121
2122     /*stuff to do before we switch id's, so we have permissions. */
2123     ap_reopen_scoreboard(pchild, NULL, 0);
2124
2125     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
2126         clean_child_exit(APEXIT_CHILDFATAL);
2127     }
2128
2129     apr_thread_mutex_create(&g_timer_ring_mtx, APR_THREAD_MUTEX_DEFAULT, pchild);
2130     APR_RING_INIT(&timer_free_ring, timer_event_t, link);
2131     APR_RING_INIT(&timer_ring, timer_event_t, link);
2132     ap_run_child_init(pchild, ap_server_conf);
2133
2134     /* done with init critical section */
2135
2136     /* Just use the standard apr_setup_signal_thread to block all signals
2137      * from being received.  The child processes no longer use signals for
2138      * any communication with the parent process.
2139      */
2140     rv = apr_setup_signal_thread();
2141     if (rv != APR_SUCCESS) {
2142         ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
2143                      "Couldn't initialize signal thread");
2144         clean_child_exit(APEXIT_CHILDFATAL);
2145     }
2146
2147     if (ap_max_requests_per_child) {
2148         requests_this_child = ap_max_requests_per_child;
2149     }
2150     else {
2151         /* coding a value of zero means infinity */
2152         requests_this_child = INT_MAX;
2153     }
2154
2155     /* Setup worker threads */
2156
2157     /* clear the storage; we may not create all our threads immediately,
2158      * and we want a 0 entry to indicate a thread which was not created
2159      */
2160     threads = ap_calloc(threads_per_child, sizeof(apr_thread_t *));
2161     ts = apr_palloc(pchild, sizeof(*ts));
2162
2163     apr_threadattr_create(&thread_attr, pchild);
2164     /* 0 means PTHREAD_CREATE_JOINABLE */
2165     apr_threadattr_detach_set(thread_attr, 0);
2166
2167     if (ap_thread_stacksize != 0) {
2168         apr_threadattr_stacksize_set(thread_attr, ap_thread_stacksize);
2169     }
2170
2171     ts->threads = threads;
2172     ts->listener = NULL;
2173     ts->child_num_arg = child_num_arg;
2174     ts->threadattr = thread_attr;
2175
2176     rv = apr_thread_create(&start_thread_id, thread_attr, start_threads,
2177                            ts, pchild);
2178     if (rv != APR_SUCCESS) {
2179         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
2180                      "apr_thread_create: unable to create worker thread");
2181         /* let the parent decide how bad this really is */
2182         clean_child_exit(APEXIT_CHILDSICK);
2183     }
2184
2185     mpm_state = AP_MPMQ_RUNNING;
2186
2187     /* If we are only running in one_process mode, we will want to
2188      * still handle signals. */
2189     if (one_process) {
2190         /* Block until we get a terminating signal. */
2191         apr_signal_thread(check_signal);
2192         /* make sure the start thread has finished; signal_threads()
2193          * and join_workers() depend on that
2194          */
2195         /* XXX join_start_thread() won't be awakened if one of our
2196          *     threads encounters a critical error and attempts to
2197          *     shutdown this child
2198          */
2199         join_start_thread(start_thread_id);
2200
2201         /* helps us terminate a little more quickly than the dispatch of the
2202          * signal thread; beats the Pipe of Death and the browsers
2203          */
2204         signal_threads(ST_UNGRACEFUL);
2205
2206         /* A terminating signal was received. Now join each of the
2207          * workers to clean them up.
2208          *   If the worker already exited, then the join frees
2209          *   their resources and returns.
2210          *   If the worker hasn't exited, then this blocks until
2211          *   they have (then cleans up).
2212          */
2213         join_workers(ts->listener, threads);
2214     }
2215     else {                      /* !one_process */
2216         /* remove SIGTERM from the set of blocked signals...  if one of
2217          * the other threads in the process needs to take us down
2218          * (e.g., for MaxConnectionsPerChild) it will send us SIGTERM
2219          */
2220         unblock_signal(SIGTERM);
2221         apr_signal(SIGTERM, dummy_signal_handler);
2222         /* Watch for any messages from the parent over the POD */
2223         while (1) {
2224             rv = ap_event_pod_check(pod);
2225             if (rv == AP_NORESTART) {
2226                 /* see if termination was triggered while we slept */
2227                 switch (terminate_mode) {
2228                 case ST_GRACEFUL:
2229                     rv = AP_GRACEFUL;
2230                     break;
2231                 case ST_UNGRACEFUL:
2232                     rv = AP_RESTART;
2233                     break;
2234                 }
2235             }
2236             if (rv == AP_GRACEFUL || rv == AP_RESTART) {
2237                 /* make sure the start thread has finished;
2238                  * signal_threads() and join_workers depend on that
2239                  */
2240                 join_start_thread(start_thread_id);
2241                 signal_threads(rv ==
2242                                AP_GRACEFUL ? ST_GRACEFUL : ST_UNGRACEFUL);
2243                 break;
2244             }
2245         }
2246
2247         /* A terminating signal was received. Now join each of the
2248          * workers to clean them up.
2249          *   If the worker already exited, then the join frees
2250          *   their resources and returns.
2251          *   If the worker hasn't exited, then this blocks until
2252          *   they have (then cleans up).
2253          */
2254         join_workers(ts->listener, threads);
2255     }
2256
2257     free(threads);
2258
2259     clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
2260 }
2261
2262 static int make_child(server_rec * s, int slot)
2263 {
2264     int pid;
2265
2266     if (slot + 1 > retained->max_daemons_limit) {
2267         retained->max_daemons_limit = slot + 1;
2268     }
2269
2270     if (one_process) {
2271         set_signals();
2272         event_note_child_started(slot, getpid());
2273         child_main(slot);
2274         /* NOTREACHED */
2275     }
2276
2277     if ((pid = fork()) == -1) {
2278         ap_log_error(APLOG_MARK, APLOG_ERR, errno, s,
2279                      "fork: Unable to fork new process");
2280
2281         /* fork didn't succeed.  There's no need to touch the scoreboard;
2282          * if we were trying to replace a failed child process, then
2283          * server_main_loop() marked its workers SERVER_DEAD, and if
2284          * we were trying to replace a child process that exited normally,
2285          * its worker_thread()s left SERVER_DEAD or SERVER_GRACEFUL behind.
2286          */
2287
2288         /* In case system resources are maxxed out, we don't want
2289            Apache running away with the CPU trying to fork over and
2290            over and over again. */
2291         apr_sleep(apr_time_from_sec(10));
2292
2293         return -1;
2294     }
2295
2296     if (!pid) {
2297 #ifdef HAVE_BINDPROCESSOR
2298         /* By default, AIX binds to a single processor.  This bit unbinds
2299          * children which will then bind to another CPU.
2300          */
2301         int status = bindprocessor(BINDPROCESS, (int) getpid(),
2302                                    PROCESSOR_CLASS_ANY);
2303         if (status != OK)
2304             ap_log_error(APLOG_MARK, APLOG_DEBUG, errno,
2305                          ap_server_conf,
2306                          "processor unbind failed");
2307 #endif
2308         RAISE_SIGSTOP(MAKE_CHILD);
2309
2310         apr_signal(SIGTERM, just_die);
2311         child_main(slot);
2312         /* NOTREACHED */
2313     }
2314     /* else */
2315     if (ap_scoreboard_image->parent[slot].pid != 0) {
2316         /* This new child process is squatting on the scoreboard
2317          * entry owned by an exiting child process, which cannot
2318          * exit until all active requests complete.
2319          */
2320         event_note_child_lost_slot(slot, pid);
2321     }
2322     ap_scoreboard_image->parent[slot].quiescing = 0;
2323     ap_scoreboard_image->parent[slot].not_accepting = 0;
2324     event_note_child_started(slot, pid);
2325     return 0;
2326 }
2327
2328 /* start up a bunch of children */
2329 static void startup_children(int number_to_start)
2330 {
2331     int i;
2332
2333     for (i = 0; number_to_start && i < ap_daemons_limit; ++i) {
2334         if (ap_scoreboard_image->parent[i].pid != 0) {
2335             continue;
2336         }
2337         if (make_child(ap_server_conf, i) < 0) {
2338             break;
2339         }
2340         --number_to_start;
2341     }
2342 }
2343
2344 static void perform_idle_server_maintenance(void)
2345 {
2346     int i, j;
2347     int idle_thread_count;
2348     worker_score *ws;
2349     process_score *ps;
2350     int free_length;
2351     int totally_free_length = 0;
2352     int free_slots[MAX_SPAWN_RATE];
2353     int last_non_dead;
2354     int total_non_dead;
2355     int active_thread_count = 0;
2356
2357     /* initialize the free_list */
2358     free_length = 0;
2359
2360     idle_thread_count = 0;
2361     last_non_dead = -1;
2362     total_non_dead = 0;
2363
2364     for (i = 0; i < ap_daemons_limit; ++i) {
2365         /* Initialization to satisfy the compiler. It doesn't know
2366          * that threads_per_child is always > 0 */
2367         int status = SERVER_DEAD;
2368         int any_dying_threads = 0;
2369         int any_dead_threads = 0;
2370         int all_dead_threads = 1;
2371
2372         if (i >= retained->max_daemons_limit
2373             && totally_free_length == retained->idle_spawn_rate)
2374             /* short cut if all active processes have been examined and
2375              * enough empty scoreboard slots have been found
2376              */
2377
2378             break;
2379         ps = &ap_scoreboard_image->parent[i];
2380         for (j = 0; j < threads_per_child; j++) {
2381             ws = &ap_scoreboard_image->servers[i][j];
2382             status = ws->status;
2383
2384             /* XXX any_dying_threads is probably no longer needed    GLA */
2385             any_dying_threads = any_dying_threads ||
2386                 (status == SERVER_GRACEFUL);
2387             any_dead_threads = any_dead_threads || (status == SERVER_DEAD);
2388             all_dead_threads = all_dead_threads &&
2389                 (status == SERVER_DEAD || status == SERVER_GRACEFUL);
2390
2391             /* We consider a starting server as idle because we started it
2392              * at least a cycle ago, and if it still hasn't finished starting
2393              * then we're just going to swamp things worse by forking more.
2394              * So we hopefully won't need to fork more if we count it.
2395              * This depends on the ordering of SERVER_READY and SERVER_STARTING.
2396              */
2397             if (ps->pid != 0) { /* XXX just set all_dead_threads in outer
2398                                    for loop if no pid?  not much else matters */
2399                 if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting
2400                     && ps->generation == retained->my_generation)
2401                 {
2402                     ++idle_thread_count;
2403                 }
2404                 if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
2405                     ++active_thread_count;
2406                 }
2407             }
2408         }
2409         if (any_dead_threads
2410             && totally_free_length < retained->idle_spawn_rate
2411             && free_length < MAX_SPAWN_RATE
2412             && (!ps->pid      /* no process in the slot */
2413                   || ps->quiescing)) {  /* or at least one is going away */
2414             if (all_dead_threads) {
2415                 /* great! we prefer these, because the new process can
2416                  * start more threads sooner.  So prioritize this slot
2417                  * by putting it ahead of any slots with active threads.
2418                  *
2419                  * first, make room by moving a slot that's potentially still
2420                  * in use to the end of the array
2421                  */
2422                 free_slots[free_length] = free_slots[totally_free_length];
2423                 free_slots[totally_free_length++] = i;
2424             }
2425             else {
2426                 /* slot is still in use - back of the bus
2427                  */
2428                 free_slots[free_length] = i;
2429             }
2430             ++free_length;
2431         }
2432         /* XXX if (!ps->quiescing)     is probably more reliable  GLA */
2433         if (!any_dying_threads) {
2434             last_non_dead = i;
2435             ++total_non_dead;
2436         }
2437     }
2438
2439     if (retained->sick_child_detected) {
2440         if (active_thread_count > 0) {
2441             /* some child processes appear to be working.  don't kill the
2442              * whole server.
2443              */
2444             retained->sick_child_detected = 0;
2445         }
2446         else {
2447             /* looks like a basket case.  give up.
2448              */
2449             shutdown_pending = 1;
2450             child_fatal = 1;
2451             ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
2452                          ap_server_conf,
2453                          "No active workers found..."
2454                          " Apache is exiting!");
2455             /* the child already logged the failure details */
2456             return;
2457         }
2458     }
2459
2460     retained->max_daemons_limit = last_non_dead + 1;
2461
2462     if (idle_thread_count > max_spare_threads) {
2463         /* Kill off one child */
2464         ap_event_pod_signal(pod, TRUE);
2465         retained->idle_spawn_rate = 1;
2466     }
2467     else if (idle_thread_count < min_spare_threads) {
2468         /* terminate the free list */
2469         if (free_length == 0) { /* scoreboard is full, can't fork */
2470
2471             if (active_thread_count >= ap_daemons_limit * threads_per_child) {
2472                 if (!retained->maxclients_reported) {
2473                     /* only report this condition once */
2474                     ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf,
2475                                  "server reached MaxRequestWorkers setting, "
2476                                  "consider raising the MaxRequestWorkers "
2477                                  "setting");
2478                     retained->maxclients_reported = 1;
2479                 }
2480             }
2481             else {
2482                 ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf,
2483                              "scoreboard is full, not at MaxRequestWorkers");
2484             }
2485             retained->idle_spawn_rate = 1;
2486         }
2487         else {
2488             if (free_length > retained->idle_spawn_rate) {
2489                 free_length = retained->idle_spawn_rate;
2490             }
2491             if (retained->idle_spawn_rate >= 8) {
2492                 ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf,
2493                              "server seems busy, (you may need "
2494                              "to increase StartServers, ThreadsPerChild "
2495                              "or Min/MaxSpareThreads), "
2496                              "spawning %d children, there are around %d idle "
2497                              "threads, and %d total children", free_length,
2498                              idle_thread_count, total_non_dead);
2499             }
2500             for (i = 0; i < free_length; ++i) {
2501                 make_child(ap_server_conf, free_slots[i]);
2502             }
2503             /* the next time around we want to spawn twice as many if this
2504              * wasn't good enough, but not if we've just done a graceful
2505              */
2506             if (retained->hold_off_on_exponential_spawning) {
2507                 --retained->hold_off_on_exponential_spawning;
2508             }
2509             else if (retained->idle_spawn_rate < MAX_SPAWN_RATE) {
2510                 retained->idle_spawn_rate *= 2;
2511             }
2512         }
2513     }
2514     else {
2515         retained->idle_spawn_rate = 1;
2516     }
2517 }
2518
2519 static void server_main_loop(int remaining_children_to_start)
2520 {
2521     ap_generation_t old_gen;
2522     int child_slot;
2523     apr_exit_why_e exitwhy;
2524     int status, processed_status;
2525     apr_proc_t pid;
2526     int i;
2527
2528     while (!restart_pending && !shutdown_pending) {
2529         ap_wait_or_timeout(&exitwhy, &status, &pid, pconf, ap_server_conf);
2530
2531         if (pid.pid != -1) {
2532             processed_status = ap_process_child_status(&pid, exitwhy, status);
2533             child_slot = ap_find_child_by_pid(&pid);
2534             if (processed_status == APEXIT_CHILDFATAL) {
2535                 /* fix race condition found in PR 39311
2536                  * A child created at the same time as a graceful happens
2537                  * can find the lock missing and create a fatal error.
2538                  * It is not fatal for the last generation to be in this state.
2539                  */
2540                 if (child_slot < 0
2541                     || ap_get_scoreboard_process(child_slot)->generation
2542                        == retained->my_generation) {
2543                     shutdown_pending = 1;
2544                     child_fatal = 1;
2545                     return;
2546                 }
2547                 else {
2548                     ap_log_error(APLOG_MARK, APLOG_WARNING, 0, ap_server_conf,
2549                                  "Ignoring fatal error in child of previous "
2550                                  "generation (pid %ld).",
2551                                  (long)pid.pid);
2552                     retained->sick_child_detected = 1;
2553                 }
2554             }
2555             else if (processed_status == APEXIT_CHILDSICK) {
2556                 /* tell perform_idle_server_maintenance to check into this
2557                  * on the next timer pop
2558                  */
2559                 retained->sick_child_detected = 1;
2560             }
2561             /* non-fatal death... note that it's gone in the scoreboard. */
2562             if (child_slot >= 0) {
2563                 for (i = 0; i < threads_per_child; i++)
2564                     ap_update_child_status_from_indexes(child_slot, i,
2565                                                         SERVER_DEAD,
2566                                                         (request_rec *) NULL);
2567
2568                 event_note_child_killed(child_slot, 0, 0);
2569                 ap_scoreboard_image->parent[child_slot].quiescing = 0;
2570                 if (processed_status == APEXIT_CHILDSICK) {
2571                     /* resource shortage, minimize the fork rate */
2572                     retained->idle_spawn_rate = 1;
2573                 }
2574                 else if (remaining_children_to_start
2575                          && child_slot < ap_daemons_limit) {
2576                     /* we're still doing a 1-for-1 replacement of dead
2577                      * children with new children
2578                      */
2579                     make_child(ap_server_conf, child_slot);
2580                     --remaining_children_to_start;
2581                 }
2582             }
2583             else if (ap_unregister_extra_mpm_process(pid.pid, &old_gen) == 1) {
2584
2585                 event_note_child_killed(-1, /* already out of the scoreboard */
2586                                         pid.pid, old_gen);
2587 #if APR_HAS_OTHER_CHILD
2588             }
2589             else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
2590                                                 status) == 0) {
2591                 /* handled */
2592 #endif
2593             }
2594             else if (retained->is_graceful) {
2595                 /* Great, we've probably just lost a slot in the
2596                  * scoreboard.  Somehow we don't know about this child.
2597                  */
2598                 ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
2599                              ap_server_conf,
2600                              "long lost child came home! (pid %ld)",
2601                              (long) pid.pid);
2602             }
2603             /* Don't perform idle maintenance when a child dies,
2604              * only do it when there's a timeout.  Remember only a
2605              * finite number of children can die, and it's pretty
2606              * pathological for a lot to die suddenly.
2607              */
2608             continue;
2609         }
2610         else if (remaining_children_to_start) {
2611             /* we hit a 1 second timeout in which none of the previous
2612              * generation of children needed to be reaped... so assume
2613              * they're all done, and pick up the slack if any is left.
2614              */
2615             startup_children(remaining_children_to_start);
2616             remaining_children_to_start = 0;
2617             /* In any event we really shouldn't do the code below because
2618              * few of the servers we just started are in the IDLE state
2619              * yet, so we'd mistakenly create an extra server.
2620              */
2621             continue;
2622         }
2623
2624         perform_idle_server_maintenance();
2625     }
2626 }
2627
2628 static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
2629 {
2630     int remaining_children_to_start;
2631
2632     ap_log_pid(pconf, ap_pid_fname);
2633
2634     if (!retained->is_graceful) {
2635         if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
2636             mpm_state = AP_MPMQ_STOPPING;
2637             return DONE;
2638         }
2639         /* fix the generation number in the global score; we just got a new,
2640          * cleared scoreboard
2641          */
2642         ap_scoreboard_image->global->running_generation = retained->my_generation;
2643     }
2644
2645     restart_pending = shutdown_pending = 0;
2646     set_signals();
2647     /* Don't thrash... */
2648     if (max_spare_threads < min_spare_threads + threads_per_child)
2649         max_spare_threads = min_spare_threads + threads_per_child;
2650
2651     /* If we're doing a graceful_restart then we're going to see a lot
2652      * of children exiting immediately when we get into the main loop
2653      * below (because we just sent them AP_SIG_GRACEFUL).  This happens pretty
2654      * rapidly... and for each one that exits we may start a new one, until
2655      * there are at least min_spare_threads idle threads, counting across
2656      * all children.  But we may be permitted to start more children than
2657      * that, so we'll just keep track of how many we're
2658      * supposed to start up without the 1 second penalty between each fork.
2659      */
2660     remaining_children_to_start = ap_daemons_to_start;
2661     if (remaining_children_to_start > ap_daemons_limit) {
2662         remaining_children_to_start = ap_daemons_limit;
2663     }
2664     if (!retained->is_graceful) {
2665         startup_children(remaining_children_to_start);
2666         remaining_children_to_start = 0;
2667     }
2668     else {
2669         /* give the system some time to recover before kicking into
2670          * exponential mode */
2671         retained->hold_off_on_exponential_spawning = 10;
2672     }
2673
2674     ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2675                  "%s configured -- resuming normal operations",
2676                  ap_get_server_description());
2677     ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf,
2678                  "Server built: %s", ap_get_server_built());
2679     ap_log_command_line(plog, s);
2680
2681     mpm_state = AP_MPMQ_RUNNING;
2682
2683     server_main_loop(remaining_children_to_start);
2684     mpm_state = AP_MPMQ_STOPPING;
2685
2686     if (shutdown_pending && !retained->is_graceful) {
2687         /* Time to shut down:
2688          * Kill child processes, tell them to call child_exit, etc...
2689          */
2690         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2691         ap_reclaim_child_processes(1, /* Start with SIGTERM */
2692                                    event_note_child_killed);
2693
2694         if (!child_fatal) {
2695             /* cleanup pid file on normal shutdown */
2696             ap_remove_pid(pconf, ap_pid_fname);
2697             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0,
2698                          ap_server_conf, "caught SIGTERM, shutting down");
2699         }
2700         return DONE;
2701     } else if (shutdown_pending) {
2702         /* Time to gracefully shut down:
2703          * Kill child processes, tell them to call child_exit, etc...
2704          */
2705         int active_children;
2706         int index;
2707         apr_time_t cutoff = 0;
2708
2709         /* Close our listeners, and then ask our children to do same */
2710         ap_close_listeners();
2711         ap_event_pod_killpg(pod, ap_daemons_limit, TRUE);
2712         ap_relieve_child_processes(event_note_child_killed);
2713
2714         if (!child_fatal) {
2715             /* cleanup pid file on normal shutdown */
2716             ap_remove_pid(pconf, ap_pid_fname);
2717             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2718                          "caught " AP_SIG_GRACEFUL_STOP_STRING
2719                          ", shutting down gracefully");
2720         }
2721
2722         if (ap_graceful_shutdown_timeout) {
2723             cutoff = apr_time_now() +
2724                      apr_time_from_sec(ap_graceful_shutdown_timeout);
2725         }
2726
2727         /* Don't really exit until each child has finished */
2728         shutdown_pending = 0;
2729         do {
2730             /* Pause for a second */
2731             apr_sleep(apr_time_from_sec(1));
2732
2733             /* Relieve any children which have now exited */
2734             ap_relieve_child_processes(event_note_child_killed);
2735
2736             active_children = 0;
2737             for (index = 0; index < ap_daemons_limit; ++index) {
2738                 if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
2739                     active_children = 1;
2740                     /* Having just one child is enough to stay around */
2741                     break;
2742                 }
2743             }
2744         } while (!shutdown_pending && active_children &&
2745                  (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));
2746
2747         /* We might be here because we received SIGTERM, either
2748          * way, try and make sure that all of our processes are
2749          * really dead.
2750          */
2751         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2752         ap_reclaim_child_processes(1, event_note_child_killed);
2753
2754         return DONE;
2755     }
2756
2757     /* we've been told to restart */
2758     apr_signal(SIGHUP, SIG_IGN);
2759
2760     if (one_process) {
2761         /* not worth thinking about */
2762         return DONE;
2763     }
2764
2765     /* advance to the next generation */
2766     /* XXX: we really need to make sure this new generation number isn't in
2767      * use by any of the children.
2768      */
2769     ++retained->my_generation;
2770     ap_scoreboard_image->global->running_generation = retained->my_generation;
2771
2772     if (retained->is_graceful) {
2773         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2774                      AP_SIG_GRACEFUL_STRING
2775                      " received.  Doing graceful restart");
2776         /* wake up the children...time to die.  But we'll have more soon */
2777         ap_event_pod_killpg(pod, ap_daemons_limit, TRUE);
2778
2779
2780         /* This is mostly for debugging... so that we know what is still
2781          * gracefully dealing with existing request.
2782          */
2783
2784     }
2785     else {
2786         /* Kill 'em all.  Since the child acts the same on the parents SIGTERM
2787          * and a SIGHUP, we may as well use the same signal, because some user
2788          * pthreads are stealing signals from us left and right.
2789          */
2790         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2791
2792         ap_reclaim_child_processes(1,  /* Start with SIGTERM */
2793                                    event_note_child_killed);
2794         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2795                      "SIGHUP received.  Attempting to restart");
2796     }
2797
2798     return OK;
2799 }
2800
2801 /* This really should be a post_config hook, but the error log is already
2802  * redirected by that point, so we need to do this in the open_logs phase.
2803  */
2804 static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
2805                            apr_pool_t * ptemp, server_rec * s)
2806 {
2807     int startup = 0;
2808     int level_flags = 0;
2809     apr_status_t rv;
2810
2811     pconf = p;
2812
2813     /* the reverse of pre_config, we want this only the first time around */
2814     if (retained->module_loads == 1) {
2815         startup = 1;
2816         level_flags |= APLOG_STARTUP;
2817     }
2818
2819     if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
2820         ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
2821                      (startup ? NULL : s),
2822                      "no listening sockets available, shutting down");
2823         return DONE;
2824     }
2825
2826     if (!one_process) {
2827         if ((rv = ap_event_pod_open(pconf, &pod))) {
2828             ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
2829                          (startup ? NULL : s),
2830                          "could not open pipe-of-death");
2831             return DONE;
2832         }
2833     }
2834     return OK;
2835 }
2836
2837 static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
2838                             apr_pool_t * ptemp)
2839 {
2840     int no_detach, debug, foreground;
2841     apr_status_t rv;
2842     const char *userdata_key = "mpm_event_module";
2843
2844     mpm_state = AP_MPMQ_STARTING;
2845
2846     debug = ap_exists_config_define("DEBUG");
2847
2848     if (debug) {
2849         foreground = one_process = 1;
2850         no_detach = 0;
2851     }
2852     else {
2853         one_process = ap_exists_config_define("ONE_PROCESS");
2854         no_detach = ap_exists_config_define("NO_DETACH");
2855         foreground = ap_exists_config_define("FOREGROUND");
2856     }
2857
2858     /* sigh, want this only the second time around */
2859     retained = ap_retained_data_get(userdata_key);
2860     if (!retained) {
2861         retained = ap_retained_data_create(userdata_key, sizeof(*retained));
2862         retained->max_daemons_limit = -1;
2863         retained->idle_spawn_rate = 1;
2864     }
2865     ++retained->module_loads;
2866     if (retained->module_loads == 2) {
2867         rv = apr_pollset_create(&event_pollset, 1, plog,
2868                                 APR_POLLSET_WAKEABLE|APR_POLLSET_NOCOPY);
2869         if (rv != APR_SUCCESS) {
2870             ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
2871                          "apr_pollset_create failed; check system or user limits");
2872             return HTTP_INTERNAL_SERVER_ERROR;
2873         }
2874         apr_pollset_destroy(event_pollset);
2875
2876         if (!one_process && !foreground) {
2877             /* before we detach, setup crash handlers to log to errorlog */
2878             ap_fatal_signal_setup(ap_server_conf, pconf);
2879             rv = apr_proc_detach(no_detach ? APR_PROC_DETACH_FOREGROUND
2880                                  : APR_PROC_DETACH_DAEMONIZE);
2881             if (rv != APR_SUCCESS) {
2882                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
2883                              "apr_proc_detach failed");
2884                 return HTTP_INTERNAL_SERVER_ERROR;
2885             }
2886         }
2887     }
2888
2889     parent_pid = ap_my_pid = getpid();
2890
2891     ap_listen_pre_config();
2892     ap_daemons_to_start = DEFAULT_START_DAEMON;
2893     min_spare_threads = DEFAULT_MIN_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
2894     max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
2895     server_limit = DEFAULT_SERVER_LIMIT;
2896     thread_limit = DEFAULT_THREAD_LIMIT;
2897     ap_daemons_limit = server_limit;
2898     threads_per_child = DEFAULT_THREADS_PER_CHILD;
2899     max_workers = ap_daemons_limit * threads_per_child;
2900     ap_extended_status = 0;
2901
2902     return OK;
2903 }
2904
2905 static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
2906                               apr_pool_t *ptemp, server_rec *s)
2907 {
2908     int startup = 0;
2909
2910     /* the reverse of pre_config, we want this only the first time around */
2911     if (retained->module_loads == 1) {
2912         startup = 1;
2913     }
2914
2915     if (server_limit > MAX_SERVER_LIMIT) {
2916         if (startup) {
2917             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2918                          "WARNING: ServerLimit of %d exceeds compile-time "
2919                          "limit of", server_limit);
2920             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2921                          " %d servers, decreasing to %d.",
2922                          MAX_SERVER_LIMIT, MAX_SERVER_LIMIT);
2923         } else {
2924             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2925                          "ServerLimit of %d exceeds compile-time limit "
2926                          "of %d, decreasing to match",
2927                          server_limit, MAX_SERVER_LIMIT);
2928         }
2929         server_limit = MAX_SERVER_LIMIT;
2930     }
2931     else if (server_limit < 1) {
2932         if (startup) {
2933             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2934                          "WARNING: ServerLimit of %d not allowed, "
2935                          "increasing to 1.", server_limit);
2936         } else {
2937             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2938                          "ServerLimit of %d not allowed, increasing to 1",
2939                          server_limit);
2940         }
2941         server_limit = 1;
2942     }
2943
2944     /* you cannot change ServerLimit across a restart; ignore
2945      * any such attempts
2946      */
2947     if (!retained->first_server_limit) {
2948         retained->first_server_limit = server_limit;
2949     }
2950     else if (server_limit != retained->first_server_limit) {
2951         /* don't need a startup console version here */
2952         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2953                      "changing ServerLimit to %d from original value of %d "
2954                      "not allowed during restart",
2955                      server_limit, retained->first_server_limit);
2956         server_limit = retained->first_server_limit;
2957     }
2958
2959     if (thread_limit > MAX_THREAD_LIMIT) {
2960         if (startup) {
2961             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2962                          "WARNING: ThreadLimit of %d exceeds compile-time "
2963                          "limit of", thread_limit);
2964             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2965                          " %d threads, decreasing to %d.",
2966                          MAX_THREAD_LIMIT, MAX_THREAD_LIMIT);
2967         } else {
2968             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2969                          "ThreadLimit of %d exceeds compile-time limit "
2970                          "of %d, decreasing to match",
2971                          thread_limit, MAX_THREAD_LIMIT);
2972         }
2973         thread_limit = MAX_THREAD_LIMIT;
2974     }
2975     else if (thread_limit < 1) {
2976         if (startup) {
2977             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2978                          "WARNING: ThreadLimit of %d not allowed, "
2979                          "increasing to 1.", thread_limit);
2980         } else {
2981             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2982                          "ThreadLimit of %d not allowed, increasing to 1",
2983                          thread_limit);
2984         }
2985         thread_limit = 1;
2986     }
2987
2988     /* you cannot change ThreadLimit across a restart; ignore
2989      * any such attempts
2990      */
2991     if (!retained->first_thread_limit) {
2992         retained->first_thread_limit = thread_limit;
2993     }
2994     else if (thread_limit != retained->first_thread_limit) {
2995         /* don't need a startup console version here */
2996         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2997                      "changing ThreadLimit to %d from original value of %d "
2998                      "not allowed during restart",
2999                      thread_limit, retained->first_thread_limit);
3000         thread_limit = retained->first_thread_limit;
3001     }
3002
3003     if (threads_per_child > thread_limit) {
3004         if (startup) {
3005             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3006                          "WARNING: ThreadsPerChild of %d exceeds ThreadLimit "
3007                          "of", threads_per_child);
3008             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3009                          " %d threads, decreasing to %d.",
3010                          thread_limit, thread_limit);
3011             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3012                          " To increase, please see the ThreadLimit "
3013                          "directive.");
3014         } else {
3015             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3016                          "ThreadsPerChild of %d exceeds ThreadLimit "
3017                          "of %d, decreasing to match",
3018                          threads_per_child, thread_limit);
3019         }
3020         threads_per_child = thread_limit;
3021     }
3022     else if (threads_per_child < 1) {
3023         if (startup) {
3024             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3025                          "WARNING: ThreadsPerChild of %d not allowed, "
3026                          "increasing to 1.", threads_per_child);
3027         } else {
3028             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3029                          "ThreadsPerChild of %d not allowed, increasing to 1",
3030                          threads_per_child);
3031         }
3032         threads_per_child = 1;
3033     }
3034
3035     if (max_workers < threads_per_child) {
3036         if (startup) {
3037             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3038                          "WARNING: MaxRequestWorkers of %d is less than "
3039                          "ThreadsPerChild of", max_workers);
3040             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3041                          " %d, increasing to %d.  MaxRequestWorkers must be at "
3042                          "least as large",
3043                          threads_per_child, threads_per_child);
3044             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3045                          " as the number of threads in a single server.");
3046         } else {
3047             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3048                          "MaxRequestWorkers of %d is less than ThreadsPerChild "
3049                          "of %d, increasing to match",
3050                          max_workers, threads_per_child);
3051         }
3052         max_workers = threads_per_child;
3053     }
3054
3055     ap_daemons_limit = max_workers / threads_per_child;
3056
3057     if (max_workers % threads_per_child) {
3058         int tmp_max_workers = ap_daemons_limit * threads_per_child;
3059
3060         if (startup) {
3061             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3062                          "WARNING: MaxRequestWorkers of %d is not an integer "
3063                          "multiple of", max_workers);
3064             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3065                          " ThreadsPerChild of %d, decreasing to nearest "
3066                          "multiple %d,", threads_per_child,
3067                          tmp_max_workers);
3068             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3069                          " for a maximum of %d servers.",
3070                          ap_daemons_limit);
3071         } else {
3072             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3073                          "MaxRequestWorkers of %d is not an integer multiple "
3074                          "of ThreadsPerChild of %d, decreasing to nearest "
3075                          "multiple %d", max_workers, threads_per_child,
3076                          tmp_max_workers);
3077         }
3078         max_workers = tmp_max_workers;
3079     }
3080
3081     if (ap_daemons_limit > server_limit) {
3082         if (startup) {
3083             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3084                          "WARNING: MaxRequestWorkers of %d would require %d "
3085                          "servers and ", max_workers, ap_daemons_limit);
3086             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3087                          " would exceed ServerLimit of %d, decreasing to %d.",
3088                          server_limit, server_limit * threads_per_child);
3089             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3090                          " To increase, please see the ServerLimit "
3091                          "directive.");
3092         } else {
3093             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3094                          "MaxRequestWorkers of %d would require %d servers and "
3095                          "exceed ServerLimit of %d, decreasing to %d",
3096                          max_workers, ap_daemons_limit, server_limit,
3097                          server_limit * threads_per_child);
3098         }
3099         ap_daemons_limit = server_limit;
3100     }
3101
3102     /* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */
3103     if (ap_daemons_to_start < 0) {
3104         if (startup) {
3105             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3106                          "WARNING: StartServers of %d not allowed, "
3107                          "increasing to 1.", ap_daemons_to_start);
3108         } else {
3109             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3110                          "StartServers of %d not allowed, increasing to 1",
3111                          ap_daemons_to_start);
3112         }
3113         ap_daemons_to_start = 1;
3114     }
3115
3116     if (min_spare_threads < 1) {
3117         if (startup) {
3118             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3119                          "WARNING: MinSpareThreads of %d not allowed, "
3120                          "increasing to 1", min_spare_threads);
3121             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3122                          " to avoid almost certain server failure.");
3123             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3124                          " Please read the documentation.");
3125         } else {
3126             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3127                          "MinSpareThreads of %d not allowed, increasing to 1",
3128                          min_spare_threads);
3129         }
3130         min_spare_threads = 1;
3131     }
3132
3133     /* max_spare_threads < min_spare_threads + threads_per_child
3134      * checked in ap_mpm_run()
3135      */
3136
3137     return OK;
3138 }
3139
3140 static void event_hooks(apr_pool_t * p)
3141 {
3142     /* Our open_logs hook function must run before the core's, or stderr
3143      * will be redirected to a file, and the messages won't print to the
3144      * console.
3145      */
3146     static const char *const aszSucc[] = { "core.c", NULL };
3147     one_process = 0;
3148
3149     ap_hook_open_logs(event_open_logs, NULL, aszSucc, APR_HOOK_REALLY_FIRST);
3150     /* we need to set the MPM state before other pre-config hooks use MPM query
3151      * to retrieve it, so register as REALLY_FIRST
3152      */
3153     ap_hook_pre_config(event_pre_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
3154     ap_hook_check_config(event_check_config, NULL, NULL, APR_HOOK_MIDDLE);
3155     ap_hook_mpm(event_run, NULL, NULL, APR_HOOK_MIDDLE);
3156     ap_hook_mpm_query(event_query, NULL, NULL, APR_HOOK_MIDDLE);
3157     ap_hook_mpm_register_timed_callback(event_register_timed_callback, NULL, NULL,
3158                                         APR_HOOK_MIDDLE);
3159     ap_hook_mpm_get_name(event_get_name, NULL, NULL, APR_HOOK_MIDDLE);
3160 }
3161
3162 static const char *set_daemons_to_start(cmd_parms *cmd, void *dummy,
3163                                         const char *arg)
3164 {
3165     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3166     if (err != NULL) {
3167         return err;
3168     }
3169
3170     ap_daemons_to_start = atoi(arg);
3171     return NULL;
3172 }
3173
3174 static const char *set_min_spare_threads(cmd_parms * cmd, void *dummy,
3175                                          const char *arg)
3176 {
3177     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3178     if (err != NULL) {
3179         return err;
3180     }
3181
3182     min_spare_threads = atoi(arg);
3183     return NULL;
3184 }
3185
3186 static const char *set_max_spare_threads(cmd_parms * cmd, void *dummy,
3187                                          const char *arg)
3188 {
3189     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3190     if (err != NULL) {
3191         return err;
3192     }
3193
3194     max_spare_threads = atoi(arg);
3195     return NULL;
3196 }
3197
3198 static const char *set_max_workers(cmd_parms * cmd, void *dummy,
3199                                    const char *arg)
3200 {
3201     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3202     if (err != NULL) {
3203         return err;
3204     }
3205     if (!strcasecmp(cmd->cmd->name, "MaxClients")) {
3206         ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL,
3207                      "MaxClients is deprecated, use MaxRequestWorkers "
3208                      "instead.");
3209     }
3210     max_workers = atoi(arg);
3211     return NULL;
3212 }
3213
3214 static const char *set_threads_per_child(cmd_parms * cmd, void *dummy,
3215                                          const char *arg)
3216 {
3217     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3218     if (err != NULL) {
3219         return err;
3220     }
3221
3222     threads_per_child = atoi(arg);
3223     return NULL;
3224 }
3225 static const char *set_server_limit (cmd_parms *cmd, void *dummy, const char *arg)
3226 {
3227     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3228     if (err != NULL) {
3229         return err;
3230     }
3231
3232     server_limit = atoi(arg);
3233     return NULL;
3234 }
3235
3236 static const char *set_thread_limit(cmd_parms * cmd, void *dummy,
3237                                     const char *arg)
3238 {
3239     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3240     if (err != NULL) {
3241         return err;
3242     }
3243
3244     thread_limit = atoi(arg);
3245     return NULL;
3246 }
3247
3248 static const char *set_worker_factor(cmd_parms * cmd, void *dummy,
3249                                      const char *arg)
3250 {
3251     double val;
3252     char *endptr;
3253     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3254     if (err != NULL) {
3255         return err;
3256     }
3257
3258     val = strtod(arg, &endptr);
3259     if (*endptr)
3260         return "error parsing value";
3261
3262     worker_factor = val * WORKER_FACTOR_SCALE;
3263     if (worker_factor == 0)
3264         worker_factor = 1;
3265     return NULL;
3266 }
3267
3268
3269 static const command_rec event_cmds[] = {
3270     LISTEN_COMMANDS,
3271     AP_INIT_TAKE1("StartServers", set_daemons_to_start, NULL, RSRC_CONF,
3272                   "Number of child processes launched at server startup"),
3273     AP_INIT_TAKE1("ServerLimit", set_server_limit, NULL, RSRC_CONF,
3274                   "Maximum number of child processes for this run of Apache"),
3275     AP_INIT_TAKE1("MinSpareThreads", set_min_spare_threads, NULL, RSRC_CONF,
3276                   "Minimum number of idle threads, to handle request spikes"),
3277     AP_INIT_TAKE1("MaxSpareThreads", set_max_spare_threads, NULL, RSRC_CONF,
3278                   "Maximum number of idle threads"),
3279     AP_INIT_TAKE1("MaxClients", set_max_workers, NULL, RSRC_CONF,
3280                   "Deprecated name of MaxRequestWorkers"),
3281     AP_INIT_TAKE1("MaxRequestWorkers", set_max_workers, NULL, RSRC_CONF,
3282                   "Maximum number of threads alive at the same time"),
3283     AP_INIT_TAKE1("ThreadsPerChild", set_threads_per_child, NULL, RSRC_CONF,
3284                   "Number of threads each child creates"),
3285     AP_INIT_TAKE1("ThreadLimit", set_thread_limit, NULL, RSRC_CONF,
3286                   "Maximum number of worker threads per child process for this "
3287                   "run of Apache - Upper limit for ThreadsPerChild"),
3288     AP_INIT_TAKE1("AsyncRequestWorkerFactor", set_worker_factor, NULL, RSRC_CONF,
3289                   "How many additional connects will be accepted per idle "
3290                   "worker thread"),
3291     AP_GRACEFUL_SHUTDOWN_TIMEOUT_COMMAND,
3292     {NULL}
3293 };
3294
3295 AP_DECLARE_MODULE(mpm_event) = {
3296     MPM20_MODULE_STUFF,
3297     NULL,                       /* hook to run before apache parses args */
3298     NULL,                       /* create per-directory config structure */
3299     NULL,                       /* merge per-directory config structures */
3300     NULL,                       /* create per-server config structure */
3301     NULL,                       /* merge per-server config structures */
3302     event_cmds,                 /* command apr_table_t */
3303     event_hooks                 /* register_hooks */
3304 };