granicus.if.org Git - apache/blob - server/mpm/event/event.c

   1 /* Licensed to the Apache Software Foundation (ASF) under one or more
   2  * contributor license agreements.  See the NOTICE file distributed with
   3  * this work for additional information regarding copyright ownership.
   4  * The ASF licenses this file to You under the Apache License, Version 2.0
   5  * (the "License"); you may not use this file except in compliance with
   6  * the License.  You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * This MPM tries to fix the 'keep alive problem' in HTTP.
  19  *
  20  * After a client completes the first request, the client can keep the
  21  * connection open to send more requests with the same socket.  This can save
  22  * significant overhead in creating TCP connections.  However, the major
  23  * disadvantage is that Apache traditionally keeps an entire child
  24  * process/thread waiting for data from the client.  To solve this problem,
  25  * this MPM has a dedicated thread for handling both the Listening sockets,
  26  * and all sockets that are in a Keep Alive status.
  27  *
  28  * The MPM assumes the underlying apr_pollset implementation is somewhat
  29  * threadsafe.  This currently is only compatible with KQueue and EPoll.  This
  30  * enables the MPM to avoid extra high level locking or having to wake up the
  31  * listener thread when a keep-alive socket needs to be sent to it.
  32  *
  33  * This MPM does not perform well on older platforms that do not have very good
  34  * threading, like Linux with a 2.4 kernel, but this does not matter, since we
  35  * require EPoll or KQueue.
  36  *
  37  * For FreeBSD, use 5.3.  It is possible to run this MPM on FreeBSD 5.2.1, if
  38  * you use libkse (see `man libmap.conf`).
  39  *
  40  * For NetBSD, use at least 2.0.
  41  *
  42  * For Linux, you should use a 2.6 kernel, and make sure your glibc has epoll
  43  * support compiled in.
  44  *
  45  */
  46
  47 #include "apr.h"
  48 #include "apr_portable.h"
  49 #include "apr_strings.h"
  50 #include "apr_file_io.h"
  51 #include "apr_thread_proc.h"
  52 #include "apr_signal.h"
  53 #include "apr_thread_mutex.h"
  54 #include "apr_poll.h"
  55 #include "apr_ring.h"
  56 #include "apr_queue.h"
  57 #include "apr_atomic.h"
  58 #define APR_WANT_STRFUNC
  59 #include "apr_want.h"
  60 #include "apr_version.h"
  61
  62 #if APR_HAVE_UNISTD_H
  63 #include <unistd.h>
  64 #endif
  65 #if APR_HAVE_SYS_SOCKET_H
  66 #include <sys/socket.h>
  67 #endif
  68 #if APR_HAVE_SYS_WAIT_H
  69 #include <sys/wait.h>
  70 #endif
  71 #ifdef HAVE_SYS_PROCESSOR_H
  72 #include <sys/processor.h>      /* for bindprocessor() */
  73 #endif
  74
  75 #if !APR_HAS_THREADS
  76 #error The Event MPM requires APR threads, but they are unavailable.
  77 #endif
  78
  79 #include "ap_config.h"
  80 #include "httpd.h"
  81 #include "http_main.h"
  82 #include "http_log.h"
  83 #include "http_config.h"        /* for read_config */
  84 #include "http_core.h"          /* for get_remote_host */
  85 #include "http_connection.h"
  86 #include "ap_mpm.h"
  87 #include "pod.h"
  88 #include "mpm_common.h"
  89 #include "ap_listen.h"
  90 #include "scoreboard.h"
  91 #include "fdqueue.h"
  92 #include "mpm_default.h"
  93 #include "http_vhost.h"
  94 #include "unixd.h"
  95
  96 #include <signal.h>
  97 #include <limits.h>             /* for INT_MAX */
  98
  99
 100 #include "equeue.h"
 101
 102 #if HAVE_SERF
 103 #include "mod_serf.h"
 104 #include "serf.h"
 105 #endif
 106
 107 /* Limit on the total --- clients will be locked out if more servers than
 108  * this are needed.  It is intended solely to keep the server from crashing
 109  * when things get out of hand.
 110  *
 111  * We keep a hard maximum number of servers, for two reasons --- first off,
 112  * in case something goes seriously wrong, we want to stop the fork bomb
 113  * short of actually crashing the machine we're running on by filling some
 114  * kernel table.  Secondly, it keeps the size of the scoreboard file small
 115  * enough that we can read the whole thing without worrying too much about
 116  * the overhead.
 117  */
 118 #ifndef DEFAULT_SERVER_LIMIT
 119 #define DEFAULT_SERVER_LIMIT 16
 120 #endif
 121
 122 /* Admin can't tune ServerLimit beyond MAX_SERVER_LIMIT.  We want
 123  * some sort of compile-time limit to help catch typos.
 124  */
 125 #ifndef MAX_SERVER_LIMIT
 126 #define MAX_SERVER_LIMIT 20000
 127 #endif
 128
 129 /* Limit on the threads per process.  Clients will be locked out if more than
 130  * this are needed.
 131  *
 132  * We keep this for one reason: it keeps the size of the scoreboard file small
 133  * enough that we can read the whole thing without worrying too much about
 134  * the overhead.
 135  */
 136 #ifndef DEFAULT_THREAD_LIMIT
 137 #define DEFAULT_THREAD_LIMIT 64
 138 #endif
 139
 140 /* Admin can't tune ThreadLimit beyond MAX_THREAD_LIMIT.  We want
 141  * some sort of compile-time limit to help catch typos.
 142  */
 143 #ifndef MAX_THREAD_LIMIT
 144 #define MAX_THREAD_LIMIT 100000
 145 #endif
 146
 147 #define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
 148
 149 #if !APR_VERSION_AT_LEAST(1,4,0)
 150 #define apr_time_from_msec(x) (x * 1000)
 151 #endif
 152
 153 #ifndef MAX_SECS_TO_LINGER
 154 #define MAX_SECS_TO_LINGER 30
 155 #endif
 156 #define SECONDS_TO_LINGER  2
 157
 158 /*
 159  * Actual definitions of config globals
 160  */
 161
 162 #ifndef DEFAULT_WORKER_FACTOR
 163 #define DEFAULT_WORKER_FACTOR 2
 164 #endif
 165 #define WORKER_FACTOR_SCALE   16  /* scale factor to allow fractional values */
 166 static unsigned int worker_factor = DEFAULT_WORKER_FACTOR * WORKER_FACTOR_SCALE;
 167
 168 static int threads_per_child = 0;   /* Worker threads per child */
 169 static int ap_daemons_to_start = 0;
 170 static int min_spare_threads = 0;
 171 static int max_spare_threads = 0;
 172 static int ap_daemons_limit = 0;
 173 static int max_workers = 0;
 174 static int server_limit = 0;
 175 static int thread_limit = 0;
 176 static int dying = 0;
 177 static int workers_may_exit = 0;
 178 static int start_thread_may_exit = 0;
 179 static int listener_may_exit = 0;
 180 static int requests_this_child;
 181 static int num_listensocks = 0;
 182 static apr_uint32_t connection_count = 0;
 183 static int resource_shortage = 0;
 184 static fd_queue_t *worker_queue;
 185 static fd_queue_info_t *worker_queue_info;
 186 static int mpm_state = AP_MPMQ_STARTING;
 187
 188 typedef enum {
 189     TIMEOUT_WRITE_COMPLETION,
 190     TIMEOUT_KEEPALIVE,
 191     TIMEOUT_LINGER,
 192     TIMEOUT_SHORT_LINGER
 193 } timeout_type_e;
 194
 195 struct event_conn_state_t {
 196     /** APR_RING of expiration timeouts */
 197     APR_RING_ENTRY(event_conn_state_t) timeout_list;
 198     /** the expiration time of the next keepalive timeout */
 199     apr_time_t expiration_time;
 200     /** connection record this struct refers to */
 201     conn_rec *c;
 202     /** memory pool to allocate from */
 203     apr_pool_t *p;
 204     /** bucket allocator */
 205     apr_bucket_alloc_t *bucket_alloc;
 206     /** poll file descriptor information */
 207     apr_pollfd_t pfd;
 208     /** public parts of the connection state */
 209     conn_state_t pub;
 210 };
 211
 212 typedef struct pollset_op_t {
 213     timeout_type_e timeout_type;
 214     event_conn_state_t *cs;
 215     const char *tag;
 216 } pollset_op_t;
 217
 218
 219 APR_RING_HEAD(timeout_head_t, event_conn_state_t);
 220 struct timeout_queue {
 221     struct timeout_head_t head;
 222     int count;
 223     const char *tag;
 224 };
 225 /*
 226  * Several timeout queues that use different timeouts, so that we always can
 227  * simply append to the end.
 228  *   write_completion_q uses TimeOut
 229  *   keepalive_q        uses KeepAliveTimeOut
 230  *   linger_q           uses MAX_SECS_TO_LINGER
 231  *   short_linger_q     uses SECONDS_TO_LINGER
 232  */
 233 static struct timeout_queue write_completion_q, keepalive_q, linger_q,
 234                             short_linger_q;
 235 static apr_pollfd_t *listener_pollfd;
 236
 237 /*
 238  * Macros for accessing struct timeout_queue.
 239  * For TO_QUEUE_APPEND and TO_QUEUE_REMOVE, timeout_mutex must be held.
 240  */
 241 #define TO_QUEUE_APPEND(q, el)                                                  \
 242     do {                                                                        \
 243         APR_RING_INSERT_TAIL(&(q).head, el, event_conn_state_t, timeout_list);  \
 244         (q).count++;                                                            \
 245     } while (0)
 246
 247 #define TO_QUEUE_REMOVE(q, el)             \
 248     do {                                   \
 249         APR_RING_REMOVE(el, timeout_list); \
 250         (q).count--;                       \
 251     } while (0)
 252
 253 #define TO_QUEUE_INIT(q)                                                  \
 254     do {                                                                  \
 255             APR_RING_INIT(&(q).head, event_conn_state_t, timeout_list);   \
 256             (q).tag = #q;                                                 \
 257     } while (0)
 258
 259 #define TO_QUEUE_ELEM_INIT(el) APR_RING_ELEM_INIT(el, timeout_list)
 260
 261 /*
 262  * The pollset for sockets that are in any of the timeout queues. Currently
 263  * we use the timeout_mutex to make sure that connections are added/removed
 264  * atomically to/from both event_pollset and a timeout queue. Otherwise
 265  * some confusion can happen under high load if timeout queues and pollset
 266  * get out of sync.
 267  * XXX: It should be possible to make the lock unnecessary in many or even all
 268  * XXX: cases.
 269  */
 270 static apr_pollset_t *event_pollset;
 271
 272 #if HAVE_SERF
 273 typedef struct {
 274     apr_pollset_t *pollset;
 275     apr_pool_t *pool;
 276 } s_baton_t;
 277
 278 static serf_context_t *g_serf;
 279 #endif
 280
 281 /* The structure used to pass unique initialization info to each thread */
 282 typedef struct
 283 {
 284     int pid;
 285     int tid;
 286     int sd;
 287 } proc_info;
 288
 289 /* Structure used to pass information to the thread responsible for
 290  * creating the rest of the threads.
 291  */
 292 typedef struct
 293 {
 294     apr_thread_t **threads;
 295     apr_thread_t *listener;
 296     int child_num_arg;
 297     apr_threadattr_t *threadattr;
 298 } thread_starter;
 299
 300 typedef enum
 301 {
 302     PT_CSD,
 303     PT_ACCEPT
 304 #if HAVE_SERF
 305     , PT_SERF
 306 #endif
 307 } poll_type_e;
 308
 309 typedef struct
 310 {
 311     poll_type_e type;
 312     void *baton;
 313 } listener_poll_type;
 314
 315 /* data retained by event across load/unload of the module
 316  * allocated on first call to pre-config hook; located on
 317  * subsequent calls to pre-config hook
 318  */
 319 typedef struct event_retained_data {
 320     int first_server_limit;
 321     int first_thread_limit;
 322     int module_loads;
 323     int sick_child_detected;
 324     ap_generation_t my_generation;
 325     int volatile is_graceful; /* set from signal handler */
 326     int maxclients_reported;
 327     /*
 328      * The max child slot ever assigned, preserved across restarts.  Necessary
 329      * to deal with MaxRequestWorkers changes across AP_SIG_GRACEFUL restarts.
 330      * We use this value to optimize routines that have to scan the entire
 331      * scoreboard.
 332      */
 333     int max_daemons_limit;
 334     /*
 335      * idle_spawn_rate is the number of children that will be spawned on the
 336      * next maintenance cycle if there aren't enough idle servers.  It is
 337      * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by
 338      * without the need to spawn.
 339      */
 340     int idle_spawn_rate;
 341 #ifndef MAX_SPAWN_RATE
 342 #define MAX_SPAWN_RATE        (32)
 343 #endif
 344     int hold_off_on_exponential_spawning;
 345 } event_retained_data;
 346 static event_retained_data *retained;
 347
 348 #define ID_FROM_CHILD_THREAD(c, t)    ((c * thread_limit) + t)
 349
 350 static ap_event_pod_t *pod;
 351
 352 /* The event MPM respects a couple of runtime flags that can aid
 353  * in debugging. Setting the -DNO_DETACH flag will prevent the root process
 354  * from detaching from its controlling terminal. Additionally, setting
 355  * the -DONE_PROCESS flag (which implies -DNO_DETACH) will get you the
 356  * child_main loop running in the process which originally started up.
 357  * This gives you a pretty nice debugging environment.  (You'll get a SIGHUP
 358  * early in standalone_main; just continue through.  This is the server
 359  * trying to kill off any child processes which it might have lying
 360  * around --- Apache doesn't keep track of their pids, it just sends
 361  * SIGHUP to the process group, ignoring it in the root process.
 362  * Continue through and you'll be fine.).
 363  */
 364
 365 static int one_process = 0;
 366
 367 #ifdef DEBUG_SIGSTOP
 368 int raise_sigstop_flags;
 369 #endif
 370
 371 static apr_pool_t *pconf;       /* Pool for config stuff */
 372 static apr_pool_t *pchild;      /* Pool for httpd child stuff */
 373
 374 static pid_t ap_my_pid;         /* Linux getpid() doesn't work except in main
 375                                    thread. Use this instead */
 376 static pid_t parent_pid;
 377 static apr_os_thread_t *listener_os_thread;
 378
 379 /* The LISTENER_SIGNAL signal will be sent from the main thread to the
 380  * listener thread to wake it up for graceful termination (what a child
 381  * process from an old generation does when the admin does "apachectl
 382  * graceful").  This signal will be blocked in all threads of a child
 383  * process except for the listener thread.
 384  */
 385 #define LISTENER_SIGNAL     SIGHUP
 386
 387 /* An array of socket descriptors in use by each thread used to
 388  * perform a non-graceful (forced) shutdown of the server.
 389  */
 390 static apr_socket_t **worker_sockets;
 391 static ap_equeue_t **worker_equeues;
 392
 393 static void disable_listensocks(int process_slot)
 394 {
 395     int i;
 396     for (i = 0; i < num_listensocks; i++) {
 397         apr_pollset_remove(event_pollset, &listener_pollfd[i]);
 398     }
 399     ap_scoreboard_image->parent[process_slot].not_accepting = 1;
 400 }
 401
 402 static void enable_listensocks(int process_slot)
 403 {
 404     int i;
 405     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
 406                  "Accepting new connections again: "
 407                  "%u active conns, %u idle workers",
 408                  apr_atomic_read32(&connection_count),
 409                  ap_queue_info_get_idlers(worker_queue_info));
 410     for (i = 0; i < num_listensocks; i++)
 411         apr_pollset_add(event_pollset, &listener_pollfd[i]);
 412     /*
 413      * XXX: This is not yet optimal. If many workers suddenly become available,
 414      * XXX: the parent may kill some processes off too soon.
 415      */
 416     ap_scoreboard_image->parent[process_slot].not_accepting = 0;
 417 }
 418
 419 static void close_worker_sockets(void)
 420 {
 421     int i;
 422     for (i = 0; i < threads_per_child; i++) {
 423         if (worker_sockets[i]) {
 424             apr_socket_close(worker_sockets[i]);
 425             worker_sockets[i] = NULL;
 426         }
 427     }
 428 }
 429
 430 static void wakeup_listener(void)
 431 {
 432     listener_may_exit = 1;
 433     if (!listener_os_thread) {
 434         /* XXX there is an obscure path that this doesn't handle perfectly:
 435          *     right after listener thread is created but before
 436          *     listener_os_thread is set, the first worker thread hits an
 437          *     error and starts graceful termination
 438          */
 439         return;
 440     }
 441
 442     /* unblock the listener if it's waiting for a worker */
 443     ap_queue_info_term(worker_queue_info);
 444
 445     /*
 446      * we should just be able to "kill(ap_my_pid, LISTENER_SIGNAL)" on all
 447      * platforms and wake up the listener thread since it is the only thread
 448      * with SIGHUP unblocked, but that doesn't work on Linux
 449      */
 450 #ifdef HAVE_PTHREAD_KILL
 451     pthread_kill(*listener_os_thread, LISTENER_SIGNAL);
 452 #else
 453     kill(ap_my_pid, LISTENER_SIGNAL);
 454 #endif
 455 }
 456
 457 #define ST_INIT              0
 458 #define ST_GRACEFUL          1
 459 #define ST_UNGRACEFUL        2
 460
 461 static int terminate_mode = ST_INIT;
 462
 463 static void signal_threads(int mode)
 464 {
 465     if (terminate_mode == mode) {
 466         return;
 467     }
 468     terminate_mode = mode;
 469     mpm_state = AP_MPMQ_STOPPING;
 470
 471     /* in case we weren't called from the listener thread, wake up the
 472      * listener thread
 473      */
 474     wakeup_listener();
 475
 476     /* for ungraceful termination, let the workers exit now;
 477      * for graceful termination, the listener thread will notify the
 478      * workers to exit once it has stopped accepting new connections
 479      */
 480     if (mode == ST_UNGRACEFUL) {
 481         workers_may_exit = 1;
 482         ap_queue_interrupt_all(worker_queue);
 483         close_worker_sockets(); /* forcefully kill all current connections */
 484     }
 485 }
 486
 487 static int event_query(int query_code, int *result, apr_status_t *rv)
 488 {
 489     *rv = APR_SUCCESS;
 490     switch (query_code) {
 491     case AP_MPMQ_MAX_DAEMON_USED:
 492         *result = retained->max_daemons_limit;
 493         break;
 494     case AP_MPMQ_IS_THREADED:
 495         *result = AP_MPMQ_STATIC;
 496         break;
 497     case AP_MPMQ_IS_FORKED:
 498         *result = AP_MPMQ_DYNAMIC;
 499         break;
 500     case AP_MPMQ_IS_ASYNC:
 501         *result = 1;
 502         break;
 503     case AP_MPMQ_HAS_SERF:
 504         *result = 1;
 505         break;
 506     case AP_MPMQ_HARD_LIMIT_DAEMONS:
 507         *result = server_limit;
 508         break;
 509     case AP_MPMQ_HARD_LIMIT_THREADS:
 510         *result = thread_limit;
 511         break;
 512     case AP_MPMQ_MAX_THREADS:
 513         *result = threads_per_child;
 514         break;
 515     case AP_MPMQ_MIN_SPARE_DAEMONS:
 516         *result = 0;
 517         break;
 518     case AP_MPMQ_MIN_SPARE_THREADS:
 519         *result = min_spare_threads;
 520         break;
 521     case AP_MPMQ_MAX_SPARE_DAEMONS:
 522         *result = 0;
 523         break;
 524     case AP_MPMQ_MAX_SPARE_THREADS:
 525         *result = max_spare_threads;
 526         break;
 527     case AP_MPMQ_MAX_REQUESTS_DAEMON:
 528         *result = ap_max_requests_per_child;
 529         break;
 530     case AP_MPMQ_MAX_DAEMONS:
 531         *result = ap_daemons_limit;
 532         break;
 533     case AP_MPMQ_MPM_STATE:
 534         *result = mpm_state;
 535         break;
 536     case AP_MPMQ_GENERATION:
 537         *result = retained->my_generation;
 538         break;
 539     default:
 540         *rv = APR_ENOTIMPL;
 541         break;
 542     }
 543     return OK;
 544 }
 545
 546 static void event_note_child_killed(int childnum, pid_t pid, ap_generation_t gen)
 547 {
 548     if (childnum != -1) { /* child had a scoreboard slot? */
 549         ap_run_child_status(ap_server_conf,
 550                             ap_scoreboard_image->parent[childnum].pid,
 551                             ap_scoreboard_image->parent[childnum].generation,
 552                             childnum, MPM_CHILD_EXITED);
 553         ap_scoreboard_image->parent[childnum].pid = 0;
 554     }
 555     else {
 556         ap_run_child_status(ap_server_conf, pid, gen, -1, MPM_CHILD_EXITED);
 557     }
 558 }
 559
 560 static void event_note_child_started(int slot, pid_t pid)
 561 {
 562     ap_scoreboard_image->parent[slot].pid = pid;
 563     ap_run_child_status(ap_server_conf,
 564                         ap_scoreboard_image->parent[slot].pid,
 565                         retained->my_generation, slot, MPM_CHILD_STARTED);
 566 }
 567
 568 static void event_note_child_lost_slot(int slot, pid_t newpid)
 569 {
 570     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
 571                  "pid %" APR_PID_T_FMT " taking over scoreboard slot from "
 572                  "%" APR_PID_T_FMT "%s",
 573                  newpid,
 574                  ap_scoreboard_image->parent[slot].pid,
 575                  ap_scoreboard_image->parent[slot].quiescing ?
 576                  " (quiescing)" : "");
 577     ap_run_child_status(ap_server_conf,
 578                         ap_scoreboard_image->parent[slot].pid,
 579                         ap_scoreboard_image->parent[slot].generation,
 580                         slot, MPM_CHILD_LOST_SLOT);
 581     /* Don't forget about this exiting child process, or we
 582      * won't be able to kill it if it doesn't exit by the
 583      * time the server is shut down.
 584      */
 585     ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid,
 586                                   ap_scoreboard_image->parent[slot].generation);
 587 }
 588
 589 static const char *event_get_name(void)
 590 {
 591     return "event";
 592 }
 593
 594 /* a clean exit from a child with proper cleanup */
 595 static void clean_child_exit(int code) __attribute__ ((noreturn));
 596 static void clean_child_exit(int code)
 597 {
 598     mpm_state = AP_MPMQ_STOPPING;
 599     if (pchild) {
 600         apr_pool_destroy(pchild);
 601     }
 602
 603     if (one_process) {
 604         event_note_child_killed(/* slot */ 0, 0, 0);
 605     }
 606
 607     exit(code);
 608 }
 609
 610 static void just_die(int sig)
 611 {
 612     clean_child_exit(0);
 613 }
 614
 615 /*****************************************************************
 616  * Connection structures and accounting...
 617  */
 618
 619 static int child_fatal;
 620
 621 /* volatile because they're updated from a signal handler */
 622 static int volatile shutdown_pending;
 623 static int volatile restart_pending;
 624
 625 static apr_status_t decrement_connection_count(void *dummy) {
 626     apr_atomic_dec32(&connection_count);
 627     return APR_SUCCESS;
 628 }
 629
 630 /*
 631  * ap_start_shutdown() and ap_start_restart(), below, are a first stab at
 632  * functions to initiate shutdown or restart without relying on signals.
 633  * Previously this was initiated in sig_term() and restart() signal handlers,
 634  * but we want to be able to start a shutdown/restart from other sources --
 635  * e.g. on Win32, from the service manager. Now the service manager can
 636  * call ap_start_shutdown() or ap_start_restart() as appropriate.  Note that
 637  * these functions can also be called by the child processes, since global
 638  * variables are no longer used to pass on the required action to the parent.
 639  *
 640  * These should only be called from the parent process itself, since the
 641  * parent process will use the shutdown_pending and restart_pending variables
 642  * to determine whether to shutdown or restart. The child process should
 643  * call signal_parent() directly to tell the parent to die -- this will
 644  * cause neither of those variable to be set, which the parent will
 645  * assume means something serious is wrong (which it will be, for the
 646  * child to force an exit) and so do an exit anyway.
 647  */
 648
 649 static void ap_start_shutdown(int graceful)
 650 {
 651     mpm_state = AP_MPMQ_STOPPING;
 652     if (shutdown_pending == 1) {
 653         /* Um, is this _probably_ not an error, if the user has
 654          * tried to do a shutdown twice quickly, so we won't
 655          * worry about reporting it.
 656          */
 657         return;
 658     }
 659     shutdown_pending = 1;
 660     retained->is_graceful = graceful;
 661 }
 662
 663 /* do a graceful restart if graceful == 1 */
 664 static void ap_start_restart(int graceful)
 665 {
 666     mpm_state = AP_MPMQ_STOPPING;
 667     if (restart_pending == 1) {
 668         /* Probably not an error - don't bother reporting it */
 669         return;
 670     }
 671     restart_pending = 1;
 672     retained->is_graceful = graceful;
 673 }
 674
 675 static void sig_term(int sig)
 676 {
 677     ap_start_shutdown(sig == AP_SIG_GRACEFUL_STOP);
 678 }
 679
 680 static void restart(int sig)
 681 {
 682     ap_start_restart(sig == AP_SIG_GRACEFUL);
 683 }
 684
 685 static void set_signals(void)
 686 {
 687 #ifndef NO_USE_SIGACTION
 688     struct sigaction sa;
 689 #endif
 690
 691     if (!one_process) {
 692         ap_fatal_signal_setup(ap_server_conf, pconf);
 693     }
 694
 695 #ifndef NO_USE_SIGACTION
 696     sigemptyset(&sa.sa_mask);
 697     sa.sa_flags = 0;
 698
 699     sa.sa_handler = sig_term;
 700     if (sigaction(SIGTERM, &sa, NULL) < 0)
 701         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 702                      "sigaction(SIGTERM)");
 703 #ifdef AP_SIG_GRACEFUL_STOP
 704     if (sigaction(AP_SIG_GRACEFUL_STOP, &sa, NULL) < 0)
 705         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 706                      "sigaction(" AP_SIG_GRACEFUL_STOP_STRING ")");
 707 #endif
 708 #ifdef SIGINT
 709     if (sigaction(SIGINT, &sa, NULL) < 0)
 710         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 711                      "sigaction(SIGINT)");
 712 #endif
 713 #ifdef SIGXCPU
 714     sa.sa_handler = SIG_DFL;
 715     if (sigaction(SIGXCPU, &sa, NULL) < 0)
 716         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 717                      "sigaction(SIGXCPU)");
 718 #endif
 719 #ifdef SIGXFSZ
 720     /* For systems following the LFS standard, ignoring SIGXFSZ allows
 721      * a write() beyond the 2GB limit to fail gracefully with E2BIG
 722      * rather than terminate the process. */
 723     sa.sa_handler = SIG_IGN;
 724     if (sigaction(SIGXFSZ, &sa, NULL) < 0)
 725         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 726                      "sigaction(SIGXFSZ)");
 727 #endif
 728 #ifdef SIGPIPE
 729     sa.sa_handler = SIG_IGN;
 730     if (sigaction(SIGPIPE, &sa, NULL) < 0)
 731         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 732                      "sigaction(SIGPIPE)");
 733 #endif
 734
 735     /* we want to ignore HUPs and AP_SIG_GRACEFUL while we're busy
 736      * processing one */
 737     sigaddset(&sa.sa_mask, SIGHUP);
 738     sigaddset(&sa.sa_mask, AP_SIG_GRACEFUL);
 739     sa.sa_handler = restart;
 740     if (sigaction(SIGHUP, &sa, NULL) < 0)
 741         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 742                      "sigaction(SIGHUP)");
 743     if (sigaction(AP_SIG_GRACEFUL, &sa, NULL) < 0)
 744         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 745                      "sigaction(" AP_SIG_GRACEFUL_STRING ")");
 746 #else
 747     if (!one_process) {
 748 #ifdef SIGXCPU
 749         apr_signal(SIGXCPU, SIG_DFL);
 750 #endif /* SIGXCPU */
 751 #ifdef SIGXFSZ
 752         apr_signal(SIGXFSZ, SIG_IGN);
 753 #endif /* SIGXFSZ */
 754     }
 755
 756     apr_signal(SIGTERM, sig_term);
 757 #ifdef SIGHUP
 758     apr_signal(SIGHUP, restart);
 759 #endif /* SIGHUP */
 760 #ifdef AP_SIG_GRACEFUL
 761     apr_signal(AP_SIG_GRACEFUL, restart);
 762 #endif /* AP_SIG_GRACEFUL */
 763 #ifdef AP_SIG_GRACEFUL_STOP
 764      apr_signal(AP_SIG_GRACEFUL_STOP, sig_term);
 765 #endif /* AP_SIG_GRACEFUL_STOP */
 766 #ifdef SIGPIPE
 767     apr_signal(SIGPIPE, SIG_IGN);
 768 #endif /* SIGPIPE */
 769
 770 #endif
 771 }
 772
 773 static void process_pollop(pollset_op_t *op)
 774 {
 775     apr_status_t rv;
 776     event_conn_state_t *cs = op->cs;
 777
 778     switch (op->timeout_type) {
 779     case TIMEOUT_WRITE_COMPLETION:
 780         TO_QUEUE_APPEND(write_completion_q, cs);
 781         break;
 782     case TIMEOUT_KEEPALIVE:
 783         TO_QUEUE_APPEND(keepalive_q, cs);
 784         break;
 785     case TIMEOUT_LINGER:
 786         TO_QUEUE_APPEND(linger_q, cs);
 787         break;
 788     case TIMEOUT_SHORT_LINGER:
 789         TO_QUEUE_APPEND(short_linger_q, cs);
 790         break;
 791     }
 792
 793     rv = apr_pollset_add(event_pollset, &op->cs->pfd);
 794
 795     if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
 796         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
 797                      "%s: apr_pollset_add failure", op->tag);
 798     }
 799 }
 800
 801 /*
 802  * close our side of the connection
 803  * Pre-condition: cs is not in any timeout queue and not in the pollset,
 804  *                timeout_mutex is not locked
 805  * return: 0 if connection is fully closed,
 806  *         1 if connection is lingering
 807  * may be called by listener or by worker thread.
 808  * the eq may be null if called from the listener thread,
 809  * and the pollset operations are done directly by this function.
 810  */
 811 static int start_lingering_close(event_conn_state_t *cs, ap_equeue_t *eq)
 812 {
 813     apr_status_t rv;
 814
 815     cs->c->sbh = NULL;  /* prevent scoreboard updates from the listener
 816                          * worker will loop around soon and set SERVER_READY
 817                          */
 818
 819     if (ap_start_lingering_close(cs->c)) {
 820         apr_pool_clear(cs->p);
 821         ap_push_pool(worker_queue_info, cs->p);
 822         return 0;
 823     }
 824     else {
 825         apr_socket_t *csd = ap_get_conn_socket(cs->c);
 826         pollset_op_t localv;
 827         pollset_op_t *v;
 828
 829         if (eq) {
 830             v = ap_equeue_writer_value(eq);
 831         }
 832         else {
 833             v = &localv;
 834         }
 835
 836         rv = apr_socket_timeout_set(csd, 0);
 837         AP_DEBUG_ASSERT(rv == APR_SUCCESS);
 838         /*
 839          * If some module requested a shortened waiting period, only wait for
 840          * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain
 841          * DoS attacks.
 842          */
 843         if (apr_table_get(cs->c->notes, "short-lingering-close")) {
 844             cs->expiration_time =
 845                 apr_time_now() + apr_time_from_sec(SECONDS_TO_LINGER);
 846             v->timeout_type = TIMEOUT_SHORT_LINGER;
 847             v->tag = "start_lingering_close(short)";
 848             cs->pub.state = CONN_STATE_LINGER_SHORT;
 849         }
 850         else {
 851             cs->expiration_time =
 852                 apr_time_now() + apr_time_from_sec(MAX_SECS_TO_LINGER);
 853             v->timeout_type = TIMEOUT_LINGER;
 854             v->tag = "start_lingering_close(normal)";
 855             cs->pub.state = CONN_STATE_LINGER_NORMAL;
 856         }
 857
 858         cs->pfd.reqevents = APR_POLLIN | APR_POLLHUP | APR_POLLERR;
 859         v->cs = cs;
 860         if (eq != NULL) {
 861             ap_equeue_writer_onward(eq);
 862             apr_pollset_wakeup(event_pollset);
 863         }
 864         else {
 865             process_pollop(v);
 866         }
 867     }
 868     return 1;
 869 }
 870
 871 /*
 872  * forcibly close a lingering connection after the lingering period has
 873  * expired
 874  * Pre-condition: cs is not in any timeout queue and not in the pollset
 875  * return: irrelevant (need same prototype as start_lingering_close)
 876  */
 877 static int stop_lingering_close(event_conn_state_t *cs, ap_equeue_t *eq)
 878 {
 879     apr_status_t rv;
 880     apr_socket_t *csd = ap_get_conn_socket(cs->c);
 881     ap_log_error(APLOG_MARK, APLOG_TRACE4, 0, ap_server_conf,
 882                  "socket reached timeout in lingering-close state");
 883     rv = apr_socket_close(csd);
 884     if (rv != APR_SUCCESS) {
 885         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, "error closing socket");
 886         AP_DEBUG_ASSERT(0);
 887     }
 888     apr_pool_clear(cs->p);
 889     ap_push_pool(worker_queue_info, cs->p);
 890     return 0;
 891 }
 892
 893 /*
 894  * process one connection in the worker
 895  * return: 1 if the connection has been completed,
 896  *         0 if it is still open and waiting for some event
 897  */
 898 static int process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock,
 899                           event_conn_state_t * cs,
 900                           ap_equeue_t *eq,
 901                           int my_child_num,
 902                           int my_thread_num)
 903 {
 904     conn_rec *c;
 905     long conn_id = ID_FROM_CHILD_THREAD(my_child_num, my_thread_num);
 906     int rc;
 907     ap_sb_handle_t *sbh;
 908
 909     ap_create_sb_handle(&sbh, p, my_child_num, my_thread_num);
 910
 911     if (cs == NULL) {           /* This is a new connection */
 912         listener_poll_type *pt = apr_pcalloc(p, sizeof(*pt));
 913         cs = apr_pcalloc(p, sizeof(event_conn_state_t));
 914         cs->bucket_alloc = apr_bucket_alloc_create(p);
 915         c = ap_run_create_connection(p, ap_server_conf, sock,
 916                                      conn_id, sbh, cs->bucket_alloc);
 917         if (!c) {
 918             apr_bucket_alloc_destroy(cs->bucket_alloc);
 919             apr_pool_clear(p);
 920             ap_push_pool(worker_queue_info, p);
 921             return 1;
 922         }
 923         apr_atomic_inc32(&connection_count);
 924         apr_pool_cleanup_register(c->pool, NULL, decrement_connection_count, apr_pool_cleanup_null);
 925         c->current_thread = thd;
 926         cs->c = c;
 927         c->cs = &(cs->pub);
 928         cs->p = p;
 929         cs->pfd.desc_type = APR_POLL_SOCKET;
 930         cs->pfd.reqevents = APR_POLLIN;
 931         cs->pfd.desc.s = sock;
 932         pt->type = PT_CSD;
 933         pt->baton = cs;
 934         cs->pfd.client_data = pt;
 935
 936         ap_update_vhost_given_ip(c);
 937
 938         rc = ap_run_pre_connection(c, sock);
 939         if (rc != OK && rc != DONE) {
 940             ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c,
 941                           "process_socket: connection aborted");
 942             c->aborted = 1;
 943         }
 944
 945         /**
 946          * XXX If the platform does not have a usable way of bundling
 947          * accept() with a socket readability check, like Win32,
 948          * and there are measurable delays before the
 949          * socket is readable due to the first data packet arriving,
 950          * it might be better to create the cs on the listener thread
 951          * with the state set to CONN_STATE_CHECK_REQUEST_LINE_READABLE
 952          *
 953          * FreeBSD users will want to enable the HTTP accept filter
 954          * module in their kernel for the highest performance
 955          * When the accept filter is active, sockets are kept in the
 956          * kernel until a HTTP request is received.
 957          */
 958         cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
 959
 960     }
 961     else {
 962         c = cs->c;
 963         c->sbh = sbh;
 964         c->current_thread = thd;
 965     }
 966
 967     if (c->clogging_input_filters && !c->aborted) {
 968         /* Since we have an input filter which 'cloggs' the input stream,
 969          * like mod_ssl, lets just do the normal read from input filters,
 970          * like the Worker MPM does.
 971          */
 972         ap_run_process_connection(c);
 973         if (cs->pub.state != CONN_STATE_SUSPENDED) {
 974             cs->pub.state = CONN_STATE_LINGER;
 975         }
 976     }
 977
 978 read_request:
 979     if (cs->pub.state == CONN_STATE_READ_REQUEST_LINE) {
 980         if (!c->aborted) {
 981             ap_run_process_connection(c);
 982
 983             /* state will be updated upon return
 984              * fall thru to either wait for readability/timeout or
 985              * do lingering close
 986              */
 987         }
 988         else {
 989             cs->pub.state = CONN_STATE_LINGER;
 990         }
 991     }
 992
 993     if (cs->pub.state == CONN_STATE_WRITE_COMPLETION) {
 994         ap_filter_t *output_filter = c->output_filters;
 995         apr_status_t rv;
 996         ap_update_child_status_from_conn(sbh, SERVER_BUSY_WRITE, c);
 997         while (output_filter->next != NULL) {
 998             output_filter = output_filter->next;
 999         }
1000         rv = output_filter->frec->filter_func.out_func(output_filter, NULL);
1001         if (rv != APR_SUCCESS) {
1002             ap_log_cerror(APLOG_MARK, APLOG_DEBUG, rv, c,
1003                           "network write failure in core output filter");
1004             cs->pub.state = CONN_STATE_LINGER;
1005         }
1006         else if (c->data_in_output_filters) {
1007             /* Still in WRITE_COMPLETION_STATE:
1008              * Set a write timeout for this connection, and let the
1009              * event thread poll for writeability.
1010              */
1011             pollset_op_t *v = ap_equeue_writer_value(eq);
1012
1013             cs->expiration_time = ap_server_conf->timeout + apr_time_now();
1014             cs->pfd.reqevents = APR_POLLOUT | APR_POLLHUP | APR_POLLERR;
1015
1016             v->cs = cs;
1017             v->timeout_type = TIMEOUT_WRITE_COMPLETION;
1018             v->tag = "process_socket(write_completion)";
1019
1020             ap_equeue_writer_onward(eq);
1021             apr_pollset_wakeup(event_pollset);
1022             return 1;
1023         }
1024         else if (c->keepalive != AP_CONN_KEEPALIVE || c->aborted ||
1025             listener_may_exit) {
1026             cs->pub.state = CONN_STATE_LINGER;
1027         }
1028         else if (c->data_in_input_filters) {
1029             cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
1030             goto read_request;
1031         }
1032         else {
1033             cs->pub.state = CONN_STATE_CHECK_REQUEST_LINE_READABLE;
1034         }
1035     }
1036
1037     if (cs->pub.state == CONN_STATE_LINGER) {
1038         if (!start_lingering_close(cs, eq)) {
1039             return 0;
1040         }
1041     }
1042     else if (cs->pub.state == CONN_STATE_CHECK_REQUEST_LINE_READABLE) {
1043         pollset_op_t *v;
1044
1045         /* It greatly simplifies the logic to use a single timeout value here
1046          * because the new element can just be added to the end of the list and
1047          * it will stay sorted in expiration time sequence.  If brand new
1048          * sockets are sent to the event thread for a readability check, this
1049          * will be a slight behavior change - they use the non-keepalive
1050          * timeout today.  With a normal client, the socket will be readable in
1051          * a few milliseconds anyway.
1052          */
1053         cs->expiration_time = ap_server_conf->keep_alive_timeout +
1054                               apr_time_now();
1055
1056         /* Add work to pollset. */
1057         v = ap_equeue_writer_value(eq);
1058         v->timeout_type = TIMEOUT_KEEPALIVE;
1059         v->cs = cs;
1060         cs->pfd.reqevents = APR_POLLIN;
1061         v->tag = "process_socket(keepalive)";
1062         ap_equeue_writer_onward(eq);
1063         apr_pollset_wakeup(event_pollset);
1064     }
1065     return 1;
1066 }
1067
1068 /* requests_this_child has gone to zero or below.  See if the admin coded
1069    "MaxConnectionsPerChild 0", and keep going in that case.  Doing it this way
1070    simplifies the hot path in worker_thread */
1071 static void check_infinite_requests(void)
1072 {
1073     if (ap_max_requests_per_child) {
1074         signal_threads(ST_GRACEFUL);
1075     }
1076     else {
1077         requests_this_child = INT_MAX;  /* keep going */
1078     }
1079 }
1080
1081 static void close_listeners(int process_slot, int *closed) {
1082     if (!*closed) {
1083         int i;
1084         disable_listensocks(process_slot);
1085         ap_close_listeners();
1086         *closed = 1;
1087         dying = 1;
1088         ap_scoreboard_image->parent[process_slot].quiescing = 1;
1089         for (i = 0; i < threads_per_child; ++i) {
1090             ap_update_child_status_from_indexes(process_slot, i,
1091                                                 SERVER_GRACEFUL, NULL);
1092         }
1093         /* wake up the main thread */
1094         kill(ap_my_pid, SIGTERM);
1095     }
1096 }
1097
1098 static void unblock_signal(int sig)
1099 {
1100     sigset_t sig_mask;
1101
1102     sigemptyset(&sig_mask);
1103     sigaddset(&sig_mask, sig);
1104 #if defined(SIGPROCMASK_SETS_THREAD_MASK)
1105     sigprocmask(SIG_UNBLOCK, &sig_mask, NULL);
1106 #else
1107     pthread_sigmask(SIG_UNBLOCK, &sig_mask, NULL);
1108 #endif
1109 }
1110
/*
 * Signal handler that intentionally does nothing.
 *
 * XXX If specifying SIG_IGN is guaranteed to unblock a syscall,
 *     then this function is not needed.
 */
static void dummy_signal_handler(int sig)
{
    (void)sig;  /* deliberately unused */
}
1117
1118
1119 #if HAVE_SERF
1120 static apr_status_t s_socket_add(void *user_baton,
1121                                  apr_pollfd_t *pfd,
1122                                  void *serf_baton)
1123 {
1124     s_baton_t *s = (s_baton_t*)user_baton;
1125     /* XXXXX: recycle listener_poll_types */
1126     listener_poll_type *pt = ap_malloc(sizeof(*pt));
1127     pt->type = PT_SERF;
1128     pt->baton = serf_baton;
1129     pfd->client_data = pt;
1130     return apr_pollset_add(s->pollset, pfd);
1131 }
1132
1133 static apr_status_t s_socket_remove(void *user_baton,
1134                                     apr_pollfd_t *pfd,
1135                                     void *serf_baton)
1136 {
1137     s_baton_t *s = (s_baton_t*)user_baton;
1138     listener_poll_type *pt = pfd->client_data;
1139     free(pt);
1140     return apr_pollset_remove(s->pollset, pfd);
1141 }
1142 #endif
1143
1144 static apr_status_t init_pollset(apr_pool_t *p)
1145 {
1146 #if HAVE_SERF
1147     s_baton_t *baton = NULL;
1148 #endif
1149     ap_listen_rec *lr;
1150     listener_poll_type *pt;
1151     int i = 0;
1152
1153     TO_QUEUE_INIT(write_completion_q);
1154     TO_QUEUE_INIT(keepalive_q);
1155     TO_QUEUE_INIT(linger_q);
1156     TO_QUEUE_INIT(short_linger_q);
1157
1158     listener_pollfd = apr_palloc(p, sizeof(apr_pollfd_t) * num_listensocks);
1159     for (lr = ap_listeners; lr != NULL; lr = lr->next, i++) {
1160         apr_pollfd_t *pfd;
1161         AP_DEBUG_ASSERT(i < num_listensocks);
1162         pfd = &listener_pollfd[i];
1163         pt = apr_pcalloc(p, sizeof(*pt));
1164         pfd->desc_type = APR_POLL_SOCKET;
1165         pfd->desc.s = lr->sd;
1166         pfd->reqevents = APR_POLLIN;
1167
1168         pt->type = PT_ACCEPT;
1169         pt->baton = lr;
1170
1171         pfd->client_data = pt;
1172
1173         apr_socket_opt_set(pfd->desc.s, APR_SO_NONBLOCK, 1);
1174         apr_pollset_add(event_pollset, pfd);
1175
1176         lr->accept_func = ap_unixd_accept;
1177     }
1178
1179 #if HAVE_SERF
1180     baton = apr_pcalloc(p, sizeof(*baton));
1181     baton->pollset = event_pollset;
1182     /* TODO: subpools, threads, reuse, etc.  -- currently use malloc() inside :( */
1183     baton->pool = p;
1184
1185     g_serf = serf_context_create_ex(baton,
1186                                     s_socket_add,
1187                                     s_socket_remove, p);
1188
1189     ap_register_provider(p, "mpm_serf",
1190                          "instance", "0", g_serf);
1191
1192 #endif
1193
1194     return APR_SUCCESS;
1195 }
1196
1197 static apr_status_t push_timer2worker(timer_event_t* te)
1198 {
1199     return ap_queue_push_timer(worker_queue, te);
1200 }
1201
1202 /*
1203  * Pre-condition: pfd->cs is neither in pollset nor timeout queue
1204  * this function may only be called by the listener
1205  */
1206 static apr_status_t push2worker(const apr_pollfd_t * pfd,
1207                                 apr_pollset_t * pollset)
1208 {
1209     listener_poll_type *pt = (listener_poll_type *) pfd->client_data;
1210     event_conn_state_t *cs = (event_conn_state_t *) pt->baton;
1211     apr_status_t rc;
1212
1213     rc = ap_queue_push(worker_queue, cs->pfd.desc.s, cs, cs->p);
1214     if (rc != APR_SUCCESS) {
1215         /* trash the connection; we couldn't queue the connected
1216          * socket to a worker
1217          */
1218         apr_bucket_alloc_destroy(cs->bucket_alloc);
1219         apr_socket_close(cs->pfd.desc.s);
1220         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1221                      ap_server_conf, "push2worker: ap_queue_push failed");
1222         apr_pool_clear(cs->p);
1223         ap_push_pool(worker_queue_info, cs->p);
1224     }
1225
1226     return rc;
1227 }
1228
1229 /* get_worker:
1230  *     If *have_idle_worker_p == 0, reserve a worker thread, and set
1231  *     *have_idle_worker_p = 1.
1232  *     If *have_idle_worker_p is already 1, will do nothing.
1233  *     If blocking == 1, block if all workers are currently busy.
1234  *     If no worker was available immediately, will set *all_busy to 1.
1235  *     XXX: If there are no workers, we should not block immediately but
1236  *     XXX: close all keep-alive connections first.
1237  */
1238 static void get_worker(int *have_idle_worker_p, int blocking, int *all_busy)
1239 {
1240     apr_status_t rc;
1241
1242     if (*have_idle_worker_p) {
1243         /* already reserved a worker thread - must have hit a
1244          * transient error on a previous pass
1245          */
1246         return;
1247     }
1248
1249     if (blocking)
1250         rc = ap_queue_info_wait_for_idler(worker_queue_info, all_busy);
1251     else
1252         rc = ap_queue_info_try_get_idler(worker_queue_info);
1253
1254     if (rc == APR_SUCCESS) {
1255         *have_idle_worker_p = 1;
1256     }
1257     else if (!blocking && rc == APR_EAGAIN) {
1258         *all_busy = 1;
1259     }
1260     else if (!APR_STATUS_IS_EOF(rc)) {
1261         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1262                      "ap_queue_info_wait_for_idler failed.  "
1263                      "Attempting to shutdown process gracefully");
1264         signal_threads(ST_GRACEFUL);
1265     }
1266 }
1267
/* XXX: The active timers are kept in a plain ring that is searched
 * linearly on every insert (see event_register_timed_callback).
 * Convert this to a skiplist or another better data structure.
 */

/* Timer event structures available for reuse; event_register_timed_callback
 * takes an element from here before falling back to allocating a new one.
 */
static APR_RING_HEAD(timer_free_ring_t, timer_event_t) timer_free_ring;
/* Active timers, kept sorted by their 'when' field on insertion */
static APR_RING_HEAD(timer_ring_t, timer_event_t) timer_ring;

/* Mutex held by event_register_timed_callback and by the listener loop
 * while they access the timer rings.
 */
static apr_thread_mutex_t *g_timer_ring_mtx;
1278
1279 static apr_status_t event_register_timed_callback(apr_time_t t,
1280                                                   ap_mpm_callback_fn_t *cbfn,
1281                                                   void *baton)
1282 {
1283     int inserted = 0;
1284     timer_event_t *ep;
1285     timer_event_t *te;
1286     /* oh yeah, and make locking smarter/fine grained. */
1287     apr_thread_mutex_lock(g_timer_ring_mtx);
1288
1289     if (!APR_RING_EMPTY(&timer_free_ring, timer_event_t, link)) {
1290         te = APR_RING_FIRST(&timer_free_ring);
1291         APR_RING_REMOVE(te, link);
1292     }
1293     else {
1294         /* XXXXX: lol, pool allocation without a context from any thread.Yeah. Right. MPMs Suck. */
1295         te = ap_malloc(sizeof(timer_event_t));
1296         APR_RING_ELEM_INIT(te, link);
1297     }
1298
1299     te->cbfunc = cbfn;
1300     te->baton = baton;
1301     /* XXXXX: optimize */
1302     te->when = t + apr_time_now();
1303
1304     /* Okay, insert sorted by when.. */
1305     for (ep = APR_RING_FIRST(&timer_ring);
1306          ep != APR_RING_SENTINEL(&timer_ring,
1307                                  timer_event_t, link);
1308          ep = APR_RING_NEXT(ep, link))
1309     {
1310         if (ep->when > te->when) {
1311             inserted = 1;
1312             APR_RING_INSERT_BEFORE(ep, te, link);
1313             break;
1314         }
1315     }
1316
1317     if (!inserted) {
1318         APR_RING_INSERT_TAIL(&timer_ring, te, timer_event_t, link);
1319     }
1320
1321     apr_thread_mutex_unlock(g_timer_ring_mtx);
1322
1323     return APR_SUCCESS;
1324 }
1325
1326 /*
1327  * Close socket and clean up if remote closed its end while we were in
1328  * lingering close.
1329  * Only to be called in the listener thread;
1330  * Pre-condition: cs is in one of the linger queues and in the pollset
1331  */
1332 static void process_lingering_close(event_conn_state_t *cs, const apr_pollfd_t *pfd)
1333 {
1334     apr_socket_t *csd = ap_get_conn_socket(cs->c);
1335     char dummybuf[2048];
1336     apr_size_t nbytes;
1337     apr_status_t rv;
1338     struct timeout_queue *q;
1339     q = (cs->pub.state == CONN_STATE_LINGER_SHORT) ?  &short_linger_q : &linger_q;
1340
1341     /* socket is already in non-blocking state */
1342     do {
1343         nbytes = sizeof(dummybuf);
1344         rv = apr_socket_recv(csd, dummybuf, &nbytes);
1345     } while (rv == APR_SUCCESS);
1346
1347     if (!APR_STATUS_IS_EOF(rv)) {
1348         return;
1349     }
1350
1351     rv = apr_pollset_remove(event_pollset, pfd);
1352     AP_DEBUG_ASSERT(rv == APR_SUCCESS);
1353
1354     rv = apr_socket_close(csd);
1355     AP_DEBUG_ASSERT(rv == APR_SUCCESS);
1356
1357     TO_QUEUE_REMOVE(*q, cs);
1358     TO_QUEUE_ELEM_INIT(cs);
1359
1360     apr_pool_clear(cs->p);
1361     ap_push_pool(worker_queue_info, cs->p);
1362 }
1363
1364 /* call 'func' for all elements of 'q' with timeout less than 'timeout_time'.
1365  * Pre-condition: timeout_mutex must already be locked
1366  * Post-condition: timeout_mutex will be locked again
1367  */
1368 static void process_timeout_queue(struct timeout_queue *q,
1369                                   apr_time_t timeout_time,
1370                                   int (*func)(event_conn_state_t *, ap_equeue_t *eq))
1371 {
1372     int count = 0;
1373     event_conn_state_t *first, *cs, *last;
1374     apr_status_t rv;
1375     if (!q->count) {
1376         return;
1377     }
1378     AP_DEBUG_ASSERT(!APR_RING_EMPTY(&q->head, event_conn_state_t, timeout_list));
1379
1380     cs = first = APR_RING_FIRST(&q->head);
1381     while (cs != APR_RING_SENTINEL(&q->head, event_conn_state_t, timeout_list)
1382            && cs->expiration_time < timeout_time) {
1383         last = cs;
1384         rv = apr_pollset_remove(event_pollset, &cs->pfd);
1385         if (rv != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rv)) {
1386             ap_log_cerror(APLOG_MARK, APLOG_ERR, rv, cs->c,
1387                           "apr_pollset_remove failed");
1388         }
1389         cs = APR_RING_NEXT(cs, timeout_list);
1390         count++;
1391     }
1392     if (!count)
1393         return;
1394
1395     APR_RING_UNSPLICE(first, last, timeout_list);
1396     AP_DEBUG_ASSERT(q->count >= count);
1397     q->count -= count;
1398     while (count) {
1399         cs = APR_RING_NEXT(first, timeout_list);
1400         TO_QUEUE_ELEM_INIT(first);
1401         func(first, NULL);
1402         first = cs;
1403         count--;
1404     }
1405 }
1406
1407 static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
1408 {
1409     timer_event_t *ep;
1410     timer_event_t *te;
1411     apr_status_t rc;
1412     proc_info *ti = dummy;
1413     int process_slot = ti->pid;
1414     apr_pool_t *tpool = apr_thread_pool_get(thd);
1415     void *csd = NULL;
1416     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1417     ap_listen_rec *lr;
1418     int have_idle_worker = 0;
1419     event_conn_state_t *cs;
1420     const apr_pollfd_t *out_pfd;
1421     apr_int32_t num = 0;
1422     apr_interval_time_t timeout_interval;
1423     apr_time_t timeout_time = 0, now, last_log;
1424     listener_poll_type *pt;
1425     int closed = 0, listeners_disabled = 0;
1426
1427     last_log = apr_time_now();
1428     free(ti);
1429
1430     /* the following times out events that are really close in the future
1431      *   to prevent extra poll calls
1432      *
1433      * current value is .1 second
1434      */
1435 #define TIMEOUT_FUDGE_FACTOR 100000
1436 #define EVENT_FUDGE_FACTOR 10000
1437
1438     rc = init_pollset(tpool);
1439     if (rc != APR_SUCCESS) {
1440         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1441                      "failed to initialize pollset, "
1442                      "attempting to shutdown process gracefully");
1443         signal_threads(ST_GRACEFUL);
1444         return NULL;
1445     }
1446
1447     /* Unblock the signal used to wake this thread up, and set a handler for
1448      * it.
1449      */
1450     unblock_signal(LISTENER_SIGNAL);
1451     apr_signal(LISTENER_SIGNAL, dummy_signal_handler);
1452
1453     for (;;) {
1454         int workers_were_busy = 0;
1455         if (listener_may_exit) {
1456             close_listeners(process_slot, &closed);
1457             if (terminate_mode == ST_UNGRACEFUL
1458                 || apr_atomic_read32(&connection_count) == 0)
1459                 break;
1460         }
1461
1462         if (requests_this_child <= 0) {
1463             check_infinite_requests();
1464         }
1465
1466         now = apr_time_now();
1467         if (APLOGtrace6(ap_server_conf)) {
1468             /* trace log status every second */
1469             if (now - last_log > apr_time_from_msec(1000)) {
1470                 last_log = now;
1471                 ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
1472                              "connections: %d (write-completion: %d "
1473                              "keep-alive: %d lingering: %d)",
1474                              connection_count, write_completion_q.count,
1475                              keepalive_q.count,
1476                              linger_q.count + short_linger_q.count);
1477             }
1478         }
1479
1480         apr_thread_mutex_lock(g_timer_ring_mtx);
1481         if (!APR_RING_EMPTY(&timer_ring, timer_event_t, link)) {
1482             te = APR_RING_FIRST(&timer_ring);
1483             if (te->when > now) {
1484                 timeout_interval = te->when - now;
1485             }
1486             else {
1487                 timeout_interval = 1;
1488             }
1489         }
1490         else {
1491             timeout_interval = apr_time_from_msec(100);
1492         }
1493         apr_thread_mutex_unlock(g_timer_ring_mtx);
1494
1495 #if HAVE_SERF
1496         rc = serf_context_prerun(g_serf);
1497         if (rc != APR_SUCCESS) {
1498             /* TOOD: what should do here? ugh. */
1499         }
1500 #endif
1501         rc = apr_pollset_poll(event_pollset, timeout_interval, &num, &out_pfd);
1502         if (rc != APR_SUCCESS
1503             && !APR_STATUS_IS_EINTR(rc)
1504             && !APR_STATUS_IS_TIMEUP(rc)) {
1505             ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf,
1506                          "apr_pollset_poll failed.  Attempting to "
1507                          "shutdown process gracefully");
1508             signal_threads(ST_GRACEFUL);
1509         }
1510
1511         if (listener_may_exit) {
1512             close_listeners(process_slot, &closed);
1513             if (terminate_mode == ST_UNGRACEFUL
1514                 || apr_atomic_read32(&connection_count) == 0)
1515                 break;
1516         }
1517
1518         now = apr_time_now();
1519         apr_thread_mutex_lock(g_timer_ring_mtx);
1520         for (ep = APR_RING_FIRST(&timer_ring);
1521              ep != APR_RING_SENTINEL(&timer_ring,
1522                                      timer_event_t, link);
1523              ep = APR_RING_FIRST(&timer_ring))
1524         {
1525             if (ep->when < now + EVENT_FUDGE_FACTOR) {
1526                 APR_RING_REMOVE(ep, link);
1527                 push_timer2worker(ep);
1528             }
1529             else {
1530                 break;
1531             }
1532         }
1533         apr_thread_mutex_unlock(g_timer_ring_mtx);
1534
1535         while (num) {
1536             pt = (listener_poll_type *) out_pfd->client_data;
1537             if (pt->type == PT_CSD) {
1538                 /* one of the sockets is readable */
1539                 struct timeout_queue *remove_from_q = &write_completion_q;
1540                 int blocking = 1;
1541                 cs = (event_conn_state_t *)pt->baton;
1542                 switch (cs->pub.state) {
1543                 case CONN_STATE_CHECK_REQUEST_LINE_READABLE:
1544                     cs->pub.state = CONN_STATE_READ_REQUEST_LINE;
1545                     remove_from_q = &keepalive_q;
1546                     /* don't wait for a worker for a keepalive request */
1547                     blocking = 0;
1548                     /* FALL THROUGH */
1549                 case CONN_STATE_WRITE_COMPLETION:
1550                     get_worker(&have_idle_worker, blocking,
1551                                &workers_were_busy);
1552                     TO_QUEUE_REMOVE(*remove_from_q, cs);
1553                     rc = apr_pollset_remove(event_pollset, &cs->pfd);
1554
1555                     /*
1556                      * Some of the pollset backends, like KQueue or Epoll
1557                      * automagically remove the FD if the socket is closed,
1558                      * therefore, we can accept _SUCCESS or _NOTFOUND,
1559                      * and we still want to keep going
1560                      */
1561                     if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
1562                         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1563                                      "pollset remove failed");
1564                         start_lingering_close(cs, NULL);
1565                         break;
1566                     }
1567
1568                     TO_QUEUE_ELEM_INIT(cs);
1569                     /* If we didn't get a worker immediately for a keep-alive
1570                      * request, we close the connection, so that the client can
1571                      * re-connect to a different process.
1572                      */
1573                     if (!have_idle_worker) {
1574                         start_lingering_close(cs, NULL);
1575                         break;
1576                     }
1577                     rc = push2worker(out_pfd, event_pollset);
1578                     if (rc != APR_SUCCESS) {
1579                         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1580                                      ap_server_conf, "push2worker failed");
1581                     }
1582                     else {
1583                         have_idle_worker = 0;
1584                     }
1585                     break;
1586                 case CONN_STATE_LINGER_NORMAL:
1587                 case CONN_STATE_LINGER_SHORT:
1588                     process_lingering_close(cs, out_pfd);
1589                     break;
1590                 default:
1591                     ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1592                                  ap_server_conf,
1593                                  "event_loop: unexpected state %d",
1594                                  cs->pub.state);
1595                     ap_assert(0);
1596                 }
1597             }
1598             else if (pt->type == PT_ACCEPT) {
1599                 int skip_accept = 0;
1600                 int connection_count_local = connection_count;
1601
1602                 /* A Listener Socket is ready for an accept() */
1603                 if (workers_were_busy) {
1604                     skip_accept = 1;
1605                     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1606                                  "All workers busy, not accepting new conns"
1607                                  "in this process");
1608                 }
1609                 else if (listeners_disabled) {
1610                     listeners_disabled = 0;
1611                     enable_listensocks(process_slot);
1612                 }
1613                 else if (connection_count_local > threads_per_child
1614                          + ap_queue_info_get_idlers(worker_queue_info) *
1615                            worker_factor / WORKER_FACTOR_SCALE)
1616                 {
1617                     skip_accept = 1;
1618                     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1619                                  "Too many open connections (%u), "
1620                                  "not accepting new conns in this process",
1621                                  connection_count_local);
1622                     ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1623                                  "Idle workers: %u",
1624                                  ap_queue_info_get_idlers(worker_queue_info));
1625                 }
1626
1627                 if (skip_accept == 0) {
1628                     lr = (ap_listen_rec *) pt->baton;
1629                     ap_pop_pool(&ptrans, worker_queue_info);
1630
1631                     if (ptrans == NULL) {
1632                         /* create a new transaction pool for each accepted socket */
1633                         apr_allocator_t *allocator;
1634
1635                         apr_allocator_create(&allocator);
1636                         apr_allocator_max_free_set(allocator,
1637                                                    ap_max_mem_free);
1638                         apr_pool_create_ex(&ptrans, pconf, NULL, allocator);
1639                         apr_allocator_owner_set(allocator, ptrans);
1640                         if (ptrans == NULL) {
1641                             ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1642                                          ap_server_conf,
1643                                          "Failed to create transaction pool");
1644                             signal_threads(ST_GRACEFUL);
1645                             return NULL;
1646                         }
1647                     }
1648                     apr_pool_tag(ptrans, "transaction");
1649
1650                     get_worker(&have_idle_worker, 1, &workers_were_busy);
1651                     rc = lr->accept_func(&csd, lr, ptrans);
1652
1653                     /* later we trash rv and rely on csd to indicate
1654                      * success/failure
1655                      */
1656                     AP_DEBUG_ASSERT(rc == APR_SUCCESS || !csd);
1657
1658                     if (rc == APR_EGENERAL) {
1659                         /* E[NM]FILE, ENOMEM, etc */
1660                         resource_shortage = 1;
1661                         signal_threads(ST_GRACEFUL);
1662                     }
1663
1664                     if (csd != NULL) {
1665                         rc = ap_queue_push(worker_queue, csd, NULL, ptrans);
1666                         if (rc != APR_SUCCESS) {
1667                             /* trash the connection; we couldn't queue the connected
1668                              * socket to a worker
1669                              */
1670                             apr_socket_close(csd);
1671                             ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1672                                          ap_server_conf,
1673                                          "ap_queue_push failed");
1674                             apr_pool_clear(ptrans);
1675                             ap_push_pool(worker_queue_info, ptrans);
1676                         }
1677                         else {
1678                             have_idle_worker = 0;
1679                         }
1680                     }
1681                     else {
1682                         apr_pool_clear(ptrans);
1683                         ap_push_pool(worker_queue_info, ptrans);
1684                     }
1685                 }
1686             }               /* if:else on pt->type */
1687 #if HAVE_SERF
1688             else if (pt->type == PT_SERF) {
1689                 /* send socket to serf. */
1690                 /* XXXX: this doesn't require get_worker() */
1691                 serf_event_trigger(g_serf, pt->baton, out_pfd);
1692             }
1693 #endif
1694             out_pfd++;
1695             num--;
1696         }                   /* while for processing poll */
1697
1698         {
1699             /* TODO: break out to separate function */
1700             int i;
1701
1702             for (i = 0; i < threads_per_child; i++) {
1703                 ap_equeue_t *eq = worker_equeues[i];
1704                 pollset_op_t *op = NULL;
1705
1706                 while ((op = ap_equeue_reader_next(eq)) != NULL) {
1707                     process_pollop(op);
1708                 }
1709             }
1710         }
1711
1712         /* XXX possible optimization: stash the current time for use as
1713          * r->request_time for new requests
1714          */
1715         now = apr_time_now();
1716         /* we only do this once per 0.1s (TIMEOUT_FUDGE_FACTOR) */
1717         if (now > timeout_time) {
1718             struct process_score *ps;
1719             timeout_time = now + TIMEOUT_FUDGE_FACTOR;
1720
1721             /* handle timed out sockets */
1722
1723             /* Step 1: keepalive timeouts */
1724             /* If all workers are busy, we kill older keep-alive connections so that they
1725              * may connect to another process.
1726              */
1727             if (workers_were_busy && keepalive_q.count) {
1728                 ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1729                              "All workers are busy, will close %d keep-alive "
1730                              "connections",
1731                              keepalive_q.count);
1732                 process_timeout_queue(&keepalive_q,
1733                                       timeout_time + ap_server_conf->keep_alive_timeout,
1734                                       start_lingering_close);
1735             }
1736             else {
1737                 process_timeout_queue(&keepalive_q, timeout_time,
1738                                       start_lingering_close);
1739             }
1740             /* Step 2: write completion timeouts */
1741             process_timeout_queue(&write_completion_q, timeout_time, start_lingering_close);
1742             /* Step 3: (normal) lingering close completion timeouts */
1743             process_timeout_queue(&linger_q, timeout_time, stop_lingering_close);
1744             /* Step 4: (short) lingering close completion timeouts */
1745             process_timeout_queue(&short_linger_q, timeout_time, stop_lingering_close);
1746
1747             ps = ap_get_scoreboard_process(process_slot);
1748             ps->write_completion = write_completion_q.count;
1749             ps->lingering_close = linger_q.count + short_linger_q.count;
1750             ps->keep_alive = keepalive_q.count;
1751
1752             ps->connections = apr_atomic_read32(&connection_count);
1753             /* XXX: should count CONN_STATE_SUSPENDED and set ps->suspended */
1754         }
1755         if (listeners_disabled && !workers_were_busy &&
1756             (int)apr_atomic_read32(&connection_count) <
1757             ((int)ap_queue_info_get_idlers(worker_queue_info) - 1) *
1758             worker_factor / WORKER_FACTOR_SCALE + threads_per_child)
1759         {
1760             listeners_disabled = 0;
1761             enable_listensocks(process_slot);
1762         }
1763         /*
1764          * XXX: do we need to set some timeout that re-enables the listensocks
1765          * XXX: in case no other event occurs?
1766          */
1767     }     /* listener main loop */
1768
1769     close_listeners(process_slot, &closed);
1770     ap_queue_term(worker_queue);
1771
1772     apr_thread_exit(thd, APR_SUCCESS);
1773     return NULL;
1774 }
1775
1776 /* XXX For ungraceful termination/restart, we definitely don't want to
1777  *     wait for active connections to finish but we may want to wait
1778  *     for idle workers to get out of the queue code and release mutexes,
1779  *     since those mutexes are cleaned up pretty soon and some systems
1780  *     may not react favorably (i.e., segfault) if operations are attempted
1781  *     on cleaned-up mutexes.
1782  */
     /*
      * Worker thread entry point.
      *
      * thd   - this thread's APR handle, passed to process_socket() and to
      *         apr_thread_exit() on the way out.
      * dummy - a heap-allocated proc_info from the creating thread; its pid
      *         and tid fields are used as this worker's scoreboard process
      *         and thread slots.  It is freed here.
      *
      * The thread loops until workers_may_exit is set (or the queue reports
      * APR_EOF), popping either a connection or a timer event from
      * worker_queue and handling it.  Always returns NULL.
      */
1783 static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
1784 {
1785     proc_info *ti = dummy;
1786     int process_slot = ti->pid;
1787     int thread_slot = ti->tid;
1788     apr_socket_t *csd = NULL;
1789     event_conn_state_t *cs;
1790     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1791     apr_status_t rv;
1792     int is_idle = 0;
1793     timer_event_t *te = NULL;
1794     ap_equeue_t *eq = worker_equeues[thread_slot];
1795
         /* both slot numbers were copied out above; release the creator's
          * allocation
          */
1796     free(ti);
1797
         /* record who owns this scoreboard slot before reporting a status */
1798     ap_scoreboard_image->servers[process_slot][thread_slot].pid = ap_my_pid;
1799     ap_scoreboard_image->servers[process_slot][thread_slot].tid = apr_os_thread_current();
1800     ap_scoreboard_image->servers[process_slot][thread_slot].generation = retained->my_generation;
1801     ap_update_child_status_from_indexes(process_slot, thread_slot,
1802                                         SERVER_STARTING, NULL);
1803
1804     while (!workers_may_exit) {
             /* Register as idle only when the previous iteration handled a
              * connection: is_idle is cleared solely in the socket branch
              * below, so timer events and failed pops do not register again.
              */
1805         if (!is_idle) {
1806             rv = ap_queue_info_set_idle(worker_queue_info, NULL);
1807             if (rv != APR_SUCCESS) {
1808                 ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
1809                              "ap_queue_info_set_idle failed. Attempting to "
1810                              "shutdown process gracefully.");
1811                 signal_threads(ST_GRACEFUL);
1812                 break;
1813             }
1814             is_idle = 1;
1815         }
1816
1817         ap_update_child_status_from_indexes(process_slot, thread_slot,
1818                                             dying ? SERVER_GRACEFUL : SERVER_READY, NULL);
             /* Retry point after an interrupted pop (APR_EINTR): jumping here
              * skips the idle registration and the status update above.
              */
1819       worker_pop:
1820         if (workers_may_exit) {
1821             break;
1822         }
1823
             /* te is tested after the pop to tell a timer event from a
              * connection, so clear any value left from the last iteration.
              */
1824         te = NULL;
1825         rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te);
1826
1827         if (rv != APR_SUCCESS) {
1828             /* We get APR_EOF during a graceful shutdown once all the
1829              * connections accepted by this server process have been handled.
1830              */
1831             if (APR_STATUS_IS_EOF(rv)) {
1832                 break;
1833             }
1834             /* We get APR_EINTR whenever ap_queue_pop() has been interrupted
1835              * from an explicit call to ap_queue_interrupt_all(). This allows
1836              * us to unblock threads stuck in ap_queue_pop() when a shutdown
1837              * is pending.
1838              *
1839              * If workers_may_exit is set and this is ungraceful termination/
1840              * restart, we are bound to get an error on some systems (e.g.,
1841              * AIX, which sanity-checks mutex operations) since the queue
1842              * may have already been cleaned up.  Don't log the "error" if
1843              * workers_may_exit is set.
1844              */
1845             else if (APR_STATUS_IS_EINTR(rv)) {
1846                 goto worker_pop;
1847             }
1848             /* We got some other error. */
1849             else if (!workers_may_exit) {
1850                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1851                              "ap_queue_pop failed");
1852             }
                 /* any other failure: go back to the loop condition */
1853             continue;
1854         }
1855         if (te != NULL) {
                 /* timer event: run its callback, then hand the event object
                  * back to timer_free_ring while holding g_timer_ring_mtx
                  */
1856             te->cbfunc(te->baton);
1857
1858             {
1859                 apr_thread_mutex_lock(g_timer_ring_mtx);
1860                 APR_RING_INSERT_TAIL(&timer_free_ring, te, timer_event_t, link);
1861                 apr_thread_mutex_unlock(g_timer_ring_mtx);
1862             }
1863         }
1864         else {
                 /* connection: worker_sockets[thread_slot] holds the socket for
                  * the duration of process_socket() and is reset afterwards
                  */
1865             is_idle = 0;
1866             worker_sockets[thread_slot] = csd;
1867             rv = process_socket(thd, ptrans, csd, cs, eq, process_slot, thread_slot);
                 /* a zero return from process_socket() uses up one of the
                  * child's remaining requests.
                  * NOTE(review): this is a plain decrement of a counter that
                  * looks shared by all worker threads - confirm that is
                  * intended.
                  */
1868             if (!rv) {
1869                 requests_this_child--;
1870             }
1871             worker_sockets[thread_slot] = NULL;
1872         }
1873     }
1874
         /* final scoreboard state: SERVER_DEAD when dying is set, otherwise
          * SERVER_GRACEFUL
          */
1875     ap_update_child_status_from_indexes(process_slot, thread_slot,
1876                                         dying ? SERVER_DEAD :
1877                                         SERVER_GRACEFUL,
1878                                         (request_rec *) NULL);
1879
1880     apr_thread_exit(thd, APR_SUCCESS);
1881     return NULL;
1882 }
1883
/* Signal filter handed to apr_signal_thread() in one_process mode.
 * Returns 1 when signum is SIGTERM or SIGINT, 0 for any other signal.
 */
static int check_signal(int signum)
{
    return (signum == SIGTERM || signum == SIGINT) ? 1 : 0;
}
1893
1894
1895
1896 static void create_listener_thread(thread_starter * ts)
1897 {
1898     int my_child_num = ts->child_num_arg;
1899     apr_threadattr_t *thread_attr = ts->threadattr;
1900     proc_info *my_info;
1901     apr_status_t rv;
1902
1903     my_info = (proc_info *) ap_malloc(sizeof(proc_info));
1904     my_info->pid = my_child_num;
1905     my_info->tid = -1;          /* listener thread doesn't have a thread slot */
1906     my_info->sd = 0;
1907     rv = apr_thread_create(&ts->listener, thread_attr, listener_thread,
1908                            my_info, pchild);
1909     if (rv != APR_SUCCESS) {
1910         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1911                      "apr_thread_create: unable to create listener thread");
1912         /* let the parent decide how bad this really is */
1913         clean_child_exit(APEXIT_CHILDSICK);
1914     }
1915     apr_os_thread_get(&listener_os_thread, ts->listener);
1916 }
1917
1918 /* XXX under some circumstances not understood, children can get stuck
1919  *     in start_threads forever trying to take over slots which will
1920  *     never be cleaned up; for now there is an APLOG_DEBUG message issued
1921  *     every so often when this condition occurs
1922  */
1923 static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
1924 {
1925     thread_starter *ts = dummy;
1926     apr_thread_t **threads = ts->threads;
1927     apr_threadattr_t *thread_attr = ts->threadattr;
1928     int child_num_arg = ts->child_num_arg;
1929     int my_child_num = child_num_arg;
1930     proc_info *my_info;
1931     apr_status_t rv;
1932     int i;
1933     int threads_created = 0;
1934     int listener_started = 0;
1935     int loops;
1936     int prev_threads_created;
1937     int max_recycled_pools = -1;
1938
1939     /* We must create the fd queues before we start up the listener
1940      * and worker threads. */
1941     worker_queue = apr_pcalloc(pchild, sizeof(*worker_queue));
1942     rv = ap_queue_init(worker_queue, threads_per_child, pchild);
1943     if (rv != APR_SUCCESS) {
1944         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1945                      "ap_queue_init() failed");
1946         clean_child_exit(APEXIT_CHILDFATAL);
1947     }
1948
1949     if (ap_max_mem_free != APR_ALLOCATOR_MAX_FREE_UNLIMITED) {
1950         /* If we want to conserve memory, let's not keep an unlimited number of
1951          * pools & allocators.
1952          * XXX: This should probably be a separate config directive
1953          */
1954         max_recycled_pools = threads_per_child * 3 / 4 ;
1955     }
1956     rv = ap_queue_info_create(&worker_queue_info, pchild,
1957                               threads_per_child, max_recycled_pools);
1958     if (rv != APR_SUCCESS) {
1959         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1960                      "ap_queue_info_create() failed");
1961         clean_child_exit(APEXIT_CHILDFATAL);
1962     }
1963
1964     /* Create the main pollset */
1965     rv = apr_pollset_create(&event_pollset,
1966                             threads_per_child, /* XXX don't we need more, to handle
1967                                                 * connections in K-A or lingering
1968                                                 * close?
1969                                                 */
1970                             pchild, APR_POLLSET_WAKEABLE|APR_POLLSET_NOCOPY);
1971     if (rv != APR_SUCCESS) {
1972         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
1973                      "apr_pollset_create failed; check system or user limits");
1974         clean_child_exit(APEXIT_CHILDFATAL);
1975     }
1976
1977     worker_sockets = apr_pcalloc(pchild, threads_per_child
1978                                  * sizeof(apr_socket_t *));
1979
1980     worker_equeues = apr_palloc(pchild, threads_per_child * sizeof(ap_equeue_t*));
1981
1982     for (i = 0; i < threads_per_child; i++) {
1983         ap_equeue_t* eq = NULL;
1984         /* TODO: research/test optimal size of queue here */
1985         ap_equeue_create(pchild, 16, sizeof(pollset_op_t), &eq);
1986         /* same as thread ID */
1987         worker_equeues[i] = eq;
1988     }
1989
1990     loops = prev_threads_created = 0;
1991     while (1) {
1992         /* threads_per_child does not include the listener thread */
1993         for (i = 0; i < threads_per_child; i++) {
1994             int status =
1995                 ap_scoreboard_image->servers[child_num_arg][i].status;
1996
1997             if (status != SERVER_GRACEFUL && status != SERVER_DEAD) {
1998                 continue;
1999             }
2000
2001             my_info = (proc_info *) ap_malloc(sizeof(proc_info));
2002             my_info->pid = my_child_num;
2003             my_info->tid = i;
2004             my_info->sd = 0;
2005
2006             /* We are creating threads right now */
2007             ap_update_child_status_from_indexes(my_child_num, i,
2008                                                 SERVER_STARTING, NULL);
2009             /* We let each thread update its own scoreboard entry.  This is
2010              * done because it lets us deal with tid better.
2011              */
2012             rv = apr_thread_create(&threads[i], thread_attr,
2013                                    worker_thread, my_info, pchild);
2014             if (rv != APR_SUCCESS) {
2015                 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
2016                              "apr_thread_create: unable to create worker thread");
2017                 /* let the parent decide how bad this really is */
2018                 clean_child_exit(APEXIT_CHILDSICK);
2019             }
2020             threads_created++;
2021         }
2022
2023         /* Start the listener only when there are workers available */
2024         if (!listener_started && threads_created) {
2025             create_listener_thread(ts);
2026             listener_started = 1;
2027         }
2028
2029
2030         if (start_thread_may_exit || threads_created == threads_per_child) {
2031             break;
2032         }
2033         /* wait for previous generation to clean up an entry */
2034         apr_sleep(apr_time_from_sec(1));
2035         ++loops;
2036         if (loops % 120 == 0) { /* every couple of minutes */
2037             if (prev_threads_created == threads_created) {
2038                 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
2039                              "child %" APR_PID_T_FMT " isn't taking over "
2040                              "slots very quickly (%d of %d)",
2041                              ap_my_pid, threads_created,
2042                              threads_per_child);
2043             }
2044             prev_threads_created = threads_created;
2045         }
2046     }
2047
2048     /* What state should this child_main process be listed as in the
2049      * scoreboard...?
2050      *  ap_update_child_status_from_indexes(my_child_num, i, SERVER_STARTING,
2051      *                                      (request_rec *) NULL);
2052      *
2053      *  This state should be listed separately in the scoreboard, in some kind
2054      *  of process_status, not mixed in with the worker threads' status.
2055      *  "life_status" is almost right, but it's in the worker's structure, and
2056      *  the name could be clearer.   gla
2057      */
2058     apr_thread_exit(thd, APR_SUCCESS);
2059     return NULL;
2060 }
2061
2062 static void join_workers(apr_thread_t * listener, apr_thread_t ** threads)
2063 {
2064     int i;
2065     apr_status_t rv, thread_rv;
2066
2067     if (listener) {
2068         int iter;
2069
2070         /* deal with a rare timing window which affects waking up the
2071          * listener thread...  if the signal sent to the listener thread
2072          * is delivered between the time it verifies that the
2073          * listener_may_exit flag is clear and the time it enters a
2074          * blocking syscall, the signal didn't do any good...  work around
2075          * that by sleeping briefly and sending it again
2076          */
2077
2078         iter = 0;
2079         while (iter < 10 && !dying) {
2080             /* listener has not stopped accepting yet */
2081             apr_sleep(apr_time_make(0, 500000));
2082             wakeup_listener();
2083             ++iter;
2084         }
2085         if (iter >= 10) {
2086             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
2087                          "the listener thread didn't stop accepting");
2088         }
2089         else {
2090             rv = apr_thread_join(&thread_rv, listener);
2091             if (rv != APR_SUCCESS) {
2092                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
2093                              "apr_thread_join: unable to join listener thread");
2094             }
2095         }
2096     }
2097
2098     for (i = 0; i < threads_per_child; i++) {
2099         if (threads[i]) {       /* if we ever created this thread */
2100             rv = apr_thread_join(&thread_rv, threads[i]);
2101             if (rv != APR_SUCCESS) {
2102                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
2103                              "apr_thread_join: unable to join worker "
2104                              "thread %d", i);
2105             }
2106         }
2107     }
2108 }
2109
2110 static void join_start_thread(apr_thread_t * start_thread_id)
2111 {
2112     apr_status_t rv, thread_rv;
2113
2114     start_thread_may_exit = 1;  /* tell it to give up in case it is still
2115                                  * trying to take over slots from a
2116                                  * previous generation
2117                                  */
2118     rv = apr_thread_join(&thread_rv, start_thread_id);
2119     if (rv != APR_SUCCESS) {
2120         ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
2121                      "apr_thread_join: unable to join the start " "thread");
2122     }
2123 }
2124
2125 static void child_main(int child_num_arg)
2126 {
2127     apr_thread_t **threads;
2128     apr_status_t rv;
2129     thread_starter *ts;
2130     apr_threadattr_t *thread_attr;
2131     apr_thread_t *start_thread_id;
2132
2133     mpm_state = AP_MPMQ_STARTING;       /* for benefit of any hooks that run as this
2134                                          * child initializes
2135                                          */
2136     ap_my_pid = getpid();
2137     ap_fatal_signal_child_setup(ap_server_conf);
2138     apr_pool_create(&pchild, pconf);
2139
2140     /*stuff to do before we switch id's, so we have permissions. */
2141     ap_reopen_scoreboard(pchild, NULL, 0);
2142
2143     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
2144         clean_child_exit(APEXIT_CHILDFATAL);
2145     }
2146
2147     apr_thread_mutex_create(&g_timer_ring_mtx, APR_THREAD_MUTEX_DEFAULT, pchild);
2148     APR_RING_INIT(&timer_free_ring, timer_event_t, link);
2149     APR_RING_INIT(&timer_ring, timer_event_t, link);
2150     ap_run_child_init(pchild, ap_server_conf);
2151
2152     /* done with init critical section */
2153
2154     /* Just use the standard apr_setup_signal_thread to block all signals
2155      * from being received.  The child processes no longer use signals for
2156      * any communication with the parent process.
2157      */
2158     rv = apr_setup_signal_thread();
2159     if (rv != APR_SUCCESS) {
2160         ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
2161                      "Couldn't initialize signal thread");
2162         clean_child_exit(APEXIT_CHILDFATAL);
2163     }
2164
2165     if (ap_max_requests_per_child) {
2166         requests_this_child = ap_max_requests_per_child;
2167     }
2168     else {
2169         /* coding a value of zero means infinity */
2170         requests_this_child = INT_MAX;
2171     }
2172
2173     /* Setup worker threads */
2174
2175     /* clear the storage; we may not create all our threads immediately,
2176      * and we want a 0 entry to indicate a thread which was not created
2177      */
2178     threads = ap_calloc(threads_per_child, sizeof(apr_thread_t *));
2179     ts = apr_palloc(pchild, sizeof(*ts));
2180
2181     apr_threadattr_create(&thread_attr, pchild);
2182     /* 0 means PTHREAD_CREATE_JOINABLE */
2183     apr_threadattr_detach_set(thread_attr, 0);
2184
2185     if (ap_thread_stacksize != 0) {
2186         apr_threadattr_stacksize_set(thread_attr, ap_thread_stacksize);
2187     }
2188
2189     ts->threads = threads;
2190     ts->listener = NULL;
2191     ts->child_num_arg = child_num_arg;
2192     ts->threadattr = thread_attr;
2193
2194     rv = apr_thread_create(&start_thread_id, thread_attr, start_threads,
2195                            ts, pchild);
2196     if (rv != APR_SUCCESS) {
2197         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
2198                      "apr_thread_create: unable to create worker thread");
2199         /* let the parent decide how bad this really is */
2200         clean_child_exit(APEXIT_CHILDSICK);
2201     }
2202
2203     mpm_state = AP_MPMQ_RUNNING;
2204
2205     /* If we are only running in one_process mode, we will want to
2206      * still handle signals. */
2207     if (one_process) {
2208         /* Block until we get a terminating signal. */
2209         apr_signal_thread(check_signal);
2210         /* make sure the start thread has finished; signal_threads()
2211          * and join_workers() depend on that
2212          */
2213         /* XXX join_start_thread() won't be awakened if one of our
2214          *     threads encounters a critical error and attempts to
2215          *     shutdown this child
2216          */
2217         join_start_thread(start_thread_id);
2218
2219         /* helps us terminate a little more quickly than the dispatch of the
2220          * signal thread; beats the Pipe of Death and the browsers
2221          */
2222         signal_threads(ST_UNGRACEFUL);
2223
2224         /* A terminating signal was received. Now join each of the
2225          * workers to clean them up.
2226          *   If the worker already exited, then the join frees
2227          *   their resources and returns.
2228          *   If the worker hasn't exited, then this blocks until
2229          *   they have (then cleans up).
2230          */
2231         join_workers(ts->listener, threads);
2232     }
2233     else {                      /* !one_process */
2234         /* remove SIGTERM from the set of blocked signals...  if one of
2235          * the other threads in the process needs to take us down
2236          * (e.g., for MaxConnectionsPerChild) it will send us SIGTERM
2237          */
2238         unblock_signal(SIGTERM);
2239         apr_signal(SIGTERM, dummy_signal_handler);
2240         /* Watch for any messages from the parent over the POD */
2241         while (1) {
2242             rv = ap_event_pod_check(pod);
2243             if (rv == AP_NORESTART) {
2244                 /* see if termination was triggered while we slept */
2245                 switch (terminate_mode) {
2246                 case ST_GRACEFUL:
2247                     rv = AP_GRACEFUL;
2248                     break;
2249                 case ST_UNGRACEFUL:
2250                     rv = AP_RESTART;
2251                     break;
2252                 }
2253             }
2254             if (rv == AP_GRACEFUL || rv == AP_RESTART) {
2255                 /* make sure the start thread has finished;
2256                  * signal_threads() and join_workers depend on that
2257                  */
2258                 join_start_thread(start_thread_id);
2259                 signal_threads(rv ==
2260                                AP_GRACEFUL ? ST_GRACEFUL : ST_UNGRACEFUL);
2261                 break;
2262             }
2263         }
2264
2265         /* A terminating signal was received. Now join each of the
2266          * workers to clean them up.
2267          *   If the worker already exited, then the join frees
2268          *   their resources and returns.
2269          *   If the worker hasn't exited, then this blocks until
2270          *   they have (then cleans up).
2271          */
2272         join_workers(ts->listener, threads);
2273     }
2274
2275     free(threads);
2276
2277     clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
2278 }
2279
2280 static int make_child(server_rec * s, int slot)
2281 {
2282     int pid;
2283
2284     if (slot + 1 > retained->max_daemons_limit) {
2285         retained->max_daemons_limit = slot + 1;
2286     }
2287
2288     if (one_process) {
2289         set_signals();
2290         event_note_child_started(slot, getpid());
2291         child_main(slot);
2292         /* NOTREACHED */
2293     }
2294
2295     if ((pid = fork()) == -1) {
2296         ap_log_error(APLOG_MARK, APLOG_ERR, errno, s,
2297                      "fork: Unable to fork new process");
2298
2299         /* fork didn't succeed.  There's no need to touch the scoreboard;
2300          * if we were trying to replace a failed child process, then
2301          * server_main_loop() marked its workers SERVER_DEAD, and if
2302          * we were trying to replace a child process that exited normally,
2303          * its worker_thread()s left SERVER_DEAD or SERVER_GRACEFUL behind.
2304          */
2305
2306         /* In case system resources are maxxed out, we don't want
2307            Apache running away with the CPU trying to fork over and
2308            over and over again. */
2309         apr_sleep(apr_time_from_sec(10));
2310
2311         return -1;
2312     }
2313
2314     if (!pid) {
2315 #ifdef HAVE_BINDPROCESSOR
2316         /* By default, AIX binds to a single processor.  This bit unbinds
2317          * children which will then bind to another CPU.
2318          */
2319         int status = bindprocessor(BINDPROCESS, (int) getpid(),
2320                                    PROCESSOR_CLASS_ANY);
2321         if (status != OK)
2322             ap_log_error(APLOG_MARK, APLOG_DEBUG, errno,
2323                          ap_server_conf,
2324                          "processor unbind failed");
2325 #endif
2326         RAISE_SIGSTOP(MAKE_CHILD);
2327
2328         apr_signal(SIGTERM, just_die);
2329         child_main(slot);
2330         /* NOTREACHED */
2331     }
2332     /* else */
2333     if (ap_scoreboard_image->parent[slot].pid != 0) {
2334         /* This new child process is squatting on the scoreboard
2335          * entry owned by an exiting child process, which cannot
2336          * exit until all active requests complete.
2337          */
2338         event_note_child_lost_slot(slot, pid);
2339     }
2340     ap_scoreboard_image->parent[slot].quiescing = 0;
2341     ap_scoreboard_image->parent[slot].not_accepting = 0;
2342     event_note_child_started(slot, pid);
2343     return 0;
2344 }
2345
2346 /* start up a bunch of children */
2347 static void startup_children(int number_to_start)
2348 {
2349     int i;
2350
2351     for (i = 0; number_to_start && i < ap_daemons_limit; ++i) {
2352         if (ap_scoreboard_image->parent[i].pid != 0) {
2353             continue;
2354         }
2355         if (make_child(ap_server_conf, i) < 0) {
2356             break;
2357         }
2358         --number_to_start;
2359     }
2360 }
2361
2362 static void perform_idle_server_maintenance(void)
2363 {
2364     int i, j;
2365     int idle_thread_count;
2366     worker_score *ws;
2367     process_score *ps;
2368     int free_length;
2369     int totally_free_length = 0;
2370     int free_slots[MAX_SPAWN_RATE];
2371     int last_non_dead;
2372     int total_non_dead;
2373     int active_thread_count = 0;
2374
2375     /* initialize the free_list */
2376     free_length = 0;
2377
2378     idle_thread_count = 0;
2379     last_non_dead = -1;
2380     total_non_dead = 0;
2381
2382     for (i = 0; i < ap_daemons_limit; ++i) {
2383         /* Initialization to satisfy the compiler. It doesn't know
2384          * that threads_per_child is always > 0 */
2385         int status = SERVER_DEAD;
2386         int any_dying_threads = 0;
2387         int any_dead_threads = 0;
2388         int all_dead_threads = 1;
2389
2390         if (i >= retained->max_daemons_limit
2391             && totally_free_length == retained->idle_spawn_rate)
2392             /* short cut if all active processes have been examined and
2393              * enough empty scoreboard slots have been found
2394              */
2395
2396             break;
2397         ps = &ap_scoreboard_image->parent[i];
2398         for (j = 0; j < threads_per_child; j++) {
2399             ws = &ap_scoreboard_image->servers[i][j];
2400             status = ws->status;
2401
2402             /* XXX any_dying_threads is probably no longer needed    GLA */
2403             any_dying_threads = any_dying_threads ||
2404                 (status == SERVER_GRACEFUL);
2405             any_dead_threads = any_dead_threads || (status == SERVER_DEAD);
2406             all_dead_threads = all_dead_threads &&
2407                 (status == SERVER_DEAD || status == SERVER_GRACEFUL);
2408
2409             /* We consider a starting server as idle because we started it
2410              * at least a cycle ago, and if it still hasn't finished starting
2411              * then we're just going to swamp things worse by forking more.
2412              * So we hopefully won't need to fork more if we count it.
2413              * This depends on the ordering of SERVER_READY and SERVER_STARTING.
2414              */
2415             if (ps->pid != 0) { /* XXX just set all_dead_threads in outer
2416                                    for loop if no pid?  not much else matters */
2417                 if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting
2418                     && ps->generation == retained->my_generation)
2419                 {
2420                     ++idle_thread_count;
2421                 }
2422                 if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
2423                     ++active_thread_count;
2424                 }
2425             }
2426         }
2427         if (any_dead_threads
2428             && totally_free_length < retained->idle_spawn_rate
2429             && free_length < MAX_SPAWN_RATE
2430             && (!ps->pid      /* no process in the slot */
2431                   || ps->quiescing)) {  /* or at least one is going away */
2432             if (all_dead_threads) {
2433                 /* great! we prefer these, because the new process can
2434                  * start more threads sooner.  So prioritize this slot
2435                  * by putting it ahead of any slots with active threads.
2436                  *
2437                  * first, make room by moving a slot that's potentially still
2438                  * in use to the end of the array
2439                  */
2440                 free_slots[free_length] = free_slots[totally_free_length];
2441                 free_slots[totally_free_length++] = i;
2442             }
2443             else {
2444                 /* slot is still in use - back of the bus
2445                  */
2446                 free_slots[free_length] = i;
2447             }
2448             ++free_length;
2449         }
2450         /* XXX if (!ps->quiescing)     is probably more reliable  GLA */
2451         if (!any_dying_threads) {
2452             last_non_dead = i;
2453             ++total_non_dead;
2454         }
2455     }
2456
2457     if (retained->sick_child_detected) {
2458         if (active_thread_count > 0) {
2459             /* some child processes appear to be working.  don't kill the
2460              * whole server.
2461              */
2462             retained->sick_child_detected = 0;
2463         }
2464         else {
2465             /* looks like a basket case.  give up.
2466              */
2467             shutdown_pending = 1;
2468             child_fatal = 1;
2469             ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
2470                          ap_server_conf,
2471                          "No active workers found..."
2472                          " Apache is exiting!");
2473             /* the child already logged the failure details */
2474             return;
2475         }
2476     }
2477
2478     retained->max_daemons_limit = last_non_dead + 1;
2479
2480     if (idle_thread_count > max_spare_threads) {
2481         /* Kill off one child */
2482         ap_event_pod_signal(pod, TRUE);
2483         retained->idle_spawn_rate = 1;
2484     }
2485     else if (idle_thread_count < min_spare_threads) {
2486         /* terminate the free list */
2487         if (free_length == 0) { /* scoreboard is full, can't fork */
2488
2489             if (active_thread_count >= ap_daemons_limit * threads_per_child) {
2490                 if (!retained->maxclients_reported) {
2491                     /* only report this condition once */
2492                     ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf,
2493                                  "server reached MaxRequestWorkers setting, "
2494                                  "consider raising the MaxRequestWorkers "
2495                                  "setting");
2496                     retained->maxclients_reported = 1;
2497                 }
2498             }
2499             else {
2500                 ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf,
2501                              "scoreboard is full, not at MaxRequestWorkers");
2502             }
2503             retained->idle_spawn_rate = 1;
2504         }
2505         else {
2506             if (free_length > retained->idle_spawn_rate) {
2507                 free_length = retained->idle_spawn_rate;
2508             }
2509             if (retained->idle_spawn_rate >= 8) {
2510                 ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf,
2511                              "server seems busy, (you may need "
2512                              "to increase StartServers, ThreadsPerChild "
2513                              "or Min/MaxSpareThreads), "
2514                              "spawning %d children, there are around %d idle "
2515                              "threads, and %d total children", free_length,
2516                              idle_thread_count, total_non_dead);
2517             }
2518             for (i = 0; i < free_length; ++i) {
2519                 make_child(ap_server_conf, free_slots[i]);
2520             }
2521             /* the next time around we want to spawn twice as many if this
2522              * wasn't good enough, but not if we've just done a graceful
2523              */
2524             if (retained->hold_off_on_exponential_spawning) {
2525                 --retained->hold_off_on_exponential_spawning;
2526             }
2527             else if (retained->idle_spawn_rate < MAX_SPAWN_RATE) {
2528                 retained->idle_spawn_rate *= 2;
2529             }
2530         }
2531     }
2532     else {
2533         retained->idle_spawn_rate = 1;
2534     }
2535 }
2536
2537 static void server_main_loop(int remaining_children_to_start)
2538 {
2539     ap_generation_t old_gen;
2540     int child_slot;
2541     apr_exit_why_e exitwhy;
2542     int status, processed_status;
2543     apr_proc_t pid;
2544     int i;
2545
2546     while (!restart_pending && !shutdown_pending) {
2547         ap_wait_or_timeout(&exitwhy, &status, &pid, pconf, ap_server_conf);
2548
2549         if (pid.pid != -1) {
2550             processed_status = ap_process_child_status(&pid, exitwhy, status);
2551             child_slot = ap_find_child_by_pid(&pid);
2552             if (processed_status == APEXIT_CHILDFATAL) {
2553                 /* fix race condition found in PR 39311
2554                  * A child created at the same time as a graceful happens
2555                  * can find the lock missing and create a fatal error.
2556                  * It is not fatal for the last generation to be in this state.
2557                  */
2558                 if (child_slot < 0
2559                     || ap_get_scoreboard_process(child_slot)->generation
2560                        == retained->my_generation) {
2561                     shutdown_pending = 1;
2562                     child_fatal = 1;
2563                     return;
2564                 }
2565                 else {
2566                     ap_log_error(APLOG_MARK, APLOG_WARNING, 0, ap_server_conf,
2567                                  "Ignoring fatal error in child of previous "
2568                                  "generation (pid %ld).",
2569                                  (long)pid.pid);
2570                     retained->sick_child_detected = 1;
2571                 }
2572             }
2573             else if (processed_status == APEXIT_CHILDSICK) {
2574                 /* tell perform_idle_server_maintenance to check into this
2575                  * on the next timer pop
2576                  */
2577                 retained->sick_child_detected = 1;
2578             }
2579             /* non-fatal death... note that it's gone in the scoreboard. */
2580             if (child_slot >= 0) {
2581                 for (i = 0; i < threads_per_child; i++)
2582                     ap_update_child_status_from_indexes(child_slot, i,
2583                                                         SERVER_DEAD,
2584                                                         (request_rec *) NULL);
2585
2586                 event_note_child_killed(child_slot, 0, 0);
2587                 ap_scoreboard_image->parent[child_slot].quiescing = 0;
2588                 if (processed_status == APEXIT_CHILDSICK) {
2589                     /* resource shortage, minimize the fork rate */
2590                     retained->idle_spawn_rate = 1;
2591                 }
2592                 else if (remaining_children_to_start
2593                          && child_slot < ap_daemons_limit) {
2594                     /* we're still doing a 1-for-1 replacement of dead
2595                      * children with new children
2596                      */
2597                     make_child(ap_server_conf, child_slot);
2598                     --remaining_children_to_start;
2599                 }
2600             }
2601             else if (ap_unregister_extra_mpm_process(pid.pid, &old_gen) == 1) {
2602
2603                 event_note_child_killed(-1, /* already out of the scoreboard */
2604                                         pid.pid, old_gen);
2605 #if APR_HAS_OTHER_CHILD
2606             }
2607             else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
2608                                                 status) == 0) {
2609                 /* handled */
2610 #endif
2611             }
2612             else if (retained->is_graceful) {
2613                 /* Great, we've probably just lost a slot in the
2614                  * scoreboard.  Somehow we don't know about this child.
2615                  */
2616                 ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
2617                              ap_server_conf,
2618                              "long lost child came home! (pid %ld)",
2619                              (long) pid.pid);
2620             }
2621             /* Don't perform idle maintenance when a child dies,
2622              * only do it when there's a timeout.  Remember only a
2623              * finite number of children can die, and it's pretty
2624              * pathological for a lot to die suddenly.
2625              */
2626             continue;
2627         }
2628         else if (remaining_children_to_start) {
2629             /* we hit a 1 second timeout in which none of the previous
2630              * generation of children needed to be reaped... so assume
2631              * they're all done, and pick up the slack if any is left.
2632              */
2633             startup_children(remaining_children_to_start);
2634             remaining_children_to_start = 0;
2635             /* In any event we really shouldn't do the code below because
2636              * few of the servers we just started are in the IDLE state
2637              * yet, so we'd mistakenly create an extra server.
2638              */
2639             continue;
2640         }
2641
2642         perform_idle_server_maintenance();
2643     }
2644 }
2645
2646 static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
2647 {
2648     int remaining_children_to_start;
2649
2650     ap_log_pid(pconf, ap_pid_fname);
2651
2652     if (!retained->is_graceful) {
2653         if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
2654             mpm_state = AP_MPMQ_STOPPING;
2655             return DONE;
2656         }
2657         /* fix the generation number in the global score; we just got a new,
2658          * cleared scoreboard
2659          */
2660         ap_scoreboard_image->global->running_generation = retained->my_generation;
2661     }
2662
2663     restart_pending = shutdown_pending = 0;
2664     set_signals();
2665     /* Don't thrash... */
2666     if (max_spare_threads < min_spare_threads + threads_per_child)
2667         max_spare_threads = min_spare_threads + threads_per_child;
2668
2669     /* If we're doing a graceful_restart then we're going to see a lot
2670      * of children exiting immediately when we get into the main loop
2671      * below (because we just sent them AP_SIG_GRACEFUL).  This happens pretty
2672      * rapidly... and for each one that exits we may start a new one, until
2673      * there are at least min_spare_threads idle threads, counting across
2674      * all children.  But we may be permitted to start more children than
2675      * that, so we'll just keep track of how many we're
2676      * supposed to start up without the 1 second penalty between each fork.
2677      */
2678     remaining_children_to_start = ap_daemons_to_start;
2679     if (remaining_children_to_start > ap_daemons_limit) {
2680         remaining_children_to_start = ap_daemons_limit;
2681     }
2682     if (!retained->is_graceful) {
2683         startup_children(remaining_children_to_start);
2684         remaining_children_to_start = 0;
2685     }
2686     else {
2687         /* give the system some time to recover before kicking into
2688          * exponential mode */
2689         retained->hold_off_on_exponential_spawning = 10;
2690     }
2691
2692     ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2693                  "%s configured -- resuming normal operations",
2694                  ap_get_server_description());
2695     ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf,
2696                  "Server built: %s", ap_get_server_built());
2697     ap_log_command_line(plog, s);
2698
2699     mpm_state = AP_MPMQ_RUNNING;
2700
2701     server_main_loop(remaining_children_to_start);
2702     mpm_state = AP_MPMQ_STOPPING;
2703
2704     if (shutdown_pending && !retained->is_graceful) {
2705         /* Time to shut down:
2706          * Kill child processes, tell them to call child_exit, etc...
2707          */
2708         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2709         ap_reclaim_child_processes(1, /* Start with SIGTERM */
2710                                    event_note_child_killed);
2711
2712         if (!child_fatal) {
2713             /* cleanup pid file on normal shutdown */
2714             ap_remove_pid(pconf, ap_pid_fname);
2715             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0,
2716                          ap_server_conf, "caught SIGTERM, shutting down");
2717         }
2718         return DONE;
2719     } else if (shutdown_pending) {
2720         /* Time to gracefully shut down:
2721          * Kill child processes, tell them to call child_exit, etc...
2722          */
2723         int active_children;
2724         int index;
2725         apr_time_t cutoff = 0;
2726
2727         /* Close our listeners, and then ask our children to do same */
2728         ap_close_listeners();
2729         ap_event_pod_killpg(pod, ap_daemons_limit, TRUE);
2730         ap_relieve_child_processes(event_note_child_killed);
2731
2732         if (!child_fatal) {
2733             /* cleanup pid file on normal shutdown */
2734             ap_remove_pid(pconf, ap_pid_fname);
2735             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2736                          "caught " AP_SIG_GRACEFUL_STOP_STRING
2737                          ", shutting down gracefully");
2738         }
2739
2740         if (ap_graceful_shutdown_timeout) {
2741             cutoff = apr_time_now() +
2742                      apr_time_from_sec(ap_graceful_shutdown_timeout);
2743         }
2744
2745         /* Don't really exit until each child has finished */
2746         shutdown_pending = 0;
2747         do {
2748             /* Pause for a second */
2749             apr_sleep(apr_time_from_sec(1));
2750
2751             /* Relieve any children which have now exited */
2752             ap_relieve_child_processes(event_note_child_killed);
2753
2754             active_children = 0;
2755             for (index = 0; index < ap_daemons_limit; ++index) {
2756                 if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
2757                     active_children = 1;
2758                     /* Having just one child is enough to stay around */
2759                     break;
2760                 }
2761             }
2762         } while (!shutdown_pending && active_children &&
2763                  (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));
2764
2765         /* We might be here because we received SIGTERM, either
2766          * way, try and make sure that all of our processes are
2767          * really dead.
2768          */
2769         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2770         ap_reclaim_child_processes(1, event_note_child_killed);
2771
2772         return DONE;
2773     }
2774
2775     /* we've been told to restart */
2776     apr_signal(SIGHUP, SIG_IGN);
2777
2778     if (one_process) {
2779         /* not worth thinking about */
2780         return DONE;
2781     }
2782
2783     /* advance to the next generation */
2784     /* XXX: we really need to make sure this new generation number isn't in
2785      * use by any of the children.
2786      */
2787     ++retained->my_generation;
2788     ap_scoreboard_image->global->running_generation = retained->my_generation;
2789
2790     if (retained->is_graceful) {
2791         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2792                      AP_SIG_GRACEFUL_STRING
2793                      " received.  Doing graceful restart");
2794         /* wake up the children...time to die.  But we'll have more soon */
2795         ap_event_pod_killpg(pod, ap_daemons_limit, TRUE);
2796
2797
2798         /* This is mostly for debugging... so that we know what is still
2799          * gracefully dealing with existing request.
2800          */
2801
2802     }
2803     else {
2804         /* Kill 'em all.  Since the child acts the same on the parents SIGTERM
2805          * and a SIGHUP, we may as well use the same signal, because some user
2806          * pthreads are stealing signals from us left and right.
2807          */
2808         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2809
2810         ap_reclaim_child_processes(1,  /* Start with SIGTERM */
2811                                    event_note_child_killed);
2812         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2813                      "SIGHUP received.  Attempting to restart");
2814     }
2815
2816     return OK;
2817 }
2818
2819 /* This really should be a post_config hook, but the error log is already
2820  * redirected by that point, so we need to do this in the open_logs phase.
2821  */
2822 static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
2823                            apr_pool_t * ptemp, server_rec * s)
2824 {
2825     int startup = 0;
2826     int level_flags = 0;
2827     apr_status_t rv;
2828
2829     pconf = p;
2830
2831     /* the reverse of pre_config, we want this only the first time around */
2832     if (retained->module_loads == 1) {
2833         startup = 1;
2834         level_flags |= APLOG_STARTUP;
2835     }
2836
2837     if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
2838         ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
2839                      (startup ? NULL : s),
2840                      "no listening sockets available, shutting down");
2841         return DONE;
2842     }
2843
2844     if (!one_process) {
2845         if ((rv = ap_event_pod_open(pconf, &pod))) {
2846             ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
2847                          (startup ? NULL : s),
2848                          "could not open pipe-of-death");
2849             return DONE;
2850         }
2851     }
2852     return OK;
2853 }
2854
2855 static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
2856                             apr_pool_t * ptemp)
2857 {
2858     int no_detach, debug, foreground;
2859     apr_status_t rv;
2860     const char *userdata_key = "mpm_event_module";
2861
2862     mpm_state = AP_MPMQ_STARTING;
2863
2864     debug = ap_exists_config_define("DEBUG");
2865
2866     if (debug) {
2867         foreground = one_process = 1;
2868         no_detach = 0;
2869     }
2870     else {
2871         one_process = ap_exists_config_define("ONE_PROCESS");
2872         no_detach = ap_exists_config_define("NO_DETACH");
2873         foreground = ap_exists_config_define("FOREGROUND");
2874     }
2875
2876     /* sigh, want this only the second time around */
2877     retained = ap_retained_data_get(userdata_key);
2878     if (!retained) {
2879         retained = ap_retained_data_create(userdata_key, sizeof(*retained));
2880         retained->max_daemons_limit = -1;
2881         retained->idle_spawn_rate = 1;
2882     }
2883     ++retained->module_loads;
2884     if (retained->module_loads == 2) {
2885         rv = apr_pollset_create(&event_pollset, 1, plog,
2886                                 APR_POLLSET_WAKEABLE|APR_POLLSET_NOCOPY);
2887         if (rv != APR_SUCCESS) {
2888             ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
2889                          "apr_pollset_create failed; check system or user limits");
2890             return HTTP_INTERNAL_SERVER_ERROR;
2891         }
2892         apr_pollset_destroy(event_pollset);
2893
2894         if (!one_process && !foreground) {
2895             /* before we detach, setup crash handlers to log to errorlog */
2896             ap_fatal_signal_setup(ap_server_conf, pconf);
2897             rv = apr_proc_detach(no_detach ? APR_PROC_DETACH_FOREGROUND
2898                                  : APR_PROC_DETACH_DAEMONIZE);
2899             if (rv != APR_SUCCESS) {
2900                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
2901                              "apr_proc_detach failed");
2902                 return HTTP_INTERNAL_SERVER_ERROR;
2903             }
2904         }
2905     }
2906
2907     parent_pid = ap_my_pid = getpid();
2908
2909     ap_listen_pre_config();
2910     ap_daemons_to_start = DEFAULT_START_DAEMON;
2911     min_spare_threads = DEFAULT_MIN_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
2912     max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
2913     server_limit = DEFAULT_SERVER_LIMIT;
2914     thread_limit = DEFAULT_THREAD_LIMIT;
2915     ap_daemons_limit = server_limit;
2916     threads_per_child = DEFAULT_THREADS_PER_CHILD;
2917     max_workers = ap_daemons_limit * threads_per_child;
2918     ap_extended_status = 0;
2919
2920     return OK;
2921 }
2922
2923 static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
2924                               apr_pool_t *ptemp, server_rec *s)
2925 {
2926     int startup = 0;
2927
2928     /* the reverse of pre_config, we want this only the first time around */
2929     if (retained->module_loads == 1) {
2930         startup = 1;
2931     }
2932
2933     if (server_limit > MAX_SERVER_LIMIT) {
2934         if (startup) {
2935             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2936                          "WARNING: ServerLimit of %d exceeds compile-time "
2937                          "limit of", server_limit);
2938             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2939                          " %d servers, decreasing to %d.",
2940                          MAX_SERVER_LIMIT, MAX_SERVER_LIMIT);
2941         } else {
2942             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2943                          "ServerLimit of %d exceeds compile-time limit "
2944                          "of %d, decreasing to match",
2945                          server_limit, MAX_SERVER_LIMIT);
2946         }
2947         server_limit = MAX_SERVER_LIMIT;
2948     }
2949     else if (server_limit < 1) {
2950         if (startup) {
2951             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2952                          "WARNING: ServerLimit of %d not allowed, "
2953                          "increasing to 1.", server_limit);
2954         } else {
2955             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2956                          "ServerLimit of %d not allowed, increasing to 1",
2957                          server_limit);
2958         }
2959         server_limit = 1;
2960     }
2961
2962     /* you cannot change ServerLimit across a restart; ignore
2963      * any such attempts
2964      */
2965     if (!retained->first_server_limit) {
2966         retained->first_server_limit = server_limit;
2967     }
2968     else if (server_limit != retained->first_server_limit) {
2969         /* don't need a startup console version here */
2970         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2971                      "changing ServerLimit to %d from original value of %d "
2972                      "not allowed during restart",
2973                      server_limit, retained->first_server_limit);
2974         server_limit = retained->first_server_limit;
2975     }
2976
2977     if (thread_limit > MAX_THREAD_LIMIT) {
2978         if (startup) {
2979             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2980                          "WARNING: ThreadLimit of %d exceeds compile-time "
2981                          "limit of", thread_limit);
2982             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2983                          " %d threads, decreasing to %d.",
2984                          MAX_THREAD_LIMIT, MAX_THREAD_LIMIT);
2985         } else {
2986             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2987                          "ThreadLimit of %d exceeds compile-time limit "
2988                          "of %d, decreasing to match",
2989                          thread_limit, MAX_THREAD_LIMIT);
2990         }
2991         thread_limit = MAX_THREAD_LIMIT;
2992     }
2993     else if (thread_limit < 1) {
2994         if (startup) {
2995             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2996                          "WARNING: ThreadLimit of %d not allowed, "
2997                          "increasing to 1.", thread_limit);
2998         } else {
2999             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3000                          "ThreadLimit of %d not allowed, increasing to 1",
3001                          thread_limit);
3002         }
3003         thread_limit = 1;
3004     }
3005
3006     /* you cannot change ThreadLimit across a restart; ignore
3007      * any such attempts
3008      */
3009     if (!retained->first_thread_limit) {
3010         retained->first_thread_limit = thread_limit;
3011     }
3012     else if (thread_limit != retained->first_thread_limit) {
3013         /* don't need a startup console version here */
3014         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3015                      "changing ThreadLimit to %d from original value of %d "
3016                      "not allowed during restart",
3017                      thread_limit, retained->first_thread_limit);
3018         thread_limit = retained->first_thread_limit;
3019     }
3020
3021     if (threads_per_child > thread_limit) {
3022         if (startup) {
3023             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3024                          "WARNING: ThreadsPerChild of %d exceeds ThreadLimit "
3025                          "of", threads_per_child);
3026             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3027                          " %d threads, decreasing to %d.",
3028                          thread_limit, thread_limit);
3029             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3030                          " To increase, please see the ThreadLimit "
3031                          "directive.");
3032         } else {
3033             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3034                          "ThreadsPerChild of %d exceeds ThreadLimit "
3035                          "of %d, decreasing to match",
3036                          threads_per_child, thread_limit);
3037         }
3038         threads_per_child = thread_limit;
3039     }
3040     else if (threads_per_child < 1) {
3041         if (startup) {
3042             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3043                          "WARNING: ThreadsPerChild of %d not allowed, "
3044                          "increasing to 1.", threads_per_child);
3045         } else {
3046             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3047                          "ThreadsPerChild of %d not allowed, increasing to 1",
3048                          threads_per_child);
3049         }
3050         threads_per_child = 1;
3051     }
3052
3053     if (max_workers < threads_per_child) {
3054         if (startup) {
3055             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3056                          "WARNING: MaxRequestWorkers of %d is less than "
3057                          "ThreadsPerChild of", max_workers);
3058             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3059                          " %d, increasing to %d.  MaxRequestWorkers must be at "
3060                          "least as large",
3061                          threads_per_child, threads_per_child);
3062             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3063                          " as the number of threads in a single server.");
3064         } else {
3065             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3066                          "MaxRequestWorkers of %d is less than ThreadsPerChild "
3067                          "of %d, increasing to match",
3068                          max_workers, threads_per_child);
3069         }
3070         max_workers = threads_per_child;
3071     }
3072
3073     ap_daemons_limit = max_workers / threads_per_child;
3074
3075     if (max_workers % threads_per_child) {
3076         int tmp_max_workers = ap_daemons_limit * threads_per_child;
3077
3078         if (startup) {
3079             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3080                          "WARNING: MaxRequestWorkers of %d is not an integer "
3081                          "multiple of", max_workers);
3082             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3083                          " ThreadsPerChild of %d, decreasing to nearest "
3084                          "multiple %d,", threads_per_child,
3085                          tmp_max_workers);
3086             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3087                          " for a maximum of %d servers.",
3088                          ap_daemons_limit);
3089         } else {
3090             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3091                          "MaxRequestWorkers of %d is not an integer multiple "
3092                          "of ThreadsPerChild of %d, decreasing to nearest "
3093                          "multiple %d", max_workers, threads_per_child,
3094                          tmp_max_workers);
3095         }
3096         max_workers = tmp_max_workers;
3097     }
3098
3099     if (ap_daemons_limit > server_limit) {
3100         if (startup) {
3101             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3102                          "WARNING: MaxRequestWorkers of %d would require %d "
3103                          "servers and ", max_workers, ap_daemons_limit);
3104             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3105                          " would exceed ServerLimit of %d, decreasing to %d.",
3106                          server_limit, server_limit * threads_per_child);
3107             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3108                          " To increase, please see the ServerLimit "
3109                          "directive.");
3110         } else {
3111             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3112                          "MaxRequestWorkers of %d would require %d servers and "
3113                          "exceed ServerLimit of %d, decreasing to %d",
3114                          max_workers, ap_daemons_limit, server_limit,
3115                          server_limit * threads_per_child);
3116         }
3117         ap_daemons_limit = server_limit;
3118     }
3119
3120     /* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */
3121     if (ap_daemons_to_start < 0) {
3122         if (startup) {
3123             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3124                          "WARNING: StartServers of %d not allowed, "
3125                          "increasing to 1.", ap_daemons_to_start);
3126         } else {
3127             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3128                          "StartServers of %d not allowed, increasing to 1",
3129                          ap_daemons_to_start);
3130         }
3131         ap_daemons_to_start = 1;
3132     }
3133
3134     if (min_spare_threads < 1) {
3135         if (startup) {
3136             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3137                          "WARNING: MinSpareThreads of %d not allowed, "
3138                          "increasing to 1", min_spare_threads);
3139             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3140                          " to avoid almost certain server failure.");
3141             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3142                          " Please read the documentation.");
3143         } else {
3144             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3145                          "MinSpareThreads of %d not allowed, increasing to 1",
3146                          min_spare_threads);
3147         }
3148         min_spare_threads = 1;
3149     }
3150
3151     /* max_spare_threads < min_spare_threads + threads_per_child
3152      * checked in ap_mpm_run()
3153      */
3154
3155     return OK;
3156 }
3157
3158 static void event_hooks(apr_pool_t * p)
3159 {
3160     /* Our open_logs hook function must run before the core's, or stderr
3161      * will be redirected to a file, and the messages won't print to the
3162      * console.
3163      */
3164     static const char *const aszSucc[] = { "core.c", NULL };
3165     one_process = 0;
3166
3167     ap_hook_open_logs(event_open_logs, NULL, aszSucc, APR_HOOK_REALLY_FIRST);
3168     /* we need to set the MPM state before other pre-config hooks use MPM query
3169      * to retrieve it, so register as REALLY_FIRST
3170      */
3171     ap_hook_pre_config(event_pre_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
3172     ap_hook_check_config(event_check_config, NULL, NULL, APR_HOOK_MIDDLE);
3173     ap_hook_mpm(event_run, NULL, NULL, APR_HOOK_MIDDLE);
3174     ap_hook_mpm_query(event_query, NULL, NULL, APR_HOOK_MIDDLE);
3175     ap_hook_mpm_register_timed_callback(event_register_timed_callback, NULL, NULL,
3176                                         APR_HOOK_MIDDLE);
3177     ap_hook_mpm_get_name(event_get_name, NULL, NULL, APR_HOOK_MIDDLE);
3178 }
3179
3180 static const char *set_daemons_to_start(cmd_parms *cmd, void *dummy,
3181                                         const char *arg)
3182 {
3183     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3184     if (err != NULL) {
3185         return err;
3186     }
3187
3188     ap_daemons_to_start = atoi(arg);
3189     return NULL;
3190 }
3191
3192 static const char *set_min_spare_threads(cmd_parms * cmd, void *dummy,
3193                                          const char *arg)
3194 {
3195     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3196     if (err != NULL) {
3197         return err;
3198     }
3199
3200     min_spare_threads = atoi(arg);
3201     return NULL;
3202 }
3203
3204 static const char *set_max_spare_threads(cmd_parms * cmd, void *dummy,
3205                                          const char *arg)
3206 {
3207     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3208     if (err != NULL) {
3209         return err;
3210     }
3211
3212     max_spare_threads = atoi(arg);
3213     return NULL;
3214 }
3215
3216 static const char *set_max_workers(cmd_parms * cmd, void *dummy,
3217                                    const char *arg)
3218 {
3219     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3220     if (err != NULL) {
3221         return err;
3222     }
3223     if (!strcasecmp(cmd->cmd->name, "MaxClients")) {
3224         ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL,
3225                      "MaxClients is deprecated, use MaxRequestWorkers "
3226                      "instead.");
3227     }
3228     max_workers = atoi(arg);
3229     return NULL;
3230 }
3231
3232 static const char *set_threads_per_child(cmd_parms * cmd, void *dummy,
3233                                          const char *arg)
3234 {
3235     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3236     if (err != NULL) {
3237         return err;
3238     }
3239
3240     threads_per_child = atoi(arg);
3241     return NULL;
3242 }
3243 static const char *set_server_limit (cmd_parms *cmd, void *dummy, const char *arg)
3244 {
3245     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3246     if (err != NULL) {
3247         return err;
3248     }
3249
3250     server_limit = atoi(arg);
3251     return NULL;
3252 }
3253
3254 static const char *set_thread_limit(cmd_parms * cmd, void *dummy,
3255                                     const char *arg)
3256 {
3257     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3258     if (err != NULL) {
3259         return err;
3260     }
3261
3262     thread_limit = atoi(arg);
3263     return NULL;
3264 }
3265
3266 static const char *set_worker_factor(cmd_parms * cmd, void *dummy,
3267                                      const char *arg)
3268 {
3269     double val;
3270     char *endptr;
3271     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3272     if (err != NULL) {
3273         return err;
3274     }
3275
3276     val = strtod(arg, &endptr);
3277     if (*endptr)
3278         return "error parsing value";
3279
3280     worker_factor = val * WORKER_FACTOR_SCALE;
3281     if (worker_factor == 0)
3282         worker_factor = 1;
3283     return NULL;
3284 }
3285
3286
3287 static const command_rec event_cmds[] = {
3288     LISTEN_COMMANDS,
3289     AP_INIT_TAKE1("StartServers", set_daemons_to_start, NULL, RSRC_CONF,
3290                   "Number of child processes launched at server startup"),
3291     AP_INIT_TAKE1("ServerLimit", set_server_limit, NULL, RSRC_CONF,
3292                   "Maximum number of child processes for this run of Apache"),
3293     AP_INIT_TAKE1("MinSpareThreads", set_min_spare_threads, NULL, RSRC_CONF,
3294                   "Minimum number of idle threads, to handle request spikes"),
3295     AP_INIT_TAKE1("MaxSpareThreads", set_max_spare_threads, NULL, RSRC_CONF,
3296                   "Maximum number of idle threads"),
3297     AP_INIT_TAKE1("MaxClients", set_max_workers, NULL, RSRC_CONF,
3298                   "Deprecated name of MaxRequestWorkers"),
3299     AP_INIT_TAKE1("MaxRequestWorkers", set_max_workers, NULL, RSRC_CONF,
3300                   "Maximum number of threads alive at the same time"),
3301     AP_INIT_TAKE1("ThreadsPerChild", set_threads_per_child, NULL, RSRC_CONF,
3302                   "Number of threads each child creates"),
3303     AP_INIT_TAKE1("ThreadLimit", set_thread_limit, NULL, RSRC_CONF,
3304                   "Maximum number of worker threads per child process for this "
3305                   "run of Apache - Upper limit for ThreadsPerChild"),
3306     AP_INIT_TAKE1("AsyncRequestWorkerFactor", set_worker_factor, NULL, RSRC_CONF,
3307                   "How many additional connects will be accepted per idle "
3308                   "worker thread"),
3309     AP_GRACEFUL_SHUTDOWN_TIMEOUT_COMMAND,
3310     {NULL}
3311 };
3312
3313 AP_DECLARE_MODULE(mpm_event) = {
3314     MPM20_MODULE_STUFF,
3315     NULL,                       /* hook to run before apache parses args */
3316     NULL,                       /* create per-directory config structure */
3317     NULL,                       /* merge per-directory config structures */
3318     NULL,                       /* create per-server config structure */
3319     NULL,                       /* merge per-server config structures */
3320     event_cmds,                 /* command apr_table_t */
3321     event_hooks                 /* register_hooks */
3322 };