granicus.if.org Git - apache/blob - server/mpm/event/event.c

   1 /* Licensed to the Apache Software Foundation (ASF) under one or more
   2  * contributor license agreements.  See the NOTICE file distributed with
   3  * this work for additional information regarding copyright ownership.
   4  * The ASF licenses this file to You under the Apache License, Version 2.0
   5  * (the "License"); you may not use this file except in compliance with
   6  * the License.  You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * This MPM tries to fix the 'keep alive problem' in HTTP.
  19  *
  20  * After a client completes the first request, the client can keep the
  21  * connection open to send more requests with the same socket.  This can save
  22  * significant overhead in creating TCP connections.  However, the major
  23  * disadvantage is that Apache traditionally keeps an entire child
  24  * process/thread waiting for data from the client.  To solve this problem,
  25  * this MPM has a dedicated thread for handling both the listening sockets,
  26  * and all sockets that are in a Keep Alive status.
  27  *
  28  * The MPM assumes the underlying apr_pollset implementation is somewhat
  29  * threadsafe.  This currently is only compatible with KQueue and EPoll.  This
  30  * enables the MPM to avoid extra high level locking or having to wake up the
  31  * listener thread when a keep-alive socket needs to be sent to it.
  32  *
  33  * This MPM does not perform well on older platforms that do not have very good
  34  * threading, like Linux with a 2.4 kernel, but this does not matter, since we
  35  * require EPoll or KQueue.
  36  *
  37  * For FreeBSD, use 5.3.  It is possible to run this MPM on FreeBSD 5.2.1, if
  38  * you use libkse (see `man libmap.conf`).
  39  *
  40  * For NetBSD, use at least 2.0.
  41  *
  42  * For Linux, you should use a 2.6 kernel, and make sure your glibc has epoll
  43  * support compiled in.
  44  *
  45  */
  46
  47 #include "apr.h"
  48 #include "apr_portable.h"
  49 #include "apr_strings.h"
  50 #include "apr_file_io.h"
  51 #include "apr_thread_proc.h"
  52 #include "apr_signal.h"
  53 #include "apr_thread_mutex.h"
  54 #include "apr_poll.h"
  55 #include "apr_ring.h"
  56 #include "apr_queue.h"
  57 #include "apr_atomic.h"
  58 #define APR_WANT_STRFUNC
  59 #include "apr_want.h"
  60 #include "apr_version.h"
  61
  62 #if APR_HAVE_UNISTD_H
  63 #include <unistd.h>
  64 #endif
  65 #if APR_HAVE_SYS_SOCKET_H
  66 #include <sys/socket.h>
  67 #endif
  68 #if APR_HAVE_SYS_WAIT_H
  69 #include <sys/wait.h>
  70 #endif
  71 #ifdef HAVE_SYS_PROCESSOR_H
  72 #include <sys/processor.h>      /* for bindprocessor() */
  73 #endif
  74
  75 #if !APR_HAS_THREADS
  76 #error The Event MPM requires APR threads, but they are unavailable.
  77 #endif
  78
  79 #include "ap_config.h"
  80 #include "httpd.h"
  81 #include "http_main.h"
  82 #include "http_log.h"
  83 #include "http_config.h"        /* for read_config */
  84 #include "http_core.h"          /* for get_remote_host */
  85 #include "http_connection.h"
  86 #include "ap_mpm.h"
  87 #include "pod.h"
  88 #include "mpm_common.h"
  89 #include "ap_listen.h"
  90 #include "scoreboard.h"
  91 #include "fdqueue.h"
  92 #include "mpm_default.h"
  93 #include "http_vhost.h"
  94 #include "unixd.h"
  95
  96 #include <signal.h>
  97 #include <limits.h>             /* for INT_MAX */
  98
  99
 100 #if HAVE_SERF
 101 #include "mod_serf.h"
 102 #include "serf.h"
 103 #endif
 104
 105 /* Limit on the total --- clients will be locked out if more servers than
 106  * this are needed.  It is intended solely to keep the server from crashing
 107  * when things get out of hand.
 108  *
 109  * We keep a hard maximum number of servers, for two reasons --- first off,
 110  * in case something goes seriously wrong, we want to stop the fork bomb
 111  * short of actually crashing the machine we're running on by filling some
 112  * kernel table.  Secondly, it keeps the size of the scoreboard file small
 113  * enough that we can read the whole thing without worrying too much about
 114  * the overhead.
 115  */
 116 #ifndef DEFAULT_SERVER_LIMIT
 117 #define DEFAULT_SERVER_LIMIT 16
 118 #endif
 119
 120 /* Admin can't tune ServerLimit beyond MAX_SERVER_LIMIT.  We want
 121  * some sort of compile-time limit to help catch typos.
 122  */
 123 #ifndef MAX_SERVER_LIMIT
 124 #define MAX_SERVER_LIMIT 20000
 125 #endif
 126
 127 /* Limit on the threads per process.  Clients will be locked out if more than
 128  * this are needed.
 129  *
 130  * We keep this for one reason: it keeps the size of the scoreboard file small
 131  * enough that we can read the whole thing without worrying too much about
 132  * the overhead.
 133  */
 134 #ifndef DEFAULT_THREAD_LIMIT
 135 #define DEFAULT_THREAD_LIMIT 64
 136 #endif
 137
 138 /* Admin can't tune ThreadLimit beyond MAX_THREAD_LIMIT.  We want
 139  * some sort of compile-time limit to help catch typos.
 140  */
 141 #ifndef MAX_THREAD_LIMIT
 142 #define MAX_THREAD_LIMIT 100000
 143 #endif
 144
 145 #define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
 146
 147 #if !APR_VERSION_AT_LEAST(1,4,0)
 148 #define apr_time_from_msec(x) (x * 1000)
 149 #endif
 150
 151 #ifndef MAX_SECS_TO_LINGER
 152 #define MAX_SECS_TO_LINGER 30
 153 #endif
 154 #define SECONDS_TO_LINGER  2
 155
 156 /*
 157  * Actual definitions of config globals
 158  */
 159
 160 #ifndef DEFAULT_WORKER_FACTOR
 161 #define DEFAULT_WORKER_FACTOR 2
 162 #endif
 163 #define WORKER_FACTOR_SCALE   16  /* scale factor to allow fractional values */
 164 static unsigned int worker_factor = DEFAULT_WORKER_FACTOR * WORKER_FACTOR_SCALE;
 165
 166 static int threads_per_child = 0;   /* Worker threads per child */
 167 static int ap_daemons_to_start = 0;
 168 static int min_spare_threads = 0;
 169 static int max_spare_threads = 0;
 170 static int ap_daemons_limit = 0;
 171 static int max_workers = 0;
 172 static int server_limit = 0;
 173 static int thread_limit = 0;
 174 static int dying = 0;
 175 static int workers_may_exit = 0;
 176 static int start_thread_may_exit = 0;
 177 static int listener_may_exit = 0;
 178 static int requests_this_child;
 179 static int num_listensocks = 0;
 180 static apr_uint32_t connection_count = 0;
 181 static int resource_shortage = 0;
 182 static fd_queue_t *worker_queue;
 183 static fd_queue_info_t *worker_queue_info;
 184 static int mpm_state = AP_MPMQ_STARTING;
 185
 186 static apr_thread_mutex_t *timeout_mutex;
 187 APR_RING_HEAD(timeout_head_t, conn_state_t);
 188 struct timeout_queue {
 189     struct timeout_head_t head;
 190     int count;
 191     const char *tag;
 192 };
 193 /*
 194  * Several timeout queues that use different timeouts, so that we always can
 195  * simply append to the end.
 196  *   write_completion_q uses TimeOut
 197  *   keepalive_q        uses KeepAliveTimeOut
 198  *   linger_q           uses MAX_SECS_TO_LINGER
 199  *   short_linger_q     uses SECONDS_TO_LINGER
 200  */
 201 static struct timeout_queue write_completion_q, keepalive_q, linger_q,
 202                             short_linger_q;
 203 static apr_pollfd_t *listener_pollfd;
 204
 205 /*
 206  * Macros for accessing struct timeout_queue.
 207  * For TO_QUEUE_APPEND and TO_QUEUE_REMOVE, timeout_mutex must be held.
 208  */
 209 #define TO_QUEUE_APPEND(q, el)                                            \
 210     do {                                                                  \
 211         APR_RING_INSERT_TAIL(&(q).head, el, conn_state_t, timeout_list);  \
 212         (q).count++;                                                      \
 213     } while (0)
 214
 215 #define TO_QUEUE_REMOVE(q, el)             \
 216     do {                                   \
 217         APR_RING_REMOVE(el, timeout_list); \
 218         (q).count--;                       \
 219     } while (0)
 220
 221 #define TO_QUEUE_INIT(q)                                            \
 222     do {                                                            \
 223             APR_RING_INIT(&(q).head, conn_state_t, timeout_list);   \
 224             (q).tag = #q;                                           \
 225     } while (0)
 226
 227 #define TO_QUEUE_ELEM_INIT(el) APR_RING_ELEM_INIT(el, timeout_list)
 228
 229 /*
 230  * The pollset for sockets that are in any of the timeout queues. Currently
 231  * we use the timeout_mutex to make sure that connections are added/removed
 232  * atomically to/from both event_pollset and a timeout queue. Otherwise
 233  * some confusion can happen under high load if timeout queues and pollset
 234  * get out of sync.
 235  * XXX: It should be possible to make the lock unnecessary in many or even all
 236  * XXX: cases.
 237  */
 238 static apr_pollset_t *event_pollset;
 239
 240 #if HAVE_SERF
 241 typedef struct {
 242     apr_pollset_t *pollset;
 243     apr_pool_t *pool;
 244 } s_baton_t;
 245
 246 static serf_context_t *g_serf;
 247 #endif
 248
 249 /* The structure used to pass unique initialization info to each thread */
 250 typedef struct
 251 {
 252     int pid;
 253     int tid;
 254     int sd;
 255 } proc_info;
 256
 257 /* Structure used to pass information to the thread responsible for
 258  * creating the rest of the threads.
 259  */
 260 typedef struct
 261 {
 262     apr_thread_t **threads;
 263     apr_thread_t *listener;
 264     int child_num_arg;
 265     apr_threadattr_t *threadattr;
 266 } thread_starter;
 267
 268 typedef enum
 269 {
 270     PT_CSD,
 271     PT_ACCEPT
 272 #if HAVE_SERF
 273     , PT_SERF
 274 #endif
 275 } poll_type_e;
 276
 277 typedef struct
 278 {
 279     poll_type_e type;
 280     void *baton;
 281 } listener_poll_type;
 282
 283 /* data retained by event across load/unload of the module
 284  * allocated on first call to pre-config hook; located on
 285  * subsequent calls to pre-config hook
 286  */
 287 typedef struct event_retained_data {
 288     int first_server_limit;
 289     int first_thread_limit;
 290     int module_loads;
 291     int sick_child_detected;
 292     ap_generation_t my_generation;
 293     int volatile is_graceful; /* set from signal handler */
 294     int maxclients_reported;
 295     /*
 296      * The max child slot ever assigned, preserved across restarts.  Necessary
 297      * to deal with MaxRequestWorkers changes across AP_SIG_GRACEFUL restarts.
 298      * We use this value to optimize routines that have to scan the entire
 299      * scoreboard.
 300      */
 301     int max_daemons_limit;
 302     /*
 303      * idle_spawn_rate is the number of children that will be spawned on the
 304      * next maintenance cycle if there aren't enough idle servers.  It is
 305      * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by
 306      * without the need to spawn.
 307      */
 308     int idle_spawn_rate;
 309 #ifndef MAX_SPAWN_RATE
 310 #define MAX_SPAWN_RATE        (32)
 311 #endif
 312     int hold_off_on_exponential_spawning;
 313 } event_retained_data;
 314 static event_retained_data *retained;
 315
 316 #define ID_FROM_CHILD_THREAD(c, t)    ((c * thread_limit) + t)
 317
 318 static ap_event_pod_t *pod;
 319
 320 /* The event MPM respects a couple of runtime flags that can aid
 321  * in debugging. Setting the -DNO_DETACH flag will prevent the root process
 322  * from detaching from its controlling terminal. Additionally, setting
 323  * the -DONE_PROCESS flag (which implies -DNO_DETACH) will get you the
 324  * child_main loop running in the process which originally started up.
 325  * This gives you a pretty nice debugging environment.  (You'll get a SIGHUP
 326  * early in standalone_main; just continue through.  This is the server
 327  * trying to kill off any child processes which it might have lying
 328  * around --- Apache doesn't keep track of their pids, it just sends
 329  * SIGHUP to the process group, ignoring it in the root process.
 330  * Continue through and you'll be fine.).
 331  */
 332
 333 static int one_process = 0;
 334
 335 #ifdef DEBUG_SIGSTOP
 336 int raise_sigstop_flags;
 337 #endif
 338
 339 static apr_pool_t *pconf;       /* Pool for config stuff */
 340 static apr_pool_t *pchild;      /* Pool for httpd child stuff */
 341
 342 static pid_t ap_my_pid;         /* Linux getpid() doesn't work except in main
 343                                    thread. Use this instead */
 344 static pid_t parent_pid;
 345 static apr_os_thread_t *listener_os_thread;
 346
 347 /* The LISTENER_SIGNAL signal will be sent from the main thread to the
 348  * listener thread to wake it up for graceful termination (what a child
 349  * process from an old generation does when the admin does "apachectl
 350  * graceful").  This signal will be blocked in all threads of a child
 351  * process except for the listener thread.
 352  */
 353 #define LISTENER_SIGNAL     SIGHUP
 354
 355 /* An array of socket descriptors in use by each thread used to
 356  * perform a non-graceful (forced) shutdown of the server.
 357  */
 358 static apr_socket_t **worker_sockets;
 359
 360 static void disable_listensocks(int process_slot)
 361 {
 362     int i;
 363     for (i = 0; i < num_listensocks; i++) {
 364         apr_pollset_remove(event_pollset, &listener_pollfd[i]);
 365     }
 366     ap_scoreboard_image->parent[process_slot].not_accepting = 1;
 367 }
 368
 369 static void enable_listensocks(int process_slot)
 370 {
 371     int i;
 372     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
 373                  "Accepting new connections again: "
 374                  "%u active conns, %u idle workers",
 375                  apr_atomic_read32(&connection_count),
 376                  ap_queue_info_get_idlers(worker_queue_info));
 377     for (i = 0; i < num_listensocks; i++)
 378         apr_pollset_add(event_pollset, &listener_pollfd[i]);
 379     /*
 380      * XXX: This is not yet optimal. If many workers suddenly become available,
 381      * XXX: the parent may kill some processes off too soon.
 382      */
 383     ap_scoreboard_image->parent[process_slot].not_accepting = 0;
 384 }
 385
 386 static void close_worker_sockets(void)
 387 {
 388     int i;
 389     for (i = 0; i < threads_per_child; i++) {
 390         if (worker_sockets[i]) {
 391             apr_socket_close(worker_sockets[i]);
 392             worker_sockets[i] = NULL;
 393         }
 394     }
 395 }
 396
 397 static void wakeup_listener(void)
 398 {
 399     listener_may_exit = 1;
 400     if (!listener_os_thread) {
 401         /* XXX there is an obscure path that this doesn't handle perfectly:
 402          *     right after listener thread is created but before
 403          *     listener_os_thread is set, the first worker thread hits an
 404          *     error and starts graceful termination
 405          */
 406         return;
 407     }
 408
 409     /* unblock the listener if it's waiting for a worker */
 410     ap_queue_info_term(worker_queue_info);
 411
 412     /*
 413      * we should just be able to "kill(ap_my_pid, LISTENER_SIGNAL)" on all
 414      * platforms and wake up the listener thread since it is the only thread
 415      * with SIGHUP unblocked, but that doesn't work on Linux
 416      */
 417 #ifdef HAVE_PTHREAD_KILL
 418     pthread_kill(*listener_os_thread, LISTENER_SIGNAL);
 419 #else
 420     kill(ap_my_pid, LISTENER_SIGNAL);
 421 #endif
 422 }
 423
 424 #define ST_INIT              0
 425 #define ST_GRACEFUL          1
 426 #define ST_UNGRACEFUL        2
 427
 428 static int terminate_mode = ST_INIT;
 429
 430 static void signal_threads(int mode)
 431 {
 432     if (terminate_mode == mode) {
 433         return;
 434     }
 435     terminate_mode = mode;
 436     mpm_state = AP_MPMQ_STOPPING;
 437
 438     /* in case we weren't called from the listener thread, wake up the
 439      * listener thread
 440      */
 441     wakeup_listener();
 442
 443     /* for ungraceful termination, let the workers exit now;
 444      * for graceful termination, the listener thread will notify the
 445      * workers to exit once it has stopped accepting new connections
 446      */
 447     if (mode == ST_UNGRACEFUL) {
 448         workers_may_exit = 1;
 449         ap_queue_interrupt_all(worker_queue);
 450         close_worker_sockets(); /* forcefully kill all current connections */
 451     }
 452 }
 453
 454 static int event_query(int query_code, int *result, apr_status_t *rv)
 455 {
 456     *rv = APR_SUCCESS;
 457     switch (query_code) {
 458     case AP_MPMQ_MAX_DAEMON_USED:
 459         *result = retained->max_daemons_limit;
 460         break;
 461     case AP_MPMQ_IS_THREADED:
 462         *result = AP_MPMQ_STATIC;
 463         break;
 464     case AP_MPMQ_IS_FORKED:
 465         *result = AP_MPMQ_DYNAMIC;
 466         break;
 467     case AP_MPMQ_IS_ASYNC:
 468         *result = 1;
 469         break;
 470     case AP_MPMQ_HAS_SERF:
 471         *result = 1;
 472         break;
 473     case AP_MPMQ_HARD_LIMIT_DAEMONS:
 474         *result = server_limit;
 475         break;
 476     case AP_MPMQ_HARD_LIMIT_THREADS:
 477         *result = thread_limit;
 478         break;
 479     case AP_MPMQ_MAX_THREADS:
 480         *result = threads_per_child;
 481         break;
 482     case AP_MPMQ_MIN_SPARE_DAEMONS:
 483         *result = 0;
 484         break;
 485     case AP_MPMQ_MIN_SPARE_THREADS:
 486         *result = min_spare_threads;
 487         break;
 488     case AP_MPMQ_MAX_SPARE_DAEMONS:
 489         *result = 0;
 490         break;
 491     case AP_MPMQ_MAX_SPARE_THREADS:
 492         *result = max_spare_threads;
 493         break;
 494     case AP_MPMQ_MAX_REQUESTS_DAEMON:
 495         *result = ap_max_requests_per_child;
 496         break;
 497     case AP_MPMQ_MAX_DAEMONS:
 498         *result = ap_daemons_limit;
 499         break;
 500     case AP_MPMQ_MPM_STATE:
 501         *result = mpm_state;
 502         break;
 503     case AP_MPMQ_GENERATION:
 504         *result = retained->my_generation;
 505         break;
 506     default:
 507         *rv = APR_ENOTIMPL;
 508         break;
 509     }
 510     return OK;
 511 }
 512
 513 static void event_note_child_killed(int childnum, pid_t pid, ap_generation_t gen)
 514 {
 515     if (childnum != -1) { /* child had a scoreboard slot? */
 516         ap_run_child_status(ap_server_conf,
 517                             ap_scoreboard_image->parent[childnum].pid,
 518                             ap_scoreboard_image->parent[childnum].generation,
 519                             childnum, MPM_CHILD_EXITED);
 520         ap_scoreboard_image->parent[childnum].pid = 0;
 521     }
 522     else {
 523         ap_run_child_status(ap_server_conf, pid, gen, -1, MPM_CHILD_EXITED);
 524     }
 525 }
 526
 527 static void event_note_child_started(int slot, pid_t pid)
 528 {
 529     ap_scoreboard_image->parent[slot].pid = pid;
 530     ap_run_child_status(ap_server_conf,
 531                         ap_scoreboard_image->parent[slot].pid,
 532                         retained->my_generation, slot, MPM_CHILD_STARTED);
 533 }
 534
 535 static void event_note_child_lost_slot(int slot, pid_t newpid)
 536 {
 537     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
 538                  "pid %" APR_PID_T_FMT " taking over scoreboard slot from "
 539                  "%" APR_PID_T_FMT "%s",
 540                  newpid,
 541                  ap_scoreboard_image->parent[slot].pid,
 542                  ap_scoreboard_image->parent[slot].quiescing ?
 543                  " (quiescing)" : "");
 544     ap_run_child_status(ap_server_conf,
 545                         ap_scoreboard_image->parent[slot].pid,
 546                         ap_scoreboard_image->parent[slot].generation,
 547                         slot, MPM_CHILD_LOST_SLOT);
 548     /* Don't forget about this exiting child process, or we
 549      * won't be able to kill it if it doesn't exit by the
 550      * time the server is shut down.
 551      */
 552     ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid,
 553                                   ap_scoreboard_image->parent[slot].generation);
 554 }
 555
 556 static const char *event_get_name(void)
 557 {
 558     return "event";
 559 }
 560
 561 /* a clean exit from a child with proper cleanup */
 562 static void clean_child_exit(int code) __attribute__ ((noreturn));
 563 static void clean_child_exit(int code)
 564 {
 565     mpm_state = AP_MPMQ_STOPPING;
 566     if (pchild) {
 567         apr_pool_destroy(pchild);
 568     }
 569
 570     if (one_process) {
 571         event_note_child_killed(/* slot */ 0, 0, 0);
 572     }
 573
 574     exit(code);
 575 }
 576
 577 static void just_die(int sig)
 578 {
 579     clean_child_exit(0);
 580 }
 581
 582 /*****************************************************************
 583  * Connection structures and accounting...
 584  */
 585
 586 static int child_fatal;
 587
 588 /* volatile because they're updated from a signal handler */
 589 static int volatile shutdown_pending;
 590 static int volatile restart_pending;
 591
 592 static apr_status_t decrement_connection_count(void *dummy) {
 593     apr_atomic_dec32(&connection_count);
 594     return APR_SUCCESS;
 595 }
 596
 597 /*
 598  * ap_start_shutdown() and ap_start_restart(), below, are a first stab at
 599  * functions to initiate shutdown or restart without relying on signals.
 600  * Previously this was initiated in sig_term() and restart() signal handlers,
 601  * but we want to be able to start a shutdown/restart from other sources --
 602  * e.g. on Win32, from the service manager. Now the service manager can
 603  * call ap_start_shutdown() or ap_start_restart() as appropriate.  Note that
 604  * these functions can also be called by the child processes, since global
 605  * variables are no longer used to pass on the required action to the parent.
 606  *
 607  * These should only be called from the parent process itself, since the
 608  * parent process will use the shutdown_pending and restart_pending variables
 609  * to determine whether to shutdown or restart. The child process should
 610  * call signal_parent() directly to tell the parent to die -- this will
 611  * cause neither of those variable to be set, which the parent will
 612  * assume means something serious is wrong (which it will be, for the
 613  * child to force an exit) and so do an exit anyway.
 614  */
 615
 616 static void ap_start_shutdown(int graceful)
 617 {
 618     mpm_state = AP_MPMQ_STOPPING;
 619     if (shutdown_pending == 1) {
 620         /* Um, is this _probably_ not an error, if the user has
 621          * tried to do a shutdown twice quickly, so we won't
 622          * worry about reporting it.
 623          */
 624         return;
 625     }
 626     shutdown_pending = 1;
 627     retained->is_graceful = graceful;
 628 }
 629
 630 /* do a graceful restart if graceful == 1 */
 631 static void ap_start_restart(int graceful)
 632 {
 633     mpm_state = AP_MPMQ_STOPPING;
 634     if (restart_pending == 1) {
 635         /* Probably not an error - don't bother reporting it */
 636         return;
 637     }
 638     restart_pending = 1;
 639     retained->is_graceful = graceful;
 640 }
 641
 642 static void sig_term(int sig)
 643 {
 644     ap_start_shutdown(sig == AP_SIG_GRACEFUL_STOP);
 645 }
 646
 647 static void restart(int sig)
 648 {
 649     ap_start_restart(sig == AP_SIG_GRACEFUL);
 650 }
 651
 652 static void set_signals(void)
 653 {
         /*
          * Install this process's signal dispositions:
          *   SIGTERM, AP_SIG_GRACEFUL_STOP, SIGINT -> sig_term()
          *   SIGHUP, AP_SIG_GRACEFUL               -> restart()
          *   SIGXCPU                               -> default action
          *   SIGXFSZ, SIGPIPE                      -> ignored
          * Unless one_process is set, ap_fatal_signal_setup() is called first.
          * With sigaction(), a failing call is only logged as a warning.
          * When NO_USE_SIGACTION is defined, apr_signal() is used instead; in
          * that variant SIGINT is not touched and SIGXCPU/SIGXFSZ are set up
          * only when one_process is not set.
          */
 654 #ifndef NO_USE_SIGACTION
 655     struct sigaction sa;
 656 #endif
 657
 658     if (!one_process) {
 659         ap_fatal_signal_setup(ap_server_conf, pconf);
 660     }
 661
 662 #ifndef NO_USE_SIGACTION
         /* sa is reused for every call below; its handler and mask carry over
          * from one sigaction() to the next, so statement order matters. */
 663     sigemptyset(&sa.sa_mask);
 664     sa.sa_flags = 0;
 665
 666     sa.sa_handler = sig_term;
 667     if (sigaction(SIGTERM, &sa, NULL) < 0)
 668         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 669                      "sigaction(SIGTERM)");
 670 #ifdef AP_SIG_GRACEFUL_STOP
         /* sa.sa_handler is still sig_term here */
 671     if (sigaction(AP_SIG_GRACEFUL_STOP, &sa, NULL) < 0)
 672         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 673                      "sigaction(" AP_SIG_GRACEFUL_STOP_STRING ")");
 674 #endif
 675 #ifdef SIGINT
         /* sa.sa_handler is still sig_term here */
 676     if (sigaction(SIGINT, &sa, NULL) < 0)
 677         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 678                      "sigaction(SIGINT)");
 679 #endif
 680 #ifdef SIGXCPU
 681     sa.sa_handler = SIG_DFL;
 682     if (sigaction(SIGXCPU, &sa, NULL) < 0)
 683         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 684                      "sigaction(SIGXCPU)");
 685 #endif
 686 #ifdef SIGXFSZ
 687     /* For systems following the LFS standard, ignoring SIGXFSZ allows
 688      * a write() beyond the 2GB limit to fail gracefully with E2BIG
 689      * rather than terminate the process. */
 690     sa.sa_handler = SIG_IGN;
 691     if (sigaction(SIGXFSZ, &sa, NULL) < 0)
 692         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 693                      "sigaction(SIGXFSZ)");
 694 #endif
 695 #ifdef SIGPIPE
 696     sa.sa_handler = SIG_IGN;
 697     if (sigaction(SIGPIPE, &sa, NULL) < 0)
 698         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 699                      "sigaction(SIGPIPE)");
 700 #endif
 701
 702     /* we want to ignore HUPs and AP_SIG_GRACEFUL while we're busy
 703      * processing one */
         /* the mask was empty for every handler installed above; from here on
          * it contains SIGHUP and AP_SIG_GRACEFUL, and it is installed with
          * both restart() registrations below */
 704     sigaddset(&sa.sa_mask, SIGHUP);
 705     sigaddset(&sa.sa_mask, AP_SIG_GRACEFUL);
 706     sa.sa_handler = restart;
 707     if (sigaction(SIGHUP, &sa, NULL) < 0)
 708         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 709                      "sigaction(SIGHUP)");
 710     if (sigaction(AP_SIG_GRACEFUL, &sa, NULL) < 0)
 711         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 712                      "sigaction(" AP_SIG_GRACEFUL_STRING ")");
 713 #else
         /* apr_signal() fallback: return values are not checked */
 714     if (!one_process) {
 715 #ifdef SIGXCPU
 716         apr_signal(SIGXCPU, SIG_DFL);
 717 #endif /* SIGXCPU */
 718 #ifdef SIGXFSZ
 719         apr_signal(SIGXFSZ, SIG_IGN);
 720 #endif /* SIGXFSZ */
 721     }
 722
 723     apr_signal(SIGTERM, sig_term);
 724 #ifdef SIGHUP
 725     apr_signal(SIGHUP, restart);
 726 #endif /* SIGHUP */
 727 #ifdef AP_SIG_GRACEFUL
 728     apr_signal(AP_SIG_GRACEFUL, restart);
 729 #endif /* AP_SIG_GRACEFUL */
 730 #ifdef AP_SIG_GRACEFUL_STOP
 731      apr_signal(AP_SIG_GRACEFUL_STOP, sig_term);
 732 #endif /* AP_SIG_GRACEFUL_STOP */
 733 #ifdef SIGPIPE
 734     apr_signal(SIGPIPE, SIG_IGN);
 735 #endif /* SIGPIPE */
 736
 737 #endif
 738 }
 739
 740 /*
 741  * close our side of the connection
 742  * Pre-condition: cs is not in any timeout queue and not in the pollset,
 743  *                timeout_mutex is not locked
 744  * return: 0 if connection is fully closed,
 745  *         1 if connection is lingering
 746  * may be called by listener or by worker thread
 747  */
 748 static int start_lingering_close(conn_state_t *cs)
 749 {
 750     apr_status_t rv;
 751     if (ap_start_lingering_close(cs->c)) {
 752         apr_pool_clear(cs->p);
 753         ap_push_pool(worker_queue_info, cs->p);
 754         return 0;
 755     }
 756     else {
 757         apr_socket_t *csd = ap_get_conn_socket(cs->c);
 758         struct timeout_queue *q;
 759
 760         rv = apr_socket_timeout_set(csd, 0);
 761         AP_DEBUG_ASSERT(rv == APR_SUCCESS);
 762         /*
 763          * If some module requested a shortened waiting period, only wait for
 764          * 2s (SECONDS_TO_LINGER). This is useful for mitigating certain
 765          * DoS attacks.
 766          */
 767         if (apr_table_get(cs->c->notes, "short-lingering-close")) {
 768             cs->expiration_time =
 769                 apr_time_now() + apr_time_from_sec(SECONDS_TO_LINGER);
 770             q = &short_linger_q;
 771             cs->state = CONN_STATE_LINGER_SHORT;
 772         }
 773         else {
 774             cs->expiration_time =
 775                 apr_time_now() + apr_time_from_sec(MAX_SECS_TO_LINGER);
 776             q = &linger_q;
 777             cs->state = CONN_STATE_LINGER_NORMAL;
 778         }
 779         apr_thread_mutex_lock(timeout_mutex);
 780         TO_QUEUE_APPEND(*q, cs);
 781         cs->pfd.reqevents = APR_POLLIN | APR_POLLHUP | APR_POLLERR;
 782         rv = apr_pollset_add(event_pollset, &cs->pfd);
 783         apr_thread_mutex_unlock(timeout_mutex);
 784         if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
 785             ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
 786                          "start_lingering_close: apr_pollset_add failure");
 787             apr_thread_mutex_lock(timeout_mutex);
 788             TO_QUEUE_REMOVE(*q, cs);
 789             apr_thread_mutex_unlock(timeout_mutex);
 790             apr_socket_close(cs->pfd.desc.s);
 791             apr_pool_clear(cs->p);
 792             ap_push_pool(worker_queue_info, cs->p);
 793             return 0;
 794         }
 795     }
 796     return 1;
 797 }
 798
 799 /*
 800  * forcibly close a lingering connection after the lingering period has
 801  * expired
 802  * Pre-condition: cs is not in any timeout queue and not in the pollset
 803  * return: irrelevant (need same prototype as start_lingering_close)
 804  */
 805 static int stop_lingering_close(conn_state_t *cs)
 806 {
 807     apr_status_t rv;
 808     apr_socket_t *csd = ap_get_conn_socket(cs->c);
 809     ap_log_error(APLOG_MARK, APLOG_TRACE4, 0, ap_server_conf,
 810                  "socket reached timeout in lingering-close state");
 811     rv = apr_socket_close(csd);
 812     if (rv != APR_SUCCESS) {
 813         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, "error closing socket");
 814         AP_DEBUG_ASSERT(0);
 815     }
 816     apr_pool_clear(cs->p);
 817     ap_push_pool(worker_queue_info, cs->p);
 818     return 0;
 819 }
 820
 821 /*
 822  * process one connection in the worker
 823  * return: 1 if the connection has been completed,
 824  *         0 if it is still open and waiting for some event
 825  */
 826 static int process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock,
 827                           conn_state_t * cs, int my_child_num,
 828                           int my_thread_num)
 829 {
 830     conn_rec *c;
 831     long conn_id = ID_FROM_CHILD_THREAD(my_child_num, my_thread_num);
 832     int rc;
 833     ap_sb_handle_t *sbh;
 834
 835     ap_create_sb_handle(&sbh, p, my_child_num, my_thread_num);
 836
 837     if (cs == NULL) {           /* This is a new connection */
 838         listener_poll_type *pt = apr_pcalloc(p, sizeof(*pt));
 839         cs = apr_pcalloc(p, sizeof(conn_state_t));
 840         cs->bucket_alloc = apr_bucket_alloc_create(p);
 841         c = ap_run_create_connection(p, ap_server_conf, sock,
 842                                      conn_id, sbh, cs->bucket_alloc);
 843         if (!c) {
 844             apr_bucket_alloc_destroy(cs->bucket_alloc);
 845             apr_pool_clear(p);
 846             ap_push_pool(worker_queue_info, p);
 847             return 1;
 848         }
 849         apr_atomic_inc32(&connection_count);
 850         apr_pool_cleanup_register(c->pool, NULL, decrement_connection_count, apr_pool_cleanup_null);
 851         c->current_thread = thd;
 852         cs->c = c;
 853         c->cs = cs;
 854         cs->p = p;
 855         cs->pfd.desc_type = APR_POLL_SOCKET;
 856         cs->pfd.reqevents = APR_POLLIN;
 857         cs->pfd.desc.s = sock;
 858         pt->type = PT_CSD;
 859         pt->baton = cs;
 860         cs->pfd.client_data = pt;
 861         TO_QUEUE_ELEM_INIT(cs);
 862
 863         ap_update_vhost_given_ip(c);
 864
 865         rc = ap_run_pre_connection(c, sock);
 866         if (rc != OK && rc != DONE) {
 867             ap_log_cerror(APLOG_MARK, APLOG_DEBUG, 0, c,
 868                           "process_socket: connection aborted");
 869             c->aborted = 1;
 870         }
 871
 872         /**
 873          * XXX If the platform does not have a usable way of bundling
 874          * accept() with a socket readability check, like Win32,
 875          * and there are measurable delays before the
 876          * socket is readable due to the first data packet arriving,
 877          * it might be better to create the cs on the listener thread
 878          * with the state set to CONN_STATE_CHECK_REQUEST_LINE_READABLE
 879          *
 880          * FreeBSD users will want to enable the HTTP accept filter
 881          * module in their kernel for the highest performance
 882          * When the accept filter is active, sockets are kept in the
 883          * kernel until a HTTP request is received.
 884          */
 885         cs->state = CONN_STATE_READ_REQUEST_LINE;
 886
 887     }
 888     else {
 889         c = cs->c;
 890         c->sbh = sbh;
 891         c->current_thread = thd;
 892     }
 893
 894     if (c->clogging_input_filters && !c->aborted) {
 895         /* Since we have an input filter which 'cloggs' the input stream,
 896          * like mod_ssl, lets just do the normal read from input filters,
 897          * like the Worker MPM does.
 898          */
 899         ap_run_process_connection(c);
 900         if (cs->state != CONN_STATE_SUSPENDED) {
 901             cs->state = CONN_STATE_LINGER;
 902         }
 903     }
 904
 905 read_request:
 906     if (cs->state == CONN_STATE_READ_REQUEST_LINE) {
 907         if (!c->aborted) {
 908             ap_run_process_connection(c);
 909
 910             /* state will be updated upon return
 911              * fall thru to either wait for readability/timeout or
 912              * do lingering close
 913              */
 914         }
 915         else {
 916             cs->state = CONN_STATE_LINGER;
 917         }
 918     }
 919
 920     if (cs->state == CONN_STATE_WRITE_COMPLETION) {
 921         ap_filter_t *output_filter = c->output_filters;
 922         apr_status_t rv;
 923         ap_update_child_status_from_conn(sbh, SERVER_BUSY_WRITE, c);
 924         while (output_filter->next != NULL) {
 925             output_filter = output_filter->next;
 926         }
 927         rv = output_filter->frec->filter_func.out_func(output_filter, NULL);
 928         if (rv != APR_SUCCESS) {
 929             ap_log_cerror(APLOG_MARK, APLOG_DEBUG, rv, c,
 930                           "network write failure in core output filter");
 931             cs->state = CONN_STATE_LINGER;
 932         }
 933         else if (c->data_in_output_filters) {
 934             /* Still in WRITE_COMPLETION_STATE:
 935              * Set a write timeout for this connection, and let the
 936              * event thread poll for writeability.
 937              */
 938             cs->expiration_time = ap_server_conf->timeout + apr_time_now();
 939             apr_thread_mutex_lock(timeout_mutex);
 940             TO_QUEUE_APPEND(write_completion_q, cs);
 941             cs->pfd.reqevents = APR_POLLOUT | APR_POLLHUP | APR_POLLERR;
 942             rc = apr_pollset_add(event_pollset, &cs->pfd);
 943             apr_thread_mutex_unlock(timeout_mutex);
 944             return 1;
 945         }
 946         else if (c->keepalive != AP_CONN_KEEPALIVE || c->aborted ||
 947             listener_may_exit) {
 948             c->cs->state = CONN_STATE_LINGER;
 949         }
 950         else if (c->data_in_input_filters) {
 951             cs->state = CONN_STATE_READ_REQUEST_LINE;
 952             goto read_request;
 953         }
 954         else {
 955             cs->state = CONN_STATE_CHECK_REQUEST_LINE_READABLE;
 956         }
 957     }
 958
 959     if (cs->state == CONN_STATE_LINGER) {
 960         if (!start_lingering_close(cs))
 961             return 0;
 962     }
 963     else if (cs->state == CONN_STATE_CHECK_REQUEST_LINE_READABLE) {
 964         apr_status_t rc;
 965
 966         /* It greatly simplifies the logic to use a single timeout value here
 967          * because the new element can just be added to the end of the list and
 968          * it will stay sorted in expiration time sequence.  If brand new
 969          * sockets are sent to the event thread for a readability check, this
 970          * will be a slight behavior change - they use the non-keepalive
 971          * timeout today.  With a normal client, the socket will be readable in
 972          * a few milliseconds anyway.
 973          */
 974         cs->expiration_time = ap_server_conf->keep_alive_timeout +
 975                               apr_time_now();
 976         apr_thread_mutex_lock(timeout_mutex);
 977         TO_QUEUE_APPEND(keepalive_q, cs);
 978
 979         /* Add work to pollset. */
 980         cs->pfd.reqevents = APR_POLLIN;
 981         rc = apr_pollset_add(event_pollset, &cs->pfd);
 982         apr_thread_mutex_unlock(timeout_mutex);
 983
 984         if (rc != APR_SUCCESS) {
 985             ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
 986                          "process_socket: apr_pollset_add failure");
 987             AP_DEBUG_ASSERT(rc == APR_SUCCESS);
 988         }
 989     }
 990     return 1;
 991 }
 992
 993 /* requests_this_child has gone to zero or below.  See if the admin coded
 994    "MaxConnectionsPerChild 0", and keep going in that case.  Doing it this way
 995    simplifies the hot path in worker_thread */
 996 static void check_infinite_requests(void)
 997 {
 998     if (ap_max_requests_per_child) {
 999         signal_threads(ST_GRACEFUL);
1000     }
1001     else {
1002         requests_this_child = INT_MAX;  /* keep going */
1003     }
1004 }
1005
1006 static void close_listeners(int process_slot, int *closed) {
1007     if (!*closed) {
1008         int i;
1009         disable_listensocks(process_slot);
1010         ap_close_listeners();
1011         *closed = 1;
1012         dying = 1;
1013         ap_scoreboard_image->parent[process_slot].quiescing = 1;
1014         for (i = 0; i < threads_per_child; ++i) {
1015             ap_update_child_status_from_indexes(process_slot, i,
1016                                                 SERVER_GRACEFUL, NULL);
1017         }
1018         /* wake up the main thread */
1019         kill(ap_my_pid, SIGTERM);
1020     }
1021 }
1022
1023 static void unblock_signal(int sig)
1024 {
1025     sigset_t sig_mask;
1026
1027     sigemptyset(&sig_mask);
1028     sigaddset(&sig_mask, sig);
1029 #if defined(SIGPROCMASK_SETS_THREAD_MASK)
1030     sigprocmask(SIG_UNBLOCK, &sig_mask, NULL);
1031 #else
1032     pthread_sigmask(SIG_UNBLOCK, &sig_mask, NULL);
1033 #endif
1034 }
1035
/* Signal handler that intentionally does nothing.
 *
 * XXX If specifying SIG_IGN is guaranteed to unblock a syscall,
 *     then we don't need this goofy function.
 */
static void dummy_signal_handler(int sig)
{
    (void)sig;
}
1042
1043
1044 #if HAVE_SERF
1045 static apr_status_t s_socket_add(void *user_baton,
1046                                  apr_pollfd_t *pfd,
1047                                  void *serf_baton)
1048 {
1049     s_baton_t *s = (s_baton_t*)user_baton;
1050     /* XXXXX: recycle listener_poll_types */
1051     listener_poll_type *pt = ap_malloc(sizeof(*pt));
1052     pt->type = PT_SERF;
1053     pt->baton = serf_baton;
1054     pfd->client_data = pt;
1055     return apr_pollset_add(s->pollset, pfd);
1056 }
1057
1058 static apr_status_t s_socket_remove(void *user_baton,
1059                                     apr_pollfd_t *pfd,
1060                                     void *serf_baton)
1061 {
1062     s_baton_t *s = (s_baton_t*)user_baton;
1063     listener_poll_type *pt = pfd->client_data;
1064     free(pt);
1065     return apr_pollset_remove(s->pollset, pfd);
1066 }
1067 #endif
1068
1069 static apr_status_t init_pollset(apr_pool_t *p)
1070 {
1071 #if HAVE_SERF
1072     s_baton_t *baton = NULL;
1073 #endif
1074     ap_listen_rec *lr;
1075     listener_poll_type *pt;
1076     int i = 0;
1077
1078     TO_QUEUE_INIT(write_completion_q);
1079     TO_QUEUE_INIT(keepalive_q);
1080     TO_QUEUE_INIT(linger_q);
1081     TO_QUEUE_INIT(short_linger_q);
1082
1083     listener_pollfd = apr_palloc(p, sizeof(apr_pollfd_t) * num_listensocks);
1084     for (lr = ap_listeners; lr != NULL; lr = lr->next, i++) {
1085         apr_pollfd_t *pfd;
1086         AP_DEBUG_ASSERT(i < num_listensocks);
1087         pfd = &listener_pollfd[i];
1088         pt = apr_pcalloc(p, sizeof(*pt));
1089         pfd->desc_type = APR_POLL_SOCKET;
1090         pfd->desc.s = lr->sd;
1091         pfd->reqevents = APR_POLLIN;
1092
1093         pt->type = PT_ACCEPT;
1094         pt->baton = lr;
1095
1096         pfd->client_data = pt;
1097
1098         apr_socket_opt_set(pfd->desc.s, APR_SO_NONBLOCK, 1);
1099         apr_pollset_add(event_pollset, pfd);
1100
1101         lr->accept_func = ap_unixd_accept;
1102     }
1103
1104 #if HAVE_SERF
1105     baton = apr_pcalloc(p, sizeof(*baton));
1106     baton->pollset = event_pollset;
1107     /* TODO: subpools, threads, reuse, etc.  -- currently use malloc() inside :( */
1108     baton->pool = p;
1109
1110     g_serf = serf_context_create_ex(baton,
1111                                     s_socket_add,
1112                                     s_socket_remove, p);
1113
1114     ap_register_provider(p, "mpm_serf",
1115                          "instance", "0", g_serf);
1116
1117 #endif
1118
1119     return APR_SUCCESS;
1120 }
1121
1122 static apr_status_t push_timer2worker(timer_event_t* te)
1123 {
1124     return ap_queue_push_timer(worker_queue, te);
1125 }
1126
1127 /*
1128  * Pre-condition: pfd->cs is neither in pollset nor timeout queue
1129  * this function may only be called by the listener
1130  */
1131 static apr_status_t push2worker(const apr_pollfd_t * pfd,
1132                                 apr_pollset_t * pollset)
1133 {
1134     listener_poll_type *pt = (listener_poll_type *) pfd->client_data;
1135     conn_state_t *cs = (conn_state_t *) pt->baton;
1136     apr_status_t rc;
1137
1138     rc = ap_queue_push(worker_queue, cs->pfd.desc.s, cs, cs->p);
1139     if (rc != APR_SUCCESS) {
1140         /* trash the connection; we couldn't queue the connected
1141          * socket to a worker
1142          */
1143         apr_bucket_alloc_destroy(cs->bucket_alloc);
1144         apr_socket_close(cs->pfd.desc.s);
1145         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1146                      ap_server_conf, "push2worker: ap_queue_push failed");
1147         apr_pool_clear(cs->p);
1148         ap_push_pool(worker_queue_info, cs->p);
1149     }
1150
1151     return rc;
1152 }
1153
1154 /* get_worker:
1155  *     If *have_idle_worker_p == 0, reserve a worker thread, and set
1156  *     *have_idle_worker_p = 1.
1157  *     If *have_idle_worker_p is already 1, will do nothing.
1158  *     If blocking == 1, block if all workers are currently busy.
1159  *     If no worker was available immediately, will set *all_busy to 1.
1160  *     XXX: If there are no workers, we should not block immediately but
1161  *     XXX: close all keep-alive connections first.
1162  */
1163 static void get_worker(int *have_idle_worker_p, int blocking, int *all_busy)
1164 {
1165     apr_status_t rc;
1166
1167     if (*have_idle_worker_p) {
1168         /* already reserved a worker thread - must have hit a
1169          * transient error on a previous pass
1170          */
1171         return;
1172     }
1173
1174     if (blocking)
1175         rc = ap_queue_info_wait_for_idler(worker_queue_info, all_busy);
1176     else
1177         rc = ap_queue_info_try_get_idler(worker_queue_info);
1178
1179     if (rc == APR_SUCCESS) {
1180         *have_idle_worker_p = 1;
1181     }
1182     else if (!blocking && rc == APR_EAGAIN) {
1183         *all_busy = 1;
1184     }
1185     else if (!APR_STATUS_IS_EOF(rc)) {
1186         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1187                      "ap_queue_info_wait_for_idler failed.  "
1188                      "Attempting to shutdown process gracefully");
1189         signal_threads(ST_GRACEFUL);
1190     }
1191 }
1192
/* XXX: Convert to a skiplist or another better data structure.
 * The active timers live in a ring that event_register_timed_callback()
 * keeps sorted by walking it linearly on every insert (this is very bad).
 */

/* Timer structures available for reuse */
static APR_RING_HEAD(timer_free_ring_t, timer_event_t) timer_free_ring;
/* Active timers, ordered by ascending 'when' */
static APR_RING_HEAD(timer_ring_t, timer_event_t) timer_ring;

/* Held while the timer rings are read or modified */
static apr_thread_mutex_t *g_timer_ring_mtx;
1203
1204 static apr_status_t event_register_timed_callback(apr_time_t t,
1205                                                   ap_mpm_callback_fn_t *cbfn,
1206                                                   void *baton)
1207 {
1208     int inserted = 0;
1209     timer_event_t *ep;
1210     timer_event_t *te;
1211     /* oh yeah, and make locking smarter/fine grained. */
1212     apr_thread_mutex_lock(g_timer_ring_mtx);
1213
1214     if (!APR_RING_EMPTY(&timer_free_ring, timer_event_t, link)) {
1215         te = APR_RING_FIRST(&timer_free_ring);
1216         APR_RING_REMOVE(te, link);
1217     }
1218     else {
1219         /* XXXXX: lol, pool allocation without a context from any thread.Yeah. Right. MPMs Suck. */
1220         te = ap_malloc(sizeof(timer_event_t));
1221         APR_RING_ELEM_INIT(te, link);
1222     }
1223
1224     te->cbfunc = cbfn;
1225     te->baton = baton;
1226     /* XXXXX: optimize */
1227     te->when = t + apr_time_now();
1228
1229     /* Okay, insert sorted by when.. */
1230     for (ep = APR_RING_FIRST(&timer_ring);
1231          ep != APR_RING_SENTINEL(&timer_ring,
1232                                  timer_event_t, link);
1233          ep = APR_RING_NEXT(ep, link))
1234     {
1235         if (ep->when > te->when) {
1236             inserted = 1;
1237             APR_RING_INSERT_BEFORE(ep, te, link);
1238             break;
1239         }
1240     }
1241
1242     if (!inserted) {
1243         APR_RING_INSERT_TAIL(&timer_ring, te, timer_event_t, link);
1244     }
1245
1246     apr_thread_mutex_unlock(g_timer_ring_mtx);
1247
1248     return APR_SUCCESS;
1249 }
1250
1251 /*
1252  * Close socket and clean up if remote closed its end while we were in
1253  * lingering close.
1254  * Only to be called in the listener thread;
1255  * Pre-condition: cs is in one of the linger queues and in the pollset
1256  */
1257 static void process_lingering_close(conn_state_t *cs, const apr_pollfd_t *pfd)
1258 {
1259     apr_socket_t *csd = ap_get_conn_socket(cs->c);
1260     char dummybuf[2048];
1261     apr_size_t nbytes;
1262     apr_status_t rv;
1263     struct timeout_queue *q;
1264     q = (cs->state == CONN_STATE_LINGER_SHORT) ?  &short_linger_q : &linger_q;
1265
1266     /* socket is already in non-blocking state */
1267     do {
1268         nbytes = sizeof(dummybuf);
1269         rv = apr_socket_recv(csd, dummybuf, &nbytes);
1270     } while (rv == APR_SUCCESS);
1271
1272     if (!APR_STATUS_IS_EOF(rv)) {
1273         return;
1274     }
1275
1276     apr_thread_mutex_lock(timeout_mutex);
1277     rv = apr_pollset_remove(event_pollset, pfd);
1278     AP_DEBUG_ASSERT(rv == APR_SUCCESS);
1279
1280     rv = apr_socket_close(csd);
1281     AP_DEBUG_ASSERT(rv == APR_SUCCESS);
1282
1283     TO_QUEUE_REMOVE(*q, cs);
1284     apr_thread_mutex_unlock(timeout_mutex);
1285     TO_QUEUE_ELEM_INIT(cs);
1286
1287     apr_pool_clear(cs->p);
1288     ap_push_pool(worker_queue_info, cs->p);
1289 }
1290
1291 /* call 'func' for all elements of 'q' with timeout less than 'timeout_time'.
1292  * Pre-condition: timeout_mutex must already be locked
1293  * Post-condition: timeout_mutex will be locked again
1294  */
1295 static void process_timeout_queue(struct timeout_queue *q,
1296                                   apr_time_t timeout_time,
1297                                   int (*func)(conn_state_t *))
1298 {
1299     int count = 0;
1300     conn_state_t *first, *cs, *last;
1301     apr_status_t rv;
1302     if (!q->count) {
1303         return;
1304     }
1305     AP_DEBUG_ASSERT(!APR_RING_EMPTY(&q->head, conn_state_t, timeout_list));
1306
1307     cs = first = APR_RING_FIRST(&q->head);
1308     while (cs != APR_RING_SENTINEL(&q->head, conn_state_t, timeout_list)
1309            && cs->expiration_time < timeout_time) {
1310         last = cs;
1311         rv = apr_pollset_remove(event_pollset, &cs->pfd);
1312         if (rv != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rv)) {
1313             ap_log_cerror(APLOG_MARK, APLOG_ERR, rv, cs->c,
1314                           "apr_pollset_remove failed");
1315         }
1316         cs = APR_RING_NEXT(cs, timeout_list);
1317         count++;
1318     }
1319     if (!count)
1320         return;
1321
1322     APR_RING_UNSPLICE(first, last, timeout_list);
1323     AP_DEBUG_ASSERT(q->count >= count);
1324     q->count -= count;
1325     apr_thread_mutex_unlock(timeout_mutex);
1326     while (count) {
1327         cs = APR_RING_NEXT(first, timeout_list);
1328         TO_QUEUE_ELEM_INIT(first);
1329         func(first);
1330         first = cs;
1331         count--;
1332     }
1333     apr_thread_mutex_lock(timeout_mutex);
1334 }
1335
1336 static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
1337 {
1338     timer_event_t *ep;
1339     timer_event_t *te;
1340     apr_status_t rc;
1341     proc_info *ti = dummy;
1342     int process_slot = ti->pid;
1343     apr_pool_t *tpool = apr_thread_pool_get(thd);
1344     void *csd = NULL;
1345     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1346     ap_listen_rec *lr;
1347     int have_idle_worker = 0;
1348     conn_state_t *cs;
1349     const apr_pollfd_t *out_pfd;
1350     apr_int32_t num = 0;
1351     apr_interval_time_t timeout_interval;
1352     apr_time_t timeout_time = 0, now, last_log;
1353     listener_poll_type *pt;
1354     int closed = 0, listeners_disabled = 0;
1355
1356     last_log = apr_time_now();
1357     free(ti);
1358
1359     /* the following times out events that are really close in the future
1360      *   to prevent extra poll calls
1361      *
1362      * current value is .1 second
1363      */
1364 #define TIMEOUT_FUDGE_FACTOR 100000
1365 #define EVENT_FUDGE_FACTOR 10000
1366
1367     rc = init_pollset(tpool);
1368     if (rc != APR_SUCCESS) {
1369         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1370                      "failed to initialize pollset, "
1371                      "attempting to shutdown process gracefully");
1372         signal_threads(ST_GRACEFUL);
1373         return NULL;
1374     }
1375
1376     /* Unblock the signal used to wake this thread up, and set a handler for
1377      * it.
1378      */
1379     unblock_signal(LISTENER_SIGNAL);
1380     apr_signal(LISTENER_SIGNAL, dummy_signal_handler);
1381
1382     for (;;) {
1383         int workers_were_busy = 0;
1384         if (listener_may_exit) {
1385             close_listeners(process_slot, &closed);
1386             if (terminate_mode == ST_UNGRACEFUL
1387                 || apr_atomic_read32(&connection_count) == 0)
1388                 break;
1389         }
1390
1391         if (requests_this_child <= 0) {
1392             check_infinite_requests();
1393         }
1394
1395         now = apr_time_now();
1396         if (APLOGtrace6(ap_server_conf)) {
1397             /* trace log status every second */
1398             if (now - last_log > apr_time_from_msec(1000)) {
1399                 last_log = now;
1400                 apr_thread_mutex_lock(timeout_mutex);
1401                 ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
1402                              "connections: %d (write-completion: %d "
1403                              "keep-alive: %d lingering: %d)",
1404                              connection_count, write_completion_q.count,
1405                              keepalive_q.count,
1406                              linger_q.count + short_linger_q.count);
1407                 apr_thread_mutex_unlock(timeout_mutex);
1408             }
1409         }
1410
1411         apr_thread_mutex_lock(g_timer_ring_mtx);
1412         if (!APR_RING_EMPTY(&timer_ring, timer_event_t, link)) {
1413             te = APR_RING_FIRST(&timer_ring);
1414             if (te->when > now) {
1415                 timeout_interval = te->when - now;
1416             }
1417             else {
1418                 timeout_interval = 1;
1419             }
1420         }
1421         else {
1422             timeout_interval = apr_time_from_msec(100);
1423         }
1424         apr_thread_mutex_unlock(g_timer_ring_mtx);
1425
1426 #if HAVE_SERF
1427         rc = serf_context_prerun(g_serf);
1428         if (rc != APR_SUCCESS) {
1429             /* TOOD: what should do here? ugh. */
1430         }
1431 #endif
1432         rc = apr_pollset_poll(event_pollset, timeout_interval, &num, &out_pfd);
1433         if (rc != APR_SUCCESS) {
1434             if (APR_STATUS_IS_EINTR(rc)) {
1435                 continue;
1436             }
1437             if (!APR_STATUS_IS_TIMEUP(rc)) {
1438                 ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf,
1439                              "apr_pollset_poll failed.  Attempting to "
1440                              "shutdown process gracefully");
1441                 signal_threads(ST_GRACEFUL);
1442             }
1443         }
1444
1445         if (listener_may_exit) {
1446             close_listeners(process_slot, &closed);
1447             if (terminate_mode == ST_UNGRACEFUL
1448                 || apr_atomic_read32(&connection_count) == 0)
1449                 break;
1450         }
1451
1452         now = apr_time_now();
1453         apr_thread_mutex_lock(g_timer_ring_mtx);
1454         for (ep = APR_RING_FIRST(&timer_ring);
1455              ep != APR_RING_SENTINEL(&timer_ring,
1456                                      timer_event_t, link);
1457              ep = APR_RING_FIRST(&timer_ring))
1458         {
1459             if (ep->when < now + EVENT_FUDGE_FACTOR) {
1460                 APR_RING_REMOVE(ep, link);
1461                 push_timer2worker(ep);
1462             }
1463             else {
1464                 break;
1465             }
1466         }
1467         apr_thread_mutex_unlock(g_timer_ring_mtx);
1468
1469         while (num) {
1470             pt = (listener_poll_type *) out_pfd->client_data;
1471             if (pt->type == PT_CSD) {
1472                 /* one of the sockets is readable */
1473                 struct timeout_queue *remove_from_q = &write_completion_q;
1474                 int blocking = 1;
1475                 cs = (conn_state_t *) pt->baton;
1476                 switch (cs->state) {
1477                 case CONN_STATE_CHECK_REQUEST_LINE_READABLE:
1478                     cs->state = CONN_STATE_READ_REQUEST_LINE;
1479                     remove_from_q = &keepalive_q;
1480                     /* don't wait for a worker for a keepalive request */
1481                     blocking = 0;
1482                     /* FALL THROUGH */
1483                 case CONN_STATE_WRITE_COMPLETION:
1484                     get_worker(&have_idle_worker, blocking,
1485                                &workers_were_busy);
1486                     apr_thread_mutex_lock(timeout_mutex);
1487                     TO_QUEUE_REMOVE(*remove_from_q, cs);
1488                     rc = apr_pollset_remove(event_pollset, &cs->pfd);
1489
1490                     /*
1491                      * Some of the pollset backends, like KQueue or Epoll
1492                      * automagically remove the FD if the socket is closed,
1493                      * therefore, we can accept _SUCCESS or _NOTFOUND,
1494                      * and we still want to keep going
1495                      */
1496                     if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
1497                         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1498                                      "pollset remove failed");
1499                         apr_thread_mutex_unlock(timeout_mutex);
1500                         start_lingering_close(cs);
1501                         break;
1502                     }
1503
1504                     apr_thread_mutex_unlock(timeout_mutex);
1505                     TO_QUEUE_ELEM_INIT(cs);
1506                     /* If we didn't get a worker immediately for a keep-alive
1507                      * request, we close the connection, so that the client can
1508                      * re-connect to a different process.
1509                      */
1510                     if (!have_idle_worker) {
1511                         start_lingering_close(cs);
1512                         break;
1513                     }
1514                     rc = push2worker(out_pfd, event_pollset);
1515                     if (rc != APR_SUCCESS) {
1516                         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1517                                      ap_server_conf, "push2worker failed");
1518                     }
1519                     else {
1520                         have_idle_worker = 0;
1521                     }
1522                     break;
1523                 case CONN_STATE_LINGER_NORMAL:
1524                 case CONN_STATE_LINGER_SHORT:
1525                     process_lingering_close(cs, out_pfd);
1526                     break;
1527                 default:
1528                     ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1529                                  ap_server_conf,
1530                                  "event_loop: unexpected state %d",
1531                                  cs->state);
1532                     ap_assert(0);
1533                 }
1534             }
1535             else if (pt->type == PT_ACCEPT) {
1536                 /* A Listener Socket is ready for an accept() */
1537                 if (workers_were_busy) {
1538                     if (!listeners_disabled)
1539                         disable_listensocks(process_slot);
1540                     listeners_disabled = 1;
1541                     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1542                                  "All workers busy, not accepting new conns"
1543                                  "in this process");
1544                 }
1545                 else if (apr_atomic_read32(&connection_count) > threads_per_child
1546                          + ap_queue_info_get_idlers(worker_queue_info) *
1547                            worker_factor / WORKER_FACTOR_SCALE)
1548                 {
1549                     if (!listeners_disabled)
1550                         disable_listensocks(process_slot);
1551                     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1552                                  "Too many open connections (%u), "
1553                                  "not accepting new conns in this process",
1554                                  apr_atomic_read32(&connection_count));
1555                     ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1556                                  "Idle workers: %u",
1557                                  ap_queue_info_get_idlers(worker_queue_info));
1558                     listeners_disabled = 1;
1559                 }
1560                 else if (listeners_disabled) {
1561                     listeners_disabled = 0;
1562                     enable_listensocks(process_slot);
1563                 }
1564                 if (!listeners_disabled) {
1565                     lr = (ap_listen_rec *) pt->baton;
1566                     ap_pop_pool(&ptrans, worker_queue_info);
1567
1568                     if (ptrans == NULL) {
1569                         /* create a new transaction pool for each accepted socket */
1570                         apr_allocator_t *allocator;
1571
1572                         apr_allocator_create(&allocator);
1573                         apr_allocator_max_free_set(allocator,
1574                                                    ap_max_mem_free);
1575                         apr_pool_create_ex(&ptrans, pconf, NULL, allocator);
1576                         apr_allocator_owner_set(allocator, ptrans);
1577                         if (ptrans == NULL) {
1578                             ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1579                                          ap_server_conf,
1580                                          "Failed to create transaction pool");
1581                             signal_threads(ST_GRACEFUL);
1582                             return NULL;
1583                         }
1584                     }
1585                     apr_pool_tag(ptrans, "transaction");
1586
1587                     get_worker(&have_idle_worker, 1, &workers_were_busy);
1588                     rc = lr->accept_func(&csd, lr, ptrans);
1589
1590                     /* later we trash rv and rely on csd to indicate
1591                      * success/failure
1592                      */
1593                     AP_DEBUG_ASSERT(rc == APR_SUCCESS || !csd);
1594
1595                     if (rc == APR_EGENERAL) {
1596                         /* E[NM]FILE, ENOMEM, etc */
1597                         resource_shortage = 1;
1598                         signal_threads(ST_GRACEFUL);
1599                     }
1600
1601                     if (csd != NULL) {
1602                         rc = ap_queue_push(worker_queue, csd, NULL, ptrans);
1603                         if (rc != APR_SUCCESS) {
1604                             /* trash the connection; we couldn't queue the connected
1605                              * socket to a worker
1606                              */
1607                             apr_socket_close(csd);
1608                             ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1609                                          ap_server_conf,
1610                                          "ap_queue_push failed");
1611                             apr_pool_clear(ptrans);
1612                             ap_push_pool(worker_queue_info, ptrans);
1613                         }
1614                         else {
1615                             have_idle_worker = 0;
1616                         }
1617                     }
1618                     else {
1619                         apr_pool_clear(ptrans);
1620                         ap_push_pool(worker_queue_info, ptrans);
1621                     }
1622                 }
1623             }               /* if:else on pt->type */
1624 #if HAVE_SERF
1625             else if (pt->type == PT_SERF) {
1626                 /* send socket to serf. */
1627                 /* XXXX: this doesn't require get_worker() */
1628                 serf_event_trigger(g_serf, pt->baton, out_pfd);
1629             }
1630 #endif
1631             out_pfd++;
1632             num--;
1633         }                   /* while for processing poll */
1634
1635         /* XXX possible optimization: stash the current time for use as
1636          * r->request_time for new requests
1637          */
1638         now = apr_time_now();
1639         /* we only do this once per 0.1s (TIMEOUT_FUDGE_FACTOR) */
1640         if (now > timeout_time) {
1641             struct process_score *ps;
1642             timeout_time = now + TIMEOUT_FUDGE_FACTOR;
1643
1644             /* handle timed out sockets */
1645             apr_thread_mutex_lock(timeout_mutex);
1646
1647             /* Step 1: keepalive timeouts */
1648             /* If all workers are busy, we kill older keep-alive connections so that they
1649              * may connect to another process.
1650              */
1651             if (workers_were_busy && keepalive_q.count) {
1652                 ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
1653                              "All workers are busy, will close %d keep-alive "
1654                              "connections",
1655                              keepalive_q.count);
1656                 process_timeout_queue(&keepalive_q,
1657                                       timeout_time + ap_server_conf->keep_alive_timeout,
1658                                       start_lingering_close);
1659             }
1660             else {
1661                 process_timeout_queue(&keepalive_q, timeout_time,
1662                                       start_lingering_close);
1663             }
1664             /* Step 2: write completion timeouts */
1665             process_timeout_queue(&write_completion_q, timeout_time, start_lingering_close);
1666             /* Step 3: (normal) lingering close completion timeouts */
1667             process_timeout_queue(&linger_q, timeout_time, stop_lingering_close);
1668             /* Step 4: (short) lingering close completion timeouts */
1669             process_timeout_queue(&short_linger_q, timeout_time, stop_lingering_close);
1670
1671             ps = ap_get_scoreboard_process(process_slot);
1672             ps->write_completion = write_completion_q.count;
1673             ps->lingering_close = linger_q.count + short_linger_q.count;
1674             ps->keep_alive = keepalive_q.count;
1675             apr_thread_mutex_unlock(timeout_mutex);
1676
1677             ps->connections = apr_atomic_read32(&connection_count);
1678             /* XXX: should count CONN_STATE_SUSPENDED and set ps->suspended */
1679         }
1680         if (listeners_disabled && !workers_were_busy &&
1681             (int)apr_atomic_read32(&connection_count) <
1682             ((int)ap_queue_info_get_idlers(worker_queue_info) - 1) *
1683             worker_factor / WORKER_FACTOR_SCALE + threads_per_child)
1684         {
1685             listeners_disabled = 0;
1686             enable_listensocks(process_slot);
1687         }
1688         /*
1689          * XXX: do we need to set some timeout that re-enables the listensocks
1690          * XXX: in case no other event occurs?
1691          */
1692     }     /* listener main loop */
1693
1694     close_listeners(process_slot, &closed);
1695     ap_queue_term(worker_queue);
1696
1697     apr_thread_exit(thd, APR_SUCCESS);
1698     return NULL;
1699 }
1700
1701 /* XXX For ungraceful termination/restart, we definitely don't want to
1702  *     wait for active connections to finish but we may want to wait
1703  *     for idle workers to get out of the queue code and release mutexes,
1704  *     since those mutexes are cleaned up pretty soon and some systems
1705  *     may not react favorably (i.e., segfault) if operations are attempted
1706  *     on cleaned-up mutexes.
1707  */
1708 static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
1709 {
1710     proc_info *ti = dummy;
1711     int process_slot = ti->pid;
1712     int thread_slot = ti->tid;
1713     apr_socket_t *csd = NULL;
1714     conn_state_t *cs;
1715     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1716     apr_status_t rv;
1717     int is_idle = 0;
1718     timer_event_t *te = NULL;
1719
1720     free(ti);
1721
1722     ap_scoreboard_image->servers[process_slot][thread_slot].pid = ap_my_pid;
1723     ap_scoreboard_image->servers[process_slot][thread_slot].tid = apr_os_thread_current();
1724     ap_scoreboard_image->servers[process_slot][thread_slot].generation = retained->my_generation;
1725     ap_update_child_status_from_indexes(process_slot, thread_slot,
1726                                         SERVER_STARTING, NULL);
1727
1728     while (!workers_may_exit) {
1729         if (!is_idle) {
1730             rv = ap_queue_info_set_idle(worker_queue_info, NULL);
1731             if (rv != APR_SUCCESS) {
1732                 ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
1733                              "ap_queue_info_set_idle failed. Attempting to "
1734                              "shutdown process gracefully.");
1735                 signal_threads(ST_GRACEFUL);
1736                 break;
1737             }
1738             is_idle = 1;
1739         }
1740
1741         ap_update_child_status_from_indexes(process_slot, thread_slot,
1742                                             dying ? SERVER_GRACEFUL : SERVER_READY, NULL);
1743       worker_pop:
1744         if (workers_may_exit) {
1745             break;
1746         }
1747
1748         te = NULL;
1749         rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te);
1750
1751         if (rv != APR_SUCCESS) {
1752             /* We get APR_EOF during a graceful shutdown once all the
1753              * connections accepted by this server process have been handled.
1754              */
1755             if (APR_STATUS_IS_EOF(rv)) {
1756                 break;
1757             }
1758             /* We get APR_EINTR whenever ap_queue_pop() has been interrupted
1759              * from an explicit call to ap_queue_interrupt_all(). This allows
1760              * us to unblock threads stuck in ap_queue_pop() when a shutdown
1761              * is pending.
1762              *
1763              * If workers_may_exit is set and this is ungraceful termination/
1764              * restart, we are bound to get an error on some systems (e.g.,
1765              * AIX, which sanity-checks mutex operations) since the queue
1766              * may have already been cleaned up.  Don't log the "error" if
1767              * workers_may_exit is set.
1768              */
1769             else if (APR_STATUS_IS_EINTR(rv)) {
1770                 goto worker_pop;
1771             }
1772             /* We got some other error. */
1773             else if (!workers_may_exit) {
1774                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1775                              "ap_queue_pop failed");
1776             }
1777             continue;
1778         }
1779         if (te != NULL) {
1780             te->cbfunc(te->baton);
1781
1782             {
1783                 apr_thread_mutex_lock(g_timer_ring_mtx);
1784                 APR_RING_INSERT_TAIL(&timer_free_ring, te, timer_event_t, link);
1785                 apr_thread_mutex_unlock(g_timer_ring_mtx);
1786             }
1787         }
1788         else {
1789             is_idle = 0;
1790             worker_sockets[thread_slot] = csd;
1791             rv = process_socket(thd, ptrans, csd, cs, process_slot, thread_slot);
1792             if (!rv) {
1793                 requests_this_child--;
1794             }
1795             worker_sockets[thread_slot] = NULL;
1796         }
1797     }
1798
1799     ap_update_child_status_from_indexes(process_slot, thread_slot,
1800                                         dying ? SERVER_DEAD :
1801                                         SERVER_GRACEFUL,
1802                                         (request_rec *) NULL);
1803
1804     apr_thread_exit(thd, APR_SUCCESS);
1805     return NULL;
1806 }
1807
/**
 * Signal filter handed to apr_signal_thread() in one_process mode.
 *
 * @param signum the signal number that was received
 * @return 1 for SIGTERM or SIGINT, 0 for every other signal
 */
static int check_signal(int signum)
{
    return (signum == SIGTERM || signum == SIGINT) ? 1 : 0;
}
1817
1818
1819
1820 static void create_listener_thread(thread_starter * ts)
1821 {
1822     int my_child_num = ts->child_num_arg;
1823     apr_threadattr_t *thread_attr = ts->threadattr;
1824     proc_info *my_info;
1825     apr_status_t rv;
1826
1827     my_info = (proc_info *) ap_malloc(sizeof(proc_info));
1828     my_info->pid = my_child_num;
1829     my_info->tid = -1;          /* listener thread doesn't have a thread slot */
1830     my_info->sd = 0;
1831     rv = apr_thread_create(&ts->listener, thread_attr, listener_thread,
1832                            my_info, pchild);
1833     if (rv != APR_SUCCESS) {
1834         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1835                      "apr_thread_create: unable to create listener thread");
1836         /* let the parent decide how bad this really is */
1837         clean_child_exit(APEXIT_CHILDSICK);
1838     }
1839     apr_os_thread_get(&listener_os_thread, ts->listener);
1840 }
1841
1842 /* XXX under some circumstances not understood, children can get stuck
1843  *     in start_threads forever trying to take over slots which will
1844  *     never be cleaned up; for now there is an APLOG_DEBUG message issued
1845  *     every so often when this condition occurs
1846  */
1847 static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
1848 {
1849     thread_starter *ts = dummy;
1850     apr_thread_t **threads = ts->threads;
1851     apr_threadattr_t *thread_attr = ts->threadattr;
1852     int child_num_arg = ts->child_num_arg;
1853     int my_child_num = child_num_arg;
1854     proc_info *my_info;
1855     apr_status_t rv;
1856     int i;
1857     int threads_created = 0;
1858     int listener_started = 0;
1859     int loops;
1860     int prev_threads_created;
1861     int max_recycled_pools = -1;
1862
1863     /* We must create the fd queues before we start up the listener
1864      * and worker threads. */
1865     worker_queue = apr_pcalloc(pchild, sizeof(*worker_queue));
1866     rv = ap_queue_init(worker_queue, threads_per_child, pchild);
1867     if (rv != APR_SUCCESS) {
1868         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1869                      "ap_queue_init() failed");
1870         clean_child_exit(APEXIT_CHILDFATAL);
1871     }
1872
1873     if (ap_max_mem_free != APR_ALLOCATOR_MAX_FREE_UNLIMITED) {
1874         /* If we want to conserve memory, let's not keep an unlimited number of
1875          * pools & allocators.
1876          * XXX: This should probably be a separate config directive
1877          */
1878         max_recycled_pools = threads_per_child * 3 / 4 ;
1879     }
1880     rv = ap_queue_info_create(&worker_queue_info, pchild,
1881                               threads_per_child, max_recycled_pools);
1882     if (rv != APR_SUCCESS) {
1883         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1884                      "ap_queue_info_create() failed");
1885         clean_child_exit(APEXIT_CHILDFATAL);
1886     }
1887
1888     /* Create the timeout mutex and main pollset before the listener
1889      * thread starts.
1890      */
1891     rv = apr_thread_mutex_create(&timeout_mutex, APR_THREAD_MUTEX_DEFAULT,
1892                                  pchild);
1893     if (rv != APR_SUCCESS) {
1894         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
1895                      "creation of the timeout mutex failed.");
1896         clean_child_exit(APEXIT_CHILDFATAL);
1897     }
1898
1899     /* Create the main pollset */
1900     rv = apr_pollset_create(&event_pollset,
1901                             threads_per_child, /* XXX don't we need more, to handle
1902                                                 * connections in K-A or lingering
1903                                                 * close?
1904                                                 */
1905                             pchild, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
1906     if (rv != APR_SUCCESS) {
1907         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
1908                      "apr_pollset_create with Thread Safety failed.");
1909         clean_child_exit(APEXIT_CHILDFATAL);
1910     }
1911
1912     worker_sockets = apr_pcalloc(pchild, threads_per_child
1913                                  * sizeof(apr_socket_t *));
1914
1915     loops = prev_threads_created = 0;
1916     while (1) {
1917         /* threads_per_child does not include the listener thread */
1918         for (i = 0; i < threads_per_child; i++) {
1919             int status =
1920                 ap_scoreboard_image->servers[child_num_arg][i].status;
1921
1922             if (status != SERVER_GRACEFUL && status != SERVER_DEAD) {
1923                 continue;
1924             }
1925
1926             my_info = (proc_info *) ap_malloc(sizeof(proc_info));
1927             my_info->pid = my_child_num;
1928             my_info->tid = i;
1929             my_info->sd = 0;
1930
1931             /* We are creating threads right now */
1932             ap_update_child_status_from_indexes(my_child_num, i,
1933                                                 SERVER_STARTING, NULL);
1934             /* We let each thread update its own scoreboard entry.  This is
1935              * done because it lets us deal with tid better.
1936              */
1937             rv = apr_thread_create(&threads[i], thread_attr,
1938                                    worker_thread, my_info, pchild);
1939             if (rv != APR_SUCCESS) {
1940                 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1941                              "apr_thread_create: unable to create worker thread");
1942                 /* let the parent decide how bad this really is */
1943                 clean_child_exit(APEXIT_CHILDSICK);
1944             }
1945             threads_created++;
1946         }
1947
1948         /* Start the listener only when there are workers available */
1949         if (!listener_started && threads_created) {
1950             create_listener_thread(ts);
1951             listener_started = 1;
1952         }
1953
1954
1955         if (start_thread_may_exit || threads_created == threads_per_child) {
1956             break;
1957         }
1958         /* wait for previous generation to clean up an entry */
1959         apr_sleep(apr_time_from_sec(1));
1960         ++loops;
1961         if (loops % 120 == 0) { /* every couple of minutes */
1962             if (prev_threads_created == threads_created) {
1963                 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1964                              "child %" APR_PID_T_FMT " isn't taking over "
1965                              "slots very quickly (%d of %d)",
1966                              ap_my_pid, threads_created,
1967                              threads_per_child);
1968             }
1969             prev_threads_created = threads_created;
1970         }
1971     }
1972
1973     /* What state should this child_main process be listed as in the
1974      * scoreboard...?
1975      *  ap_update_child_status_from_indexes(my_child_num, i, SERVER_STARTING,
1976      *                                      (request_rec *) NULL);
1977      *
1978      *  This state should be listed separately in the scoreboard, in some kind
1979      *  of process_status, not mixed in with the worker threads' status.
1980      *  "life_status" is almost right, but it's in the worker's structure, and
1981      *  the name could be clearer.   gla
1982      */
1983     apr_thread_exit(thd, APR_SUCCESS);
1984     return NULL;
1985 }
1986
1987 static void join_workers(apr_thread_t * listener, apr_thread_t ** threads)
1988 {
1989     int i;
1990     apr_status_t rv, thread_rv;
1991
1992     if (listener) {
1993         int iter;
1994
1995         /* deal with a rare timing window which affects waking up the
1996          * listener thread...  if the signal sent to the listener thread
1997          * is delivered between the time it verifies that the
1998          * listener_may_exit flag is clear and the time it enters a
1999          * blocking syscall, the signal didn't do any good...  work around
2000          * that by sleeping briefly and sending it again
2001          */
2002
2003         iter = 0;
2004         while (iter < 10 && !dying) {
2005             /* listener has not stopped accepting yet */
2006             apr_sleep(apr_time_make(0, 500000));
2007             wakeup_listener();
2008             ++iter;
2009         }
2010         if (iter >= 10) {
2011             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
2012                          "the listener thread didn't stop accepting");
2013         }
2014         else {
2015             rv = apr_thread_join(&thread_rv, listener);
2016             if (rv != APR_SUCCESS) {
2017                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
2018                              "apr_thread_join: unable to join listener thread");
2019             }
2020         }
2021     }
2022
2023     for (i = 0; i < threads_per_child; i++) {
2024         if (threads[i]) {       /* if we ever created this thread */
2025             rv = apr_thread_join(&thread_rv, threads[i]);
2026             if (rv != APR_SUCCESS) {
2027                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
2028                              "apr_thread_join: unable to join worker "
2029                              "thread %d", i);
2030             }
2031         }
2032     }
2033 }
2034
2035 static void join_start_thread(apr_thread_t * start_thread_id)
2036 {
2037     apr_status_t rv, thread_rv;
2038
2039     start_thread_may_exit = 1;  /* tell it to give up in case it is still
2040                                  * trying to take over slots from a
2041                                  * previous generation
2042                                  */
2043     rv = apr_thread_join(&thread_rv, start_thread_id);
2044     if (rv != APR_SUCCESS) {
2045         ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
2046                      "apr_thread_join: unable to join the start " "thread");
2047     }
2048 }
2049
2050 static void child_main(int child_num_arg)
2051 {
2052     apr_thread_t **threads;
2053     apr_status_t rv;
2054     thread_starter *ts;
2055     apr_threadattr_t *thread_attr;
2056     apr_thread_t *start_thread_id;
2057
2058     mpm_state = AP_MPMQ_STARTING;       /* for benefit of any hooks that run as this
2059                                          * child initializes
2060                                          */
2061     ap_my_pid = getpid();
2062     ap_fatal_signal_child_setup(ap_server_conf);
2063     apr_pool_create(&pchild, pconf);
2064
2065     /*stuff to do before we switch id's, so we have permissions. */
2066     ap_reopen_scoreboard(pchild, NULL, 0);
2067
2068     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
2069         clean_child_exit(APEXIT_CHILDFATAL);
2070     }
2071
2072     apr_thread_mutex_create(&g_timer_ring_mtx, APR_THREAD_MUTEX_DEFAULT, pchild);
2073     APR_RING_INIT(&timer_free_ring, timer_event_t, link);
2074     APR_RING_INIT(&timer_ring, timer_event_t, link);
2075     ap_run_child_init(pchild, ap_server_conf);
2076
2077     /* done with init critical section */
2078
2079     /* Just use the standard apr_setup_signal_thread to block all signals
2080      * from being received.  The child processes no longer use signals for
2081      * any communication with the parent process.
2082      */
2083     rv = apr_setup_signal_thread();
2084     if (rv != APR_SUCCESS) {
2085         ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
2086                      "Couldn't initialize signal thread");
2087         clean_child_exit(APEXIT_CHILDFATAL);
2088     }
2089
2090     if (ap_max_requests_per_child) {
2091         requests_this_child = ap_max_requests_per_child;
2092     }
2093     else {
2094         /* coding a value of zero means infinity */
2095         requests_this_child = INT_MAX;
2096     }
2097
2098     /* Setup worker threads */
2099
2100     /* clear the storage; we may not create all our threads immediately,
2101      * and we want a 0 entry to indicate a thread which was not created
2102      */
2103     threads = ap_calloc(threads_per_child, sizeof(apr_thread_t *));
2104     ts = apr_palloc(pchild, sizeof(*ts));
2105
2106     apr_threadattr_create(&thread_attr, pchild);
2107     /* 0 means PTHREAD_CREATE_JOINABLE */
2108     apr_threadattr_detach_set(thread_attr, 0);
2109
2110     if (ap_thread_stacksize != 0) {
2111         apr_threadattr_stacksize_set(thread_attr, ap_thread_stacksize);
2112     }
2113
2114     ts->threads = threads;
2115     ts->listener = NULL;
2116     ts->child_num_arg = child_num_arg;
2117     ts->threadattr = thread_attr;
2118
2119     rv = apr_thread_create(&start_thread_id, thread_attr, start_threads,
2120                            ts, pchild);
2121     if (rv != APR_SUCCESS) {
2122         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
2123                      "apr_thread_create: unable to create worker thread");
2124         /* let the parent decide how bad this really is */
2125         clean_child_exit(APEXIT_CHILDSICK);
2126     }
2127
2128     mpm_state = AP_MPMQ_RUNNING;
2129
2130     /* If we are only running in one_process mode, we will want to
2131      * still handle signals. */
2132     if (one_process) {
2133         /* Block until we get a terminating signal. */
2134         apr_signal_thread(check_signal);
2135         /* make sure the start thread has finished; signal_threads()
2136          * and join_workers() depend on that
2137          */
2138         /* XXX join_start_thread() won't be awakened if one of our
2139          *     threads encounters a critical error and attempts to
2140          *     shutdown this child
2141          */
2142         join_start_thread(start_thread_id);
2143
2144         /* helps us terminate a little more quickly than the dispatch of the
2145          * signal thread; beats the Pipe of Death and the browsers
2146          */
2147         signal_threads(ST_UNGRACEFUL);
2148
2149         /* A terminating signal was received. Now join each of the
2150          * workers to clean them up.
2151          *   If the worker already exited, then the join frees
2152          *   their resources and returns.
2153          *   If the worker hasn't exited, then this blocks until
2154          *   they have (then cleans up).
2155          */
2156         join_workers(ts->listener, threads);
2157     }
2158     else {                      /* !one_process */
2159         /* remove SIGTERM from the set of blocked signals...  if one of
2160          * the other threads in the process needs to take us down
2161          * (e.g., for MaxConnectionsPerChild) it will send us SIGTERM
2162          */
2163         unblock_signal(SIGTERM);
2164         apr_signal(SIGTERM, dummy_signal_handler);
2165         /* Watch for any messages from the parent over the POD */
2166         while (1) {
2167             rv = ap_event_pod_check(pod);
2168             if (rv == AP_NORESTART) {
2169                 /* see if termination was triggered while we slept */
2170                 switch (terminate_mode) {
2171                 case ST_GRACEFUL:
2172                     rv = AP_GRACEFUL;
2173                     break;
2174                 case ST_UNGRACEFUL:
2175                     rv = AP_RESTART;
2176                     break;
2177                 }
2178             }
2179             if (rv == AP_GRACEFUL || rv == AP_RESTART) {
2180                 /* make sure the start thread has finished;
2181                  * signal_threads() and join_workers depend on that
2182                  */
2183                 join_start_thread(start_thread_id);
2184                 signal_threads(rv ==
2185                                AP_GRACEFUL ? ST_GRACEFUL : ST_UNGRACEFUL);
2186                 break;
2187             }
2188         }
2189
2190         /* A terminating signal was received. Now join each of the
2191          * workers to clean them up.
2192          *   If the worker already exited, then the join frees
2193          *   their resources and returns.
2194          *   If the worker hasn't exited, then this blocks until
2195          *   they have (then cleans up).
2196          */
2197         join_workers(ts->listener, threads);
2198     }
2199
2200     free(threads);
2201
2202     clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
2203 }
2204
2205 static int make_child(server_rec * s, int slot)
2206 {
2207     int pid;
2208
2209     if (slot + 1 > retained->max_daemons_limit) {
2210         retained->max_daemons_limit = slot + 1;
2211     }
2212
2213     if (one_process) {
2214         set_signals();
2215         event_note_child_started(slot, getpid());
2216         child_main(slot);
2217         /* NOTREACHED */
2218     }
2219
2220     if ((pid = fork()) == -1) {
2221         ap_log_error(APLOG_MARK, APLOG_ERR, errno, s,
2222                      "fork: Unable to fork new process");
2223
2224         /* fork didn't succeed.  There's no need to touch the scoreboard;
2225          * if we were trying to replace a failed child process, then
2226          * server_main_loop() marked its workers SERVER_DEAD, and if
2227          * we were trying to replace a child process that exited normally,
2228          * its worker_thread()s left SERVER_DEAD or SERVER_GRACEFUL behind.
2229          */
2230
2231         /* In case system resources are maxxed out, we don't want
2232            Apache running away with the CPU trying to fork over and
2233            over and over again. */
2234         apr_sleep(apr_time_from_sec(10));
2235
2236         return -1;
2237     }
2238
2239     if (!pid) {
2240 #ifdef HAVE_BINDPROCESSOR
2241         /* By default, AIX binds to a single processor.  This bit unbinds
2242          * children which will then bind to another CPU.
2243          */
2244         int status = bindprocessor(BINDPROCESS, (int) getpid(),
2245                                    PROCESSOR_CLASS_ANY);
2246         if (status != OK)
2247             ap_log_error(APLOG_MARK, APLOG_DEBUG, errno,
2248                          ap_server_conf,
2249                          "processor unbind failed");
2250 #endif
2251         RAISE_SIGSTOP(MAKE_CHILD);
2252
2253         apr_signal(SIGTERM, just_die);
2254         child_main(slot);
2255         /* NOTREACHED */
2256     }
2257     /* else */
2258     if (ap_scoreboard_image->parent[slot].pid != 0) {
2259         /* This new child process is squatting on the scoreboard
2260          * entry owned by an exiting child process, which cannot
2261          * exit until all active requests complete.
2262          */
2263         event_note_child_lost_slot(slot, pid);
2264     }
2265     ap_scoreboard_image->parent[slot].quiescing = 0;
2266     ap_scoreboard_image->parent[slot].not_accepting = 0;
2267     event_note_child_started(slot, pid);
2268     return 0;
2269 }
2270
2271 /* start up a bunch of children */
2272 static void startup_children(int number_to_start)
2273 {
2274     int i;
2275
2276     for (i = 0; number_to_start && i < ap_daemons_limit; ++i) {
2277         if (ap_scoreboard_image->parent[i].pid != 0) {
2278             continue;
2279         }
2280         if (make_child(ap_server_conf, i) < 0) {
2281             break;
2282         }
2283         --number_to_start;
2284     }
2285 }
2286
2287 static void perform_idle_server_maintenance(void)
2288 {
2289     int i, j;
2290     int idle_thread_count;
2291     worker_score *ws;
2292     process_score *ps;
2293     int free_length;
2294     int totally_free_length = 0;
2295     int free_slots[MAX_SPAWN_RATE];
2296     int last_non_dead;
2297     int total_non_dead;
2298     int active_thread_count = 0;
2299
2300     /* initialize the free_list */
2301     free_length = 0;
2302
2303     idle_thread_count = 0;
2304     last_non_dead = -1;
2305     total_non_dead = 0;
2306
2307     for (i = 0; i < ap_daemons_limit; ++i) {
2308         /* Initialization to satisfy the compiler. It doesn't know
2309          * that threads_per_child is always > 0 */
2310         int status = SERVER_DEAD;
2311         int any_dying_threads = 0;
2312         int any_dead_threads = 0;
2313         int all_dead_threads = 1;
2314
2315         if (i >= retained->max_daemons_limit
2316             && totally_free_length == retained->idle_spawn_rate)
2317             /* short cut if all active processes have been examined and
2318              * enough empty scoreboard slots have been found
2319              */
2320
2321             break;
2322         ps = &ap_scoreboard_image->parent[i];
2323         for (j = 0; j < threads_per_child; j++) {
2324             ws = &ap_scoreboard_image->servers[i][j];
2325             status = ws->status;
2326
2327             /* XXX any_dying_threads is probably no longer needed    GLA */
2328             any_dying_threads = any_dying_threads ||
2329                 (status == SERVER_GRACEFUL);
2330             any_dead_threads = any_dead_threads || (status == SERVER_DEAD);
2331             all_dead_threads = all_dead_threads &&
2332                 (status == SERVER_DEAD || status == SERVER_GRACEFUL);
2333
2334             /* We consider a starting server as idle because we started it
2335              * at least a cycle ago, and if it still hasn't finished starting
2336              * then we're just going to swamp things worse by forking more.
2337              * So we hopefully won't need to fork more if we count it.
2338              * This depends on the ordering of SERVER_READY and SERVER_STARTING.
2339              */
2340             if (ps->pid != 0) { /* XXX just set all_dead_threads in outer
2341                                    for loop if no pid?  not much else matters */
2342                 if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting
2343                     && ps->generation == retained->my_generation)
2344                 {
2345                     ++idle_thread_count;
2346                 }
2347                 if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
2348                     ++active_thread_count;
2349                 }
2350             }
2351         }
2352         if (any_dead_threads
2353             && totally_free_length < retained->idle_spawn_rate
2354             && free_length < MAX_SPAWN_RATE
2355             && (!ps->pid      /* no process in the slot */
2356                   || ps->quiescing)) {  /* or at least one is going away */
2357             if (all_dead_threads) {
2358                 /* great! we prefer these, because the new process can
2359                  * start more threads sooner.  So prioritize this slot
2360                  * by putting it ahead of any slots with active threads.
2361                  *
2362                  * first, make room by moving a slot that's potentially still
2363                  * in use to the end of the array
2364                  */
2365                 free_slots[free_length] = free_slots[totally_free_length];
2366                 free_slots[totally_free_length++] = i;
2367             }
2368             else {
2369                 /* slot is still in use - back of the bus
2370                  */
2371                 free_slots[free_length] = i;
2372             }
2373             ++free_length;
2374         }
2375         /* XXX if (!ps->quiescing)     is probably more reliable  GLA */
2376         if (!any_dying_threads) {
2377             last_non_dead = i;
2378             ++total_non_dead;
2379         }
2380     }
2381
2382     if (retained->sick_child_detected) {
2383         if (active_thread_count > 0) {
2384             /* some child processes appear to be working.  don't kill the
2385              * whole server.
2386              */
2387             retained->sick_child_detected = 0;
2388         }
2389         else {
2390             /* looks like a basket case.  give up.
2391              */
2392             shutdown_pending = 1;
2393             child_fatal = 1;
2394             ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
2395                          ap_server_conf,
2396                          "No active workers found..."
2397                          " Apache is exiting!");
2398             /* the child already logged the failure details */
2399             return;
2400         }
2401     }
2402
2403     retained->max_daemons_limit = last_non_dead + 1;
2404
2405     if (idle_thread_count > max_spare_threads) {
2406         /* Kill off one child */
2407         ap_event_pod_signal(pod, TRUE);
2408         retained->idle_spawn_rate = 1;
2409     }
2410     else if (idle_thread_count < min_spare_threads) {
2411         /* terminate the free list */
2412         if (free_length == 0) { /* scoreboard is full, can't fork */
2413
2414             if (active_thread_count >= ap_daemons_limit * threads_per_child) {
2415                 if (!retained->maxclients_reported) {
2416                     /* only report this condition once */
2417                     ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf,
2418                                  "server reached MaxRequestWorkers setting, "
2419                                  "consider raising the MaxRequestWorkers "
2420                                  "setting");
2421                     retained->maxclients_reported = 1;
2422                 }
2423             }
2424             else {
2425                 ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf,
2426                              "scoreboard is full, not at MaxRequestWorkers");
2427             }
2428             retained->idle_spawn_rate = 1;
2429         }
2430         else {
2431             if (free_length > retained->idle_spawn_rate) {
2432                 free_length = retained->idle_spawn_rate;
2433             }
2434             if (retained->idle_spawn_rate >= 8) {
2435                 ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf,
2436                              "server seems busy, (you may need "
2437                              "to increase StartServers, ThreadsPerChild "
2438                              "or Min/MaxSpareThreads), "
2439                              "spawning %d children, there are around %d idle "
2440                              "threads, and %d total children", free_length,
2441                              idle_thread_count, total_non_dead);
2442             }
2443             for (i = 0; i < free_length; ++i) {
2444                 make_child(ap_server_conf, free_slots[i]);
2445             }
2446             /* the next time around we want to spawn twice as many if this
2447              * wasn't good enough, but not if we've just done a graceful
2448              */
2449             if (retained->hold_off_on_exponential_spawning) {
2450                 --retained->hold_off_on_exponential_spawning;
2451             }
2452             else if (retained->idle_spawn_rate < MAX_SPAWN_RATE) {
2453                 retained->idle_spawn_rate *= 2;
2454             }
2455         }
2456     }
2457     else {
2458         retained->idle_spawn_rate = 1;
2459     }
2460 }
2461
2462 static void server_main_loop(int remaining_children_to_start)
2463 {
2464     ap_generation_t old_gen;
2465     int child_slot;
2466     apr_exit_why_e exitwhy;
2467     int status, processed_status;
2468     apr_proc_t pid;
2469     int i;
2470
2471     while (!restart_pending && !shutdown_pending) {
2472         ap_wait_or_timeout(&exitwhy, &status, &pid, pconf, ap_server_conf);
2473
2474         if (pid.pid != -1) {
2475             processed_status = ap_process_child_status(&pid, exitwhy, status);
2476             child_slot = ap_find_child_by_pid(&pid);
2477             if (processed_status == APEXIT_CHILDFATAL) {
2478                 /* fix race condition found in PR 39311
2479                  * A child created at the same time as a graceful happens
2480                  * can find the lock missing and create a fatal error.
2481                  * It is not fatal for the last generation to be in this state.
2482                  */
2483                 if (child_slot < 0
2484                     || ap_get_scoreboard_process(child_slot)->generation
2485                        == retained->my_generation) {
2486                     shutdown_pending = 1;
2487                     child_fatal = 1;
2488                     return;
2489                 }
2490                 else {
2491                     ap_log_error(APLOG_MARK, APLOG_WARNING, 0, ap_server_conf,
2492                                  "Ignoring fatal error in child of previous "
2493                                  "generation (pid %ld).",
2494                                  (long)pid.pid);
2495                     retained->sick_child_detected = 1;
2496                 }
2497             }
2498             else if (processed_status == APEXIT_CHILDSICK) {
2499                 /* tell perform_idle_server_maintenance to check into this
2500                  * on the next timer pop
2501                  */
2502                 retained->sick_child_detected = 1;
2503             }
2504             /* non-fatal death... note that it's gone in the scoreboard. */
2505             if (child_slot >= 0) {
2506                 for (i = 0; i < threads_per_child; i++)
2507                     ap_update_child_status_from_indexes(child_slot, i,
2508                                                         SERVER_DEAD,
2509                                                         (request_rec *) NULL);
2510
2511                 event_note_child_killed(child_slot, 0, 0);
2512                 ap_scoreboard_image->parent[child_slot].quiescing = 0;
2513                 if (processed_status == APEXIT_CHILDSICK) {
2514                     /* resource shortage, minimize the fork rate */
2515                     retained->idle_spawn_rate = 1;
2516                 }
2517                 else if (remaining_children_to_start
2518                          && child_slot < ap_daemons_limit) {
2519                     /* we're still doing a 1-for-1 replacement of dead
2520                      * children with new children
2521                      */
2522                     make_child(ap_server_conf, child_slot);
2523                     --remaining_children_to_start;
2524                 }
2525             }
2526             else if (ap_unregister_extra_mpm_process(pid.pid, &old_gen) == 1) {
2527
2528                 event_note_child_killed(-1, /* already out of the scoreboard */
2529                                         pid.pid, old_gen);
2530 #if APR_HAS_OTHER_CHILD
2531             }
2532             else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
2533                                                 status) == 0) {
2534                 /* handled */
2535 #endif
2536             }
2537             else if (retained->is_graceful) {
2538                 /* Great, we've probably just lost a slot in the
2539                  * scoreboard.  Somehow we don't know about this child.
2540                  */
2541                 ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
2542                              ap_server_conf,
2543                              "long lost child came home! (pid %ld)",
2544                              (long) pid.pid);
2545             }
2546             /* Don't perform idle maintenance when a child dies,
2547              * only do it when there's a timeout.  Remember only a
2548              * finite number of children can die, and it's pretty
2549              * pathological for a lot to die suddenly.
2550              */
2551             continue;
2552         }
2553         else if (remaining_children_to_start) {
2554             /* we hit a 1 second timeout in which none of the previous
2555              * generation of children needed to be reaped... so assume
2556              * they're all done, and pick up the slack if any is left.
2557              */
2558             startup_children(remaining_children_to_start);
2559             remaining_children_to_start = 0;
2560             /* In any event we really shouldn't do the code below because
2561              * few of the servers we just started are in the IDLE state
2562              * yet, so we'd mistakenly create an extra server.
2563              */
2564             continue;
2565         }
2566
2567         perform_idle_server_maintenance();
2568     }
2569 }
2570
2571 static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
2572 {
2573     int remaining_children_to_start;
2574
2575     ap_log_pid(pconf, ap_pid_fname);
2576
2577     if (!retained->is_graceful) {
2578         if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
2579             mpm_state = AP_MPMQ_STOPPING;
2580             return DONE;
2581         }
2582         /* fix the generation number in the global score; we just got a new,
2583          * cleared scoreboard
2584          */
2585         ap_scoreboard_image->global->running_generation = retained->my_generation;
2586     }
2587
2588     restart_pending = shutdown_pending = 0;
2589     set_signals();
2590     /* Don't thrash... */
2591     if (max_spare_threads < min_spare_threads + threads_per_child)
2592         max_spare_threads = min_spare_threads + threads_per_child;
2593
2594     /* If we're doing a graceful_restart then we're going to see a lot
2595      * of children exiting immediately when we get into the main loop
2596      * below (because we just sent them AP_SIG_GRACEFUL).  This happens pretty
2597      * rapidly... and for each one that exits we may start a new one, until
2598      * there are at least min_spare_threads idle threads, counting across
2599      * all children.  But we may be permitted to start more children than
2600      * that, so we'll just keep track of how many we're
2601      * supposed to start up without the 1 second penalty between each fork.
2602      */
2603     remaining_children_to_start = ap_daemons_to_start;
2604     if (remaining_children_to_start > ap_daemons_limit) {
2605         remaining_children_to_start = ap_daemons_limit;
2606     }
2607     if (!retained->is_graceful) {
2608         startup_children(remaining_children_to_start);
2609         remaining_children_to_start = 0;
2610     }
2611     else {
2612         /* give the system some time to recover before kicking into
2613          * exponential mode */
2614         retained->hold_off_on_exponential_spawning = 10;
2615     }
2616
2617     ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2618                  "%s configured -- resuming normal operations",
2619                  ap_get_server_description());
2620     ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf,
2621                  "Server built: %s", ap_get_server_built());
2622     ap_log_command_line(plog, s);
2623
2624     mpm_state = AP_MPMQ_RUNNING;
2625
2626     server_main_loop(remaining_children_to_start);
2627     mpm_state = AP_MPMQ_STOPPING;
2628
2629     if (shutdown_pending && !retained->is_graceful) {
2630         /* Time to shut down:
2631          * Kill child processes, tell them to call child_exit, etc...
2632          */
2633         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2634         ap_reclaim_child_processes(1, /* Start with SIGTERM */
2635                                    event_note_child_killed);
2636
2637         if (!child_fatal) {
2638             /* cleanup pid file on normal shutdown */
2639             ap_remove_pid(pconf, ap_pid_fname);
2640             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0,
2641                          ap_server_conf, "caught SIGTERM, shutting down");
2642         }
2643         return DONE;
2644     } else if (shutdown_pending) {
2645         /* Time to gracefully shut down:
2646          * Kill child processes, tell them to call child_exit, etc...
2647          */
2648         int active_children;
2649         int index;
2650         apr_time_t cutoff = 0;
2651
2652         /* Close our listeners, and then ask our children to do same */
2653         ap_close_listeners();
2654         ap_event_pod_killpg(pod, ap_daemons_limit, TRUE);
2655         ap_relieve_child_processes(event_note_child_killed);
2656
2657         if (!child_fatal) {
2658             /* cleanup pid file on normal shutdown */
2659             ap_remove_pid(pconf, ap_pid_fname);
2660             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2661                          "caught " AP_SIG_GRACEFUL_STOP_STRING
2662                          ", shutting down gracefully");
2663         }
2664
2665         if (ap_graceful_shutdown_timeout) {
2666             cutoff = apr_time_now() +
2667                      apr_time_from_sec(ap_graceful_shutdown_timeout);
2668         }
2669
2670         /* Don't really exit until each child has finished */
2671         shutdown_pending = 0;
2672         do {
2673             /* Pause for a second */
2674             apr_sleep(apr_time_from_sec(1));
2675
2676             /* Relieve any children which have now exited */
2677             ap_relieve_child_processes(event_note_child_killed);
2678
2679             active_children = 0;
2680             for (index = 0; index < ap_daemons_limit; ++index) {
2681                 if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
2682                     active_children = 1;
2683                     /* Having just one child is enough to stay around */
2684                     break;
2685                 }
2686             }
2687         } while (!shutdown_pending && active_children &&
2688                  (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));
2689
2690         /* We might be here because we received SIGTERM, either
2691          * way, try and make sure that all of our processes are
2692          * really dead.
2693          */
2694         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2695         ap_reclaim_child_processes(1, event_note_child_killed);
2696
2697         return DONE;
2698     }
2699
2700     /* we've been told to restart */
2701     apr_signal(SIGHUP, SIG_IGN);
2702
2703     if (one_process) {
2704         /* not worth thinking about */
2705         return DONE;
2706     }
2707
2708     /* advance to the next generation */
2709     /* XXX: we really need to make sure this new generation number isn't in
2710      * use by any of the children.
2711      */
2712     ++retained->my_generation;
2713     ap_scoreboard_image->global->running_generation = retained->my_generation;
2714
2715     if (retained->is_graceful) {
2716         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2717                      AP_SIG_GRACEFUL_STRING
2718                      " received.  Doing graceful restart");
2719         /* wake up the children...time to die.  But we'll have more soon */
2720         ap_event_pod_killpg(pod, ap_daemons_limit, TRUE);
2721
2722
2723         /* This is mostly for debugging... so that we know what is still
2724          * gracefully dealing with existing request.
2725          */
2726
2727     }
2728     else {
2729         /* Kill 'em all.  Since the child acts the same on the parents SIGTERM
2730          * and a SIGHUP, we may as well use the same signal, because some user
2731          * pthreads are stealing signals from us left and right.
2732          */
2733         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2734
2735         ap_reclaim_child_processes(1,  /* Start with SIGTERM */
2736                                    event_note_child_killed);
2737         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2738                      "SIGHUP received.  Attempting to restart");
2739     }
2740
2741     return OK;
2742 }
2743
2744 /* This really should be a post_config hook, but the error log is already
2745  * redirected by that point, so we need to do this in the open_logs phase.
2746  */
2747 static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
2748                            apr_pool_t * ptemp, server_rec * s)
2749 {
2750     int startup = 0;
2751     int level_flags = 0;
2752     apr_status_t rv;
2753
2754     pconf = p;
2755
2756     /* the reverse of pre_config, we want this only the first time around */
2757     if (retained->module_loads == 1) {
2758         startup = 1;
2759         level_flags |= APLOG_STARTUP;
2760     }
2761
2762     if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
2763         ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
2764                      (startup ? NULL : s),
2765                      "no listening sockets available, shutting down");
2766         return DONE;
2767     }
2768
2769     if (!one_process) {
2770         if ((rv = ap_event_pod_open(pconf, &pod))) {
2771             ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
2772                          (startup ? NULL : s),
2773                          "could not open pipe-of-death");
2774             return DONE;
2775         }
2776     }
2777     return OK;
2778 }
2779
2780 static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
2781                             apr_pool_t * ptemp)
2782 {
2783     int no_detach, debug, foreground;
2784     apr_status_t rv;
2785     const char *userdata_key = "mpm_event_module";
2786
2787     mpm_state = AP_MPMQ_STARTING;
2788
2789     debug = ap_exists_config_define("DEBUG");
2790
2791     if (debug) {
2792         foreground = one_process = 1;
2793         no_detach = 0;
2794     }
2795     else {
2796         one_process = ap_exists_config_define("ONE_PROCESS");
2797         no_detach = ap_exists_config_define("NO_DETACH");
2798         foreground = ap_exists_config_define("FOREGROUND");
2799     }
2800
2801     /* sigh, want this only the second time around */
2802     retained = ap_retained_data_get(userdata_key);
2803     if (!retained) {
2804         retained = ap_retained_data_create(userdata_key, sizeof(*retained));
2805         retained->max_daemons_limit = -1;
2806         retained->idle_spawn_rate = 1;
2807     }
2808     ++retained->module_loads;
2809     if (retained->module_loads == 2) {
2810         rv = apr_pollset_create(&event_pollset, 1, plog,
2811                                 APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
2812         if (rv != APR_SUCCESS) {
2813             ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
2814                          "Couldn't create a Thread Safe Pollset. "
2815                          "Is it supported on your platform?"
2816                          "Also check system or user limits!");
2817             return HTTP_INTERNAL_SERVER_ERROR;
2818         }
2819         apr_pollset_destroy(event_pollset);
2820
2821         if (!one_process && !foreground) {
2822             /* before we detach, setup crash handlers to log to errorlog */
2823             ap_fatal_signal_setup(ap_server_conf, pconf);
2824             rv = apr_proc_detach(no_detach ? APR_PROC_DETACH_FOREGROUND
2825                                  : APR_PROC_DETACH_DAEMONIZE);
2826             if (rv != APR_SUCCESS) {
2827                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
2828                              "apr_proc_detach failed");
2829                 return HTTP_INTERNAL_SERVER_ERROR;
2830             }
2831         }
2832     }
2833
2834     parent_pid = ap_my_pid = getpid();
2835
2836     ap_listen_pre_config();
2837     ap_daemons_to_start = DEFAULT_START_DAEMON;
2838     min_spare_threads = DEFAULT_MIN_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
2839     max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
2840     server_limit = DEFAULT_SERVER_LIMIT;
2841     thread_limit = DEFAULT_THREAD_LIMIT;
2842     ap_daemons_limit = server_limit;
2843     threads_per_child = DEFAULT_THREADS_PER_CHILD;
2844     max_workers = ap_daemons_limit * threads_per_child;
2845     ap_extended_status = 0;
2846
2847     return OK;
2848 }
2849
2850 static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
2851                               apr_pool_t *ptemp, server_rec *s)
2852 {
2853     int startup = 0;
2854
2855     /* the reverse of pre_config, we want this only the first time around */
2856     if (retained->module_loads == 1) {
2857         startup = 1;
2858     }
2859
2860     if (server_limit > MAX_SERVER_LIMIT) {
2861         if (startup) {
2862             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2863                          "WARNING: ServerLimit of %d exceeds compile-time "
2864                          "limit of", server_limit);
2865             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2866                          " %d servers, decreasing to %d.",
2867                          MAX_SERVER_LIMIT, MAX_SERVER_LIMIT);
2868         } else {
2869             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2870                          "ServerLimit of %d exceeds compile-time limit "
2871                          "of %d, decreasing to match",
2872                          server_limit, MAX_SERVER_LIMIT);
2873         }
2874         server_limit = MAX_SERVER_LIMIT;
2875     }
2876     else if (server_limit < 1) {
2877         if (startup) {
2878             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2879                          "WARNING: ServerLimit of %d not allowed, "
2880                          "increasing to 1.", server_limit);
2881         } else {
2882             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2883                          "ServerLimit of %d not allowed, increasing to 1",
2884                          server_limit);
2885         }
2886         server_limit = 1;
2887     }
2888
2889     /* you cannot change ServerLimit across a restart; ignore
2890      * any such attempts
2891      */
2892     if (!retained->first_server_limit) {
2893         retained->first_server_limit = server_limit;
2894     }
2895     else if (server_limit != retained->first_server_limit) {
2896         /* don't need a startup console version here */
2897         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2898                      "changing ServerLimit to %d from original value of %d "
2899                      "not allowed during restart",
2900                      server_limit, retained->first_server_limit);
2901         server_limit = retained->first_server_limit;
2902     }
2903
2904     if (thread_limit > MAX_THREAD_LIMIT) {
2905         if (startup) {
2906             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2907                          "WARNING: ThreadLimit of %d exceeds compile-time "
2908                          "limit of", thread_limit);
2909             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2910                          " %d threads, decreasing to %d.",
2911                          MAX_THREAD_LIMIT, MAX_THREAD_LIMIT);
2912         } else {
2913             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2914                          "ThreadLimit of %d exceeds compile-time limit "
2915                          "of %d, decreasing to match",
2916                          thread_limit, MAX_THREAD_LIMIT);
2917         }
2918         thread_limit = MAX_THREAD_LIMIT;
2919     }
2920     else if (thread_limit < 1) {
2921         if (startup) {
2922             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2923                          "WARNING: ThreadLimit of %d not allowed, "
2924                          "increasing to 1.", thread_limit);
2925         } else {
2926             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2927                          "ThreadLimit of %d not allowed, increasing to 1",
2928                          thread_limit);
2929         }
2930         thread_limit = 1;
2931     }
2932
2933     /* you cannot change ThreadLimit across a restart; ignore
2934      * any such attempts
2935      */
2936     if (!retained->first_thread_limit) {
2937         retained->first_thread_limit = thread_limit;
2938     }
2939     else if (thread_limit != retained->first_thread_limit) {
2940         /* don't need a startup console version here */
2941         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2942                      "changing ThreadLimit to %d from original value of %d "
2943                      "not allowed during restart",
2944                      thread_limit, retained->first_thread_limit);
2945         thread_limit = retained->first_thread_limit;
2946     }
2947
2948     if (threads_per_child > thread_limit) {
2949         if (startup) {
2950             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2951                          "WARNING: ThreadsPerChild of %d exceeds ThreadLimit "
2952                          "of", threads_per_child);
2953             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2954                          " %d threads, decreasing to %d.",
2955                          thread_limit, thread_limit);
2956             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2957                          " To increase, please see the ThreadLimit "
2958                          "directive.");
2959         } else {
2960             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2961                          "ThreadsPerChild of %d exceeds ThreadLimit "
2962                          "of %d, decreasing to match",
2963                          threads_per_child, thread_limit);
2964         }
2965         threads_per_child = thread_limit;
2966     }
2967     else if (threads_per_child < 1) {
2968         if (startup) {
2969             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2970                          "WARNING: ThreadsPerChild of %d not allowed, "
2971                          "increasing to 1.", threads_per_child);
2972         } else {
2973             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2974                          "ThreadsPerChild of %d not allowed, increasing to 1",
2975                          threads_per_child);
2976         }
2977         threads_per_child = 1;
2978     }
2979
2980     if (max_workers < threads_per_child) {
2981         if (startup) {
2982             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2983                          "WARNING: MaxRequestWorkers of %d is less than "
2984                          "ThreadsPerChild of", max_workers);
2985             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2986                          " %d, increasing to %d.  MaxRequestWorkers must be at "
2987                          "least as large",
2988                          threads_per_child, threads_per_child);
2989             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2990                          " as the number of threads in a single server.");
2991         } else {
2992             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2993                          "MaxRequestWorkers of %d is less than ThreadsPerChild "
2994                          "of %d, increasing to match",
2995                          max_workers, threads_per_child);
2996         }
2997         max_workers = threads_per_child;
2998     }
2999
3000     ap_daemons_limit = max_workers / threads_per_child;
3001
3002     if (max_workers % threads_per_child) {
3003         int tmp_max_workers = ap_daemons_limit * threads_per_child;
3004
3005         if (startup) {
3006             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3007                          "WARNING: MaxRequestWorkers of %d is not an integer "
3008                          "multiple of", max_workers);
3009             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3010                          " ThreadsPerChild of %d, decreasing to nearest "
3011                          "multiple %d,", threads_per_child,
3012                          tmp_max_workers);
3013             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3014                          " for a maximum of %d servers.",
3015                          ap_daemons_limit);
3016         } else {
3017             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3018                          "MaxRequestWorkers of %d is not an integer multiple "
3019                          "of ThreadsPerChild of %d, decreasing to nearest "
3020                          "multiple %d", max_workers, threads_per_child,
3021                          tmp_max_workers);
3022         }
3023         max_workers = tmp_max_workers;
3024     }
3025
3026     if (ap_daemons_limit > server_limit) {
3027         if (startup) {
3028             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3029                          "WARNING: MaxRequestWorkers of %d would require %d "
3030                          "servers and ", max_workers, ap_daemons_limit);
3031             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3032                          " would exceed ServerLimit of %d, decreasing to %d.",
3033                          server_limit, server_limit * threads_per_child);
3034             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3035                          " To increase, please see the ServerLimit "
3036                          "directive.");
3037         } else {
3038             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3039                          "MaxRequestWorkers of %d would require %d servers and "
3040                          "exceed ServerLimit of %d, decreasing to %d",
3041                          max_workers, ap_daemons_limit, server_limit,
3042                          server_limit * threads_per_child);
3043         }
3044         ap_daemons_limit = server_limit;
3045     }
3046
3047     /* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */
3048     if (ap_daemons_to_start < 0) {
3049         if (startup) {
3050             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3051                          "WARNING: StartServers of %d not allowed, "
3052                          "increasing to 1.", ap_daemons_to_start);
3053         } else {
3054             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3055                          "StartServers of %d not allowed, increasing to 1",
3056                          ap_daemons_to_start);
3057         }
3058         ap_daemons_to_start = 1;
3059     }
3060
3061     if (min_spare_threads < 1) {
3062         if (startup) {
3063             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3064                          "WARNING: MinSpareThreads of %d not allowed, "
3065                          "increasing to 1", min_spare_threads);
3066             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3067                          " to avoid almost certain server failure.");
3068             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
3069                          " Please read the documentation.");
3070         } else {
3071             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
3072                          "MinSpareThreads of %d not allowed, increasing to 1",
3073                          min_spare_threads);
3074         }
3075         min_spare_threads = 1;
3076     }
3077
3078     /* max_spare_threads < min_spare_threads + threads_per_child
3079      * checked in ap_mpm_run()
3080      */
3081
3082     return OK;
3083 }
3084
3085 static void event_hooks(apr_pool_t * p)
3086 {
3087     /* Our open_logs hook function must run before the core's, or stderr
3088      * will be redirected to a file, and the messages won't print to the
3089      * console.
3090      */
3091     static const char *const aszSucc[] = { "core.c", NULL };
3092     one_process = 0;
3093
3094     ap_hook_open_logs(event_open_logs, NULL, aszSucc, APR_HOOK_REALLY_FIRST);
3095     /* we need to set the MPM state before other pre-config hooks use MPM query
3096      * to retrieve it, so register as REALLY_FIRST
3097      */
3098     ap_hook_pre_config(event_pre_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
3099     ap_hook_check_config(event_check_config, NULL, NULL, APR_HOOK_MIDDLE);
3100     ap_hook_mpm(event_run, NULL, NULL, APR_HOOK_MIDDLE);
3101     ap_hook_mpm_query(event_query, NULL, NULL, APR_HOOK_MIDDLE);
3102     ap_hook_mpm_register_timed_callback(event_register_timed_callback, NULL, NULL,
3103                                         APR_HOOK_MIDDLE);
3104     ap_hook_mpm_get_name(event_get_name, NULL, NULL, APR_HOOK_MIDDLE);
3105 }
3106
3107 static const char *set_daemons_to_start(cmd_parms *cmd, void *dummy,
3108                                         const char *arg)
3109 {
3110     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3111     if (err != NULL) {
3112         return err;
3113     }
3114
3115     ap_daemons_to_start = atoi(arg);
3116     return NULL;
3117 }
3118
3119 static const char *set_min_spare_threads(cmd_parms * cmd, void *dummy,
3120                                          const char *arg)
3121 {
3122     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3123     if (err != NULL) {
3124         return err;
3125     }
3126
3127     min_spare_threads = atoi(arg);
3128     return NULL;
3129 }
3130
3131 static const char *set_max_spare_threads(cmd_parms * cmd, void *dummy,
3132                                          const char *arg)
3133 {
3134     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3135     if (err != NULL) {
3136         return err;
3137     }
3138
3139     max_spare_threads = atoi(arg);
3140     return NULL;
3141 }
3142
3143 static const char *set_max_workers(cmd_parms * cmd, void *dummy,
3144                                    const char *arg)
3145 {
3146     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3147     if (err != NULL) {
3148         return err;
3149     }
3150     if (!strcasecmp(cmd->cmd->name, "MaxClients")) {
3151         ap_log_error(APLOG_MARK, APLOG_INFO, 0, NULL,
3152                      "MaxClients is deprecated, use MaxRequestWorkers "
3153                      "instead.");
3154     }
3155     max_workers = atoi(arg);
3156     return NULL;
3157 }
3158
3159 static const char *set_threads_per_child(cmd_parms * cmd, void *dummy,
3160                                          const char *arg)
3161 {
3162     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3163     if (err != NULL) {
3164         return err;
3165     }
3166
3167     threads_per_child = atoi(arg);
3168     return NULL;
3169 }
3170 static const char *set_server_limit (cmd_parms *cmd, void *dummy, const char *arg)
3171 {
3172     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3173     if (err != NULL) {
3174         return err;
3175     }
3176
3177     server_limit = atoi(arg);
3178     return NULL;
3179 }
3180
3181 static const char *set_thread_limit(cmd_parms * cmd, void *dummy,
3182                                     const char *arg)
3183 {
3184     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3185     if (err != NULL) {
3186         return err;
3187     }
3188
3189     thread_limit = atoi(arg);
3190     return NULL;
3191 }
3192
3193 static const char *set_worker_factor(cmd_parms * cmd, void *dummy,
3194                                      const char *arg)
3195 {
3196     double val;
3197     char *endptr;
3198     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
3199     if (err != NULL) {
3200         return err;
3201     }
3202
3203     val = strtod(arg, &endptr);
3204     if (*endptr)
3205         return "error parsing value";
3206
3207     worker_factor = val * WORKER_FACTOR_SCALE;
3208     if (worker_factor == 0)
3209         worker_factor = 1;
3210     return NULL;
3211 }
3212
3213
3214 static const command_rec event_cmds[] = {
3215     LISTEN_COMMANDS,
3216     AP_INIT_TAKE1("StartServers", set_daemons_to_start, NULL, RSRC_CONF,
3217                   "Number of child processes launched at server startup"),
3218     AP_INIT_TAKE1("ServerLimit", set_server_limit, NULL, RSRC_CONF,
3219                   "Maximum number of child processes for this run of Apache"),
3220     AP_INIT_TAKE1("MinSpareThreads", set_min_spare_threads, NULL, RSRC_CONF,
3221                   "Minimum number of idle threads, to handle request spikes"),
3222     AP_INIT_TAKE1("MaxSpareThreads", set_max_spare_threads, NULL, RSRC_CONF,
3223                   "Maximum number of idle threads"),
3224     AP_INIT_TAKE1("MaxClients", set_max_workers, NULL, RSRC_CONF,
3225                   "Deprecated name of MaxRequestWorkers"),
3226     AP_INIT_TAKE1("MaxRequestWorkers", set_max_workers, NULL, RSRC_CONF,
3227                   "Maximum number of threads alive at the same time"),
3228     AP_INIT_TAKE1("ThreadsPerChild", set_threads_per_child, NULL, RSRC_CONF,
3229                   "Number of threads each child creates"),
3230     AP_INIT_TAKE1("ThreadLimit", set_thread_limit, NULL, RSRC_CONF,
3231                   "Maximum number of worker threads per child process for this "
3232                   "run of Apache - Upper limit for ThreadsPerChild"),
3233     AP_INIT_TAKE1("AsyncRequestWorkerFactor", set_worker_factor, NULL, RSRC_CONF,
3234                   "How many additional connects will be accepted per idle "
3235                   "worker thread"),
3236     AP_GRACEFUL_SHUTDOWN_TIMEOUT_COMMAND,
3237     {NULL}
3238 };
3239
3240 AP_DECLARE_MODULE(mpm_event) = {
3241     MPM20_MODULE_STUFF,
3242     NULL,                       /* hook to run before apache parses args */
3243     NULL,                       /* create per-directory config structure */
3244     NULL,                       /* merge per-directory config structures */
3245     NULL,                       /* create per-server config structure */
3246     NULL,                       /* merge per-server config structures */
3247     event_cmds,                 /* command apr_table_t */
3248     event_hooks                 /* register_hooks */
3249 };