granicus.if.org Git - apache/blob - server/mpm/event/event.c

   1 /* Licensed to the Apache Software Foundation (ASF) under one or more
   2  * contributor license agreements.  See the NOTICE file distributed with
   3  * this work for additional information regarding copyright ownership.
   4  * The ASF licenses this file to You under the Apache License, Version 2.0
   5  * (the "License"); you may not use this file except in compliance with
   6  * the License.  You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * This MPM tries to fix the 'keep alive problem' in HTTP.
  19  *
  20  * After a client completes the first request, the client can keep the
  21  * connection open to send more requests with the same socket.  This can save
  22  * significant overhead in creating TCP connections.  However, the major
  23  * disadvantage is that Apache traditionally keeps an entire child
  24  * process/thread waiting for data from the client.  To solve this problem,
  25  * this MPM has a dedicated thread for handling both the Listening sockets,
  26  * and all sockets that are in a Keep Alive status.
  27  *
  28  * The MPM assumes the underlying apr_pollset implementation is somewhat
  29  * threadsafe.  This currently is only compatible with KQueue and EPoll.  This
  30  * enables the MPM to avoid extra high level locking or having to wake up the
  31  * listener thread when a keep-alive socket needs to be sent to it.
  32  *
  33  * This MPM does not perform well on older platforms that do not have very good
  34  * threading, like Linux with a 2.4 kernel, but this does not matter, since we
  35  * require EPoll or KQueue.
  36  *
  37  * For FreeBSD, use 5.3.  It is possible to run this MPM on FreeBSD 5.2.1, if
  38  * you use libkse (see `man libmap.conf`).
  39  *
  40  * For NetBSD, use at least 2.0.
  41  *
  42  * For Linux, you should use a 2.6 kernel, and make sure your glibc has epoll
  43  * support compiled in.
  44  *
  45  */
  46
  47 #include "apr.h"
  48 #include "apr_portable.h"
  49 #include "apr_strings.h"
  50 #include "apr_file_io.h"
  51 #include "apr_thread_proc.h"
  52 #include "apr_signal.h"
  53 #include "apr_thread_mutex.h"
  54 #include "apr_poll.h"
  55 #include "apr_ring.h"
  56 #include "apr_queue.h"
  57 #define APR_WANT_STRFUNC
  58 #include "apr_want.h"
  59 #include "apr_version.h"
  60
  61 #if APR_HAVE_UNISTD_H
  62 #include <unistd.h>
  63 #endif
  64 #if APR_HAVE_SYS_SOCKET_H
  65 #include <sys/socket.h>
  66 #endif
  67 #if APR_HAVE_SYS_WAIT_H
  68 #include <sys/wait.h>
  69 #endif
  70 #ifdef HAVE_SYS_PROCESSOR_H
  71 #include <sys/processor.h>      /* for bindprocessor() */
  72 #endif
  73
  74 #if !APR_HAS_THREADS
  75 #error The Event MPM requires APR threads, but they are unavailable.
  76 #endif
  77
  78 #include "ap_config.h"
  79 #include "httpd.h"
  80 #include "http_main.h"
  81 #include "http_log.h"
  82 #include "http_config.h"        /* for read_config */
  83 #include "http_core.h"          /* for get_remote_host */
  84 #include "http_connection.h"
  85 #include "ap_mpm.h"
  86 #include "pod.h"
  87 #include "mpm_common.h"
  88 #include "ap_listen.h"
  89 #include "scoreboard.h"
  90 #include "fdqueue.h"
  91 #include "mpm_default.h"
  92 #include "http_vhost.h"
  93 #include "unixd.h"
  94
  95 #include <signal.h>
  96 #include <limits.h>             /* for INT_MAX */
  97
  98
  99 #if HAVE_SERF
 100 #include "mod_serf.h"
 101 #include "serf.h"
 102 #endif
 103
 104 /* Limit on the total --- clients will be locked out if more servers than
 105  * this are needed.  It is intended solely to keep the server from crashing
 106  * when things get out of hand.
 107  *
 108  * We keep a hard maximum number of servers, for two reasons --- first off,
 109  * in case something goes seriously wrong, we want to stop the fork bomb
 110  * short of actually crashing the machine we're running on by filling some
 111  * kernel table.  Secondly, it keeps the size of the scoreboard file small
 112  * enough that we can read the whole thing without worrying too much about
 113  * the overhead.
 114  */
 115 #ifndef DEFAULT_SERVER_LIMIT
 116 #define DEFAULT_SERVER_LIMIT 16
 117 #endif
 118
 119 /* Admin can't tune ServerLimit beyond MAX_SERVER_LIMIT.  We want
 120  * some sort of compile-time limit to help catch typos.
 121  */
 122 #ifndef MAX_SERVER_LIMIT
 123 #define MAX_SERVER_LIMIT 20000
 124 #endif
 125
 126 /* Limit on the threads per process.  Clients will be locked out if more than
 127  * this are needed.
 128  *
 129  * We keep this for one reason it keeps the size of the scoreboard file small
 130  * enough that we can read the whole thing without worrying too much about
 131  * the overhead.
 132  */
 133 #ifndef DEFAULT_THREAD_LIMIT
 134 #define DEFAULT_THREAD_LIMIT 64
 135 #endif
 136
 137 /* Admin can't tune ThreadLimit beyond MAX_THREAD_LIMIT.  We want
 138  * some sort of compile-time limit to help catch typos.
 139  */
 140 #ifndef MAX_THREAD_LIMIT
 141 #define MAX_THREAD_LIMIT 100000
 142 #endif
 143
 144 #define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
 145
 146 #if !APR_VERSION_AT_LEAST(1,4,0)
 147 #define apr_time_from_msec(x) (x * 1000)
 148 #endif
 149
 150 /*
 151  * Actual definitions of config globals
 152  */
 153
 154 static int threads_per_child = 0;   /* Worker threads per child */
 155 static int ap_daemons_to_start = 0;
 156 static int min_spare_threads = 0;
 157 static int max_spare_threads = 0;
 158 static int ap_daemons_limit = 0;
 159 static int max_clients = 0;
 160 static int server_limit = 0;
 161 static int thread_limit = 0;
 162 static int dying = 0;
 163 static int workers_may_exit = 0;
 164 static int start_thread_may_exit = 0;
 165 static int listener_may_exit = 0;
 166 static int requests_this_child;
 167 static int num_listensocks = 0;
 168 static int resource_shortage = 0;
 169 static fd_queue_t *worker_queue;
 170 static fd_queue_info_t *worker_queue_info;
 171 static int mpm_state = AP_MPMQ_STARTING;
 172 static int sick_child_detected;
 173 static ap_generation_t volatile my_generation = 0;
 174
 175 static apr_thread_mutex_t *timeout_mutex;
 176 APR_RING_HEAD(timeout_head_t, conn_state_t);
 177 static struct timeout_head_t timeout_head, keepalive_timeout_head;
 178
 179 static apr_pollset_t *event_pollset;
 180
 181 #if HAVE_SERF
 182 typedef struct {
 183     apr_pollset_t *pollset;
 184     apr_pool_t *pool;
 185 } s_baton_t;
 186
 187 static serf_context_t *g_serf;
 188 #endif
 189
 190 /* The structure used to pass unique initialization info to each thread */
 191 typedef struct
 192 {
 193     int pid;
 194     int tid;
 195     int sd;
 196 } proc_info;
 197
 198 /* Structure used to pass information to the thread responsible for
 199  * creating the rest of the threads.
 200  */
 201 typedef struct
 202 {
 203     apr_thread_t **threads;
 204     apr_thread_t *listener;
 205     int child_num_arg;
 206     apr_threadattr_t *threadattr;
 207 } thread_starter;
 208
 209 typedef enum
 210 {
 211     PT_CSD,
 212     PT_ACCEPT
 213 #if HAVE_SERF
 214     , PT_SERF
 215 #endif
 216 } poll_type_e;
 217
 218 typedef struct
 219 {
 220     poll_type_e type;
 221     int bypass_push;
 222     void *baton;
 223 } listener_poll_type;
 224
 225 /* data retained by event across load/unload of the module
 226  * allocated on first call to pre-config hook; located on
 227  * subsequent calls to pre-config hook
 228  */
 229 typedef struct event_retained_data {
 230     int first_server_limit;
 231     int first_thread_limit;
 232     int module_loads;
 233 } event_retained_data;
 234 static event_retained_data *retained;
 235
 236 #define ID_FROM_CHILD_THREAD(c, t)    ((c * thread_limit) + t)
 237
 238 /*
 239  * The max child slot ever assigned, preserved across restarts.  Necessary
 240  * to deal with MaxClients changes across AP_SIG_GRACEFUL restarts.  We
 241  * use this value to optimize routines that have to scan the entire
 242  * scoreboard.
 243  */
 244 static int max_daemons_limit = -1;
 245
 246 static ap_event_pod_t *pod;
 247
 248 /* The event MPM respects a couple of runtime flags that can aid
 249  * in debugging. Setting the -DNO_DETACH flag will prevent the root process
 250  * from detaching from its controlling terminal. Additionally, setting
 251  * the -DONE_PROCESS flag (which implies -DNO_DETACH) will get you the
 252  * child_main loop running in the process which originally started up.
 253  * This gives you a pretty nice debugging environment.  (You'll get a SIGHUP
 254  * early in standalone_main; just continue through.  This is the server
 255  * trying to kill off any child processes which it might have lying
 256  * around --- Apache doesn't keep track of their pids, it just sends
 257  * SIGHUP to the process group, ignoring it in the root process.
 258  * Continue through and you'll be fine.).
 259  */
 260
 261 static int one_process = 0;
 262
 263 #ifdef DEBUG_SIGSTOP
 264 int raise_sigstop_flags;
 265 #endif
 266
 267 static apr_pool_t *pconf;       /* Pool for config stuff */
 268 static apr_pool_t *pchild;      /* Pool for httpd child stuff */
 269
 270 static pid_t ap_my_pid;         /* Linux getpid() doesn't work except in main
 271                                    thread. Use this instead */
 272 static pid_t parent_pid;
 273 static apr_os_thread_t *listener_os_thread;
 274
 275 /* The LISTENER_SIGNAL signal will be sent from the main thread to the
 276  * listener thread to wake it up for graceful termination (what a child
 277  * process from an old generation does when the admin does "apachectl
 278  * graceful").  This signal will be blocked in all threads of a child
 279  * process except for the listener thread.
 280  */
 281 #define LISTENER_SIGNAL     SIGHUP
 282
 283 /* An array of socket descriptors in use by each thread used to
 284  * perform a non-graceful (forced) shutdown of the server.
 285  */
 286 static apr_socket_t **worker_sockets;
 287
 288 static void close_worker_sockets(void)
 289 {
 290     int i;
 291     for (i = 0; i < threads_per_child; i++) {
 292         if (worker_sockets[i]) {
 293             apr_socket_close(worker_sockets[i]);
 294             worker_sockets[i] = NULL;
 295         }
 296     }
 297 }
 298
 299 static void wakeup_listener(void)
 300 {
 301     listener_may_exit = 1;
 302     if (!listener_os_thread) {
 303         /* XXX there is an obscure path that this doesn't handle perfectly:
 304          *     right after listener thread is created but before
 305          *     listener_os_thread is set, the first worker thread hits an
 306          *     error and starts graceful termination
 307          */
 308         return;
 309     }
 310
 311     /* unblock the listener if it's waiting for a worker */
 312     ap_queue_info_term(worker_queue_info);
 313
 314     /*
 315      * we should just be able to "kill(ap_my_pid, LISTENER_SIGNAL)" on all
 316      * platforms and wake up the listener thread since it is the only thread
 317      * with SIGHUP unblocked, but that doesn't work on Linux
 318      */
 319 #ifdef HAVE_PTHREAD_KILL
 320     pthread_kill(*listener_os_thread, LISTENER_SIGNAL);
 321 #else
 322     kill(ap_my_pid, LISTENER_SIGNAL);
 323 #endif
 324 }
 325
 326 #define ST_INIT              0
 327 #define ST_GRACEFUL          1
 328 #define ST_UNGRACEFUL        2
 329
 330 static int terminate_mode = ST_INIT;
 331
 332 static void signal_threads(int mode)
 333 {
 334     if (terminate_mode == mode) {
 335         return;
 336     }
 337     terminate_mode = mode;
 338     mpm_state = AP_MPMQ_STOPPING;
 339
 340     /* in case we weren't called from the listener thread, wake up the
 341      * listener thread
 342      */
 343     wakeup_listener();
 344
 345     /* for ungraceful termination, let the workers exit now;
 346      * for graceful termination, the listener thread will notify the
 347      * workers to exit once it has stopped accepting new connections
 348      */
 349     if (mode == ST_UNGRACEFUL) {
 350         workers_may_exit = 1;
 351         ap_queue_interrupt_all(worker_queue);
 352         close_worker_sockets(); /* forcefully kill all current connections */
 353     }
 354 }
 355
 356 static int event_query(int query_code, int *result, apr_status_t *rv)
 357 {
 358     *rv = APR_SUCCESS;
 359     switch (query_code) {
 360     case AP_MPMQ_MAX_DAEMON_USED:
 361         *result = max_daemons_limit;
 362         break;
 363     case AP_MPMQ_IS_THREADED:
 364         *result = AP_MPMQ_STATIC;
 365         break;
 366     case AP_MPMQ_IS_FORKED:
 367         *result = AP_MPMQ_DYNAMIC;
 368         break;
 369     case AP_MPMQ_IS_ASYNC:
 370         *result = 1;
 371         break;
 372     case AP_MPMQ_HAS_SERF:
 373         *result = 1;
 374         break;
 375     case AP_MPMQ_HARD_LIMIT_DAEMONS:
 376         *result = server_limit;
 377         break;
 378     case AP_MPMQ_HARD_LIMIT_THREADS:
 379         *result = thread_limit;
 380         break;
 381     case AP_MPMQ_MAX_THREADS:
 382         *result = threads_per_child;
 383         break;
 384     case AP_MPMQ_MIN_SPARE_DAEMONS:
 385         *result = 0;
 386         break;
 387     case AP_MPMQ_MIN_SPARE_THREADS:
 388         *result = min_spare_threads;
 389         break;
 390     case AP_MPMQ_MAX_SPARE_DAEMONS:
 391         *result = 0;
 392         break;
 393     case AP_MPMQ_MAX_SPARE_THREADS:
 394         *result = max_spare_threads;
 395         break;
 396     case AP_MPMQ_MAX_REQUESTS_DAEMON:
 397         *result = ap_max_requests_per_child;
 398         break;
 399     case AP_MPMQ_MAX_DAEMONS:
 400         *result = ap_daemons_limit;
 401         break;
 402     case AP_MPMQ_MPM_STATE:
 403         *result = mpm_state;
 404         break;
 405     case AP_MPMQ_GENERATION:
 406         *result = my_generation;
 407         break;
 408     default:
 409         *rv = APR_ENOTIMPL;
 410         break;
 411     }
 412     return OK;
 413 }
 414
 415 static apr_status_t event_note_child_killed(int childnum)
 416 {
 417     ap_scoreboard_image->parent[childnum].pid = 0;
 418     return APR_SUCCESS;
 419 }
 420
 421 static const char *event_get_name(void)
 422 {
 423     return "event";
 424 }
 425
 426 /* a clean exit from a child with proper cleanup */
 427 static void clean_child_exit(int code) __attribute__ ((noreturn));
 428 static void clean_child_exit(int code)
 429 {
 430     mpm_state = AP_MPMQ_STOPPING;
 431     if (pchild) {
 432         apr_pool_destroy(pchild);
 433     }
 434     exit(code);
 435 }
 436
 437 static void just_die(int sig)
 438 {
 439     clean_child_exit(0);
 440 }
 441
 442 /*****************************************************************
 443  * Connection structures and accounting...
 444  */
 445
 446 /* volatile just in case */
 447 static int volatile shutdown_pending;
 448 static int volatile restart_pending;
 449 static int volatile is_graceful;
 450 static volatile int child_fatal;
 451
 452 /*
 453  * ap_start_shutdown() and ap_start_restart(), below, are a first stab at
 454  * functions to initiate shutdown or restart without relying on signals.
 455  * Previously this was initiated in sig_term() and restart() signal handlers,
 456  * but we want to be able to start a shutdown/restart from other sources --
 457  * e.g. on Win32, from the service manager. Now the service manager can
 458  * call ap_start_shutdown() or ap_start_restart() as appropriate.  Note that
 459  * these functions can also be called by the child processes, since global
 460  * variables are no longer used to pass on the required action to the parent.
 461  *
 462  * These should only be called from the parent process itself, since the
 463  * parent process will use the shutdown_pending and restart_pending variables
 464  * to determine whether to shutdown or restart. The child process should
 465  * call signal_parent() directly to tell the parent to die -- this will
 466  * cause neither of those variable to be set, which the parent will
 467  * assume means something serious is wrong (which it will be, for the
 468  * child to force an exit) and so do an exit anyway.
 469  */
 470
 471 static void ap_start_shutdown(int graceful)
 472 {
 473     mpm_state = AP_MPMQ_STOPPING;
 474     if (shutdown_pending == 1) {
 475         /* Um, is this _probably_ not an error, if the user has
 476          * tried to do a shutdown twice quickly, so we won't
 477          * worry about reporting it.
 478          */
 479         return;
 480     }
 481     shutdown_pending = 1;
 482     is_graceful = graceful;
 483 }
 484
 485 /* do a graceful restart if graceful == 1 */
 486 static void ap_start_restart(int graceful)
 487 {
 488     mpm_state = AP_MPMQ_STOPPING;
 489     if (restart_pending == 1) {
 490         /* Probably not an error - don't bother reporting it */
 491         return;
 492     }
 493     restart_pending = 1;
 494     is_graceful = graceful;
 495 }
 496
 497 static void sig_term(int sig)
 498 {
 499     ap_start_shutdown(sig == AP_SIG_GRACEFUL_STOP);
 500 }
 501
 502 static void restart(int sig)
 503 {
 504     ap_start_restart(sig == AP_SIG_GRACEFUL);
 505 }
 506
 507 static void set_signals(void)
 508 {
 509 #ifndef NO_USE_SIGACTION
 510     struct sigaction sa;
 511 #endif
 512
 513     if (!one_process) {
 514         ap_fatal_signal_setup(ap_server_conf, pconf);
 515     }
 516
 517 #ifndef NO_USE_SIGACTION
 518     sigemptyset(&sa.sa_mask);
 519     sa.sa_flags = 0;
 520
 521     sa.sa_handler = sig_term;
 522     if (sigaction(SIGTERM, &sa, NULL) < 0)
 523         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 524                      "sigaction(SIGTERM)");
 525 #ifdef AP_SIG_GRACEFUL_STOP
 526     if (sigaction(AP_SIG_GRACEFUL_STOP, &sa, NULL) < 0)
 527         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 528                      "sigaction(" AP_SIG_GRACEFUL_STOP_STRING ")");
 529 #endif
 530 #ifdef SIGINT
 531     if (sigaction(SIGINT, &sa, NULL) < 0)
 532         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 533                      "sigaction(SIGINT)");
 534 #endif
 535 #ifdef SIGXCPU
 536     sa.sa_handler = SIG_DFL;
 537     if (sigaction(SIGXCPU, &sa, NULL) < 0)
 538         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 539                      "sigaction(SIGXCPU)");
 540 #endif
 541 #ifdef SIGXFSZ
 542     sa.sa_handler = SIG_DFL;
 543     if (sigaction(SIGXFSZ, &sa, NULL) < 0)
 544         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 545                      "sigaction(SIGXFSZ)");
 546 #endif
 547 #ifdef SIGPIPE
 548     sa.sa_handler = SIG_IGN;
 549     if (sigaction(SIGPIPE, &sa, NULL) < 0)
 550         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 551                      "sigaction(SIGPIPE)");
 552 #endif
 553
 554     /* we want to ignore HUPs and AP_SIG_GRACEFUL while we're busy
 555      * processing one */
 556     sigaddset(&sa.sa_mask, SIGHUP);
 557     sigaddset(&sa.sa_mask, AP_SIG_GRACEFUL);
 558     sa.sa_handler = restart;
 559     if (sigaction(SIGHUP, &sa, NULL) < 0)
 560         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 561                      "sigaction(SIGHUP)");
 562     if (sigaction(AP_SIG_GRACEFUL, &sa, NULL) < 0)
 563         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 564                      "sigaction(" AP_SIG_GRACEFUL_STRING ")");
 565 #else
 566     if (!one_process) {
 567 #ifdef SIGXCPU
 568         apr_signal(SIGXCPU, SIG_DFL);
 569 #endif /* SIGXCPU */
 570 #ifdef SIGXFSZ
 571         apr_signal(SIGXFSZ, SIG_DFL);
 572 #endif /* SIGXFSZ */
 573     }
 574
 575     apr_signal(SIGTERM, sig_term);
 576 #ifdef SIGHUP
 577     apr_signal(SIGHUP, restart);
 578 #endif /* SIGHUP */
 579 #ifdef AP_SIG_GRACEFUL
 580     apr_signal(AP_SIG_GRACEFUL, restart);
 581 #endif /* AP_SIG_GRACEFUL */
 582 #ifdef AP_SIG_GRACEFUL_STOP
 583      apr_signal(AP_SIG_GRACEFUL_STOP, sig_term);
 584 #endif /* AP_SIG_GRACEFUL_STOP */
 585 #ifdef SIGPIPE
 586     apr_signal(SIGPIPE, SIG_IGN);
 587 #endif /* SIGPIPE */
 588
 589 #endif
 590 }
 591
 592 /*****************************************************************
 593  * Child process main loop.
 594  */
 595
 596 static int process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock,
 597                           conn_state_t * cs, int my_child_num,
 598                           int my_thread_num)
 599 {
 600     conn_rec *c;
 601     listener_poll_type *pt;
 602     long conn_id = ID_FROM_CHILD_THREAD(my_child_num, my_thread_num);
 603     int rc;
 604     ap_sb_handle_t *sbh;
 605
 606     ap_create_sb_handle(&sbh, p, my_child_num, my_thread_num);
 607
 608     if (cs == NULL) {           /* This is a new connection */
 609
 610         cs = apr_pcalloc(p, sizeof(conn_state_t));
 611
 612         pt = apr_pcalloc(p, sizeof(*pt));
 613
 614         cs->bucket_alloc = apr_bucket_alloc_create(p);
 615         c = ap_run_create_connection(p, ap_server_conf, sock,
 616                                      conn_id, sbh, cs->bucket_alloc);
 617         c->current_thread = thd;
 618         cs->c = c;
 619         c->cs = cs;
 620         cs->p = p;
 621         cs->pfd.desc_type = APR_POLL_SOCKET;
 622         cs->pfd.reqevents = APR_POLLIN;
 623         cs->pfd.desc.s = sock;
 624         pt->type = PT_CSD;
 625         pt->bypass_push = 1;
 626         pt->baton = cs;
 627         cs->pfd.client_data = pt;
 628         APR_RING_ELEM_INIT(cs, timeout_list);
 629
 630         ap_update_vhost_given_ip(c);
 631
 632         rc = ap_run_pre_connection(c, sock);
 633         if (rc != OK && rc != DONE) {
 634             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
 635                          "process_socket: connection aborted");
 636             c->aborted = 1;
 637         }
 638
 639         /**
 640          * XXX If the platform does not have a usable way of bundling
 641          * accept() with a socket readability check, like Win32,
 642          * and there are measurable delays before the
 643          * socket is readable due to the first data packet arriving,
 644          * it might be better to create the cs on the listener thread
 645          * with the state set to CONN_STATE_CHECK_REQUEST_LINE_READABLE
 646          *
 647          * FreeBSD users will want to enable the HTTP accept filter
 648          * module in their kernel for the highest performance
 649          * When the accept filter is active, sockets are kept in the
 650          * kernel until a HTTP request is received.
 651          */
 652         cs->state = CONN_STATE_READ_REQUEST_LINE;
 653
 654     }
 655     else {
 656         c = cs->c;
 657         c->sbh = sbh;
 658         pt = cs->pfd.client_data;
 659         c->current_thread = thd;
 660     }
 661
 662     if (c->clogging_input_filters && !c->aborted) {
 663         /* Since we have an input filter which 'cloggs' the input stream,
 664          * like mod_ssl, lets just do the normal read from input filters,
 665          * like the Worker MPM does.
 666          */
 667         ap_run_process_connection(c);
 668         if (cs->state != CONN_STATE_SUSPENDED) {
 669             cs->state = CONN_STATE_LINGER;
 670         }
 671     }
 672
 673 read_request:
 674     if (cs->state == CONN_STATE_READ_REQUEST_LINE) {
 675         if (!c->aborted) {
 676             ap_run_process_connection(c);
 677
 678             /* state will be updated upon return
 679              * fall thru to either wait for readability/timeout or
 680              * do lingering close
 681              */
 682         }
 683         else {
 684             cs->state = CONN_STATE_LINGER;
 685         }
 686     }
 687
 688     if (cs->state == CONN_STATE_WRITE_COMPLETION) {
 689         ap_filter_t *output_filter = c->output_filters;
 690         apr_status_t rv;
 691         while (output_filter->next != NULL) {
 692             output_filter = output_filter->next;
 693         }
 694         rv = output_filter->frec->filter_func.out_func(output_filter, NULL);
 695         if (rv != APR_SUCCESS) {
 696             ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf,
 697                      "network write failure in core output filter");
 698             cs->state = CONN_STATE_LINGER;
 699         }
 700         else if (c->data_in_output_filters) {
 701             /* Still in WRITE_COMPLETION_STATE:
 702              * Set a write timeout for this connection, and let the
 703              * event thread poll for writeability.
 704              */
 705             cs->expiration_time = ap_server_conf->timeout + apr_time_now();
 706             apr_thread_mutex_lock(timeout_mutex);
 707             APR_RING_INSERT_TAIL(&timeout_head, cs, conn_state_t, timeout_list);
 708             apr_thread_mutex_unlock(timeout_mutex);
 709             pt->bypass_push = 0;
 710             cs->pfd.reqevents = APR_POLLOUT | APR_POLLHUP | APR_POLLERR;
 711             rc = apr_pollset_add(event_pollset, &cs->pfd);
 712             return 1;
 713         }
 714         else if (c->keepalive != AP_CONN_KEEPALIVE || c->aborted ||
 715             listener_may_exit) {
 716             c->cs->state = CONN_STATE_LINGER;
 717         }
 718         else if (c->data_in_input_filters) {
 719             cs->state = CONN_STATE_READ_REQUEST_LINE;
 720             goto read_request;
 721         }
 722         else {
 723             cs->state = CONN_STATE_CHECK_REQUEST_LINE_READABLE;
 724         }
 725     }
 726
 727     if (cs->state == CONN_STATE_LINGER) {
 728         ap_lingering_close(c);
 729         apr_pool_clear(p);
 730         ap_push_pool(worker_queue_info, p);
 731         return 0;
 732     }
 733     else if (cs->state == CONN_STATE_CHECK_REQUEST_LINE_READABLE) {
 734         apr_status_t rc;
 735         listener_poll_type *pt = (listener_poll_type *) cs->pfd.client_data;
 736
 737         /* It greatly simplifies the logic to use a single timeout value here
 738          * because the new element can just be added to the end of the list and
 739          * it will stay sorted in expiration time sequence.  If brand new
 740          * sockets are sent to the event thread for a readability check, this
 741          * will be a slight behavior change - they use the non-keepalive
 742          * timeout today.  With a normal client, the socket will be readable in
 743          * a few milliseconds anyway.
 744          */
 745         cs->expiration_time = ap_server_conf->keep_alive_timeout +
 746                               apr_time_now();
 747         apr_thread_mutex_lock(timeout_mutex);
 748         APR_RING_INSERT_TAIL(&keepalive_timeout_head, cs, conn_state_t, timeout_list);
 749         apr_thread_mutex_unlock(timeout_mutex);
 750
 751         pt->bypass_push = 0;
 752         /* Add work to pollset. */
 753         cs->pfd.reqevents = APR_POLLIN;
 754         rc = apr_pollset_add(event_pollset, &cs->pfd);
 755
 756         if (rc != APR_SUCCESS) {
 757             ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
 758                          "process_socket: apr_pollset_add failure");
 759             AP_DEBUG_ASSERT(rc == APR_SUCCESS);
 760         }
 761     }
 762     return 1;
 763 }
 764
 765 /* requests_this_child has gone to zero or below.  See if the admin coded
 766    "MaxRequestsPerChild 0", and keep going in that case.  Doing it this way
 767    simplifies the hot path in worker_thread */
 768 static void check_infinite_requests(void)
 769 {
 770     if (ap_max_requests_per_child) {
 771         signal_threads(ST_GRACEFUL);
 772     }
 773     else {
 774         requests_this_child = INT_MAX;  /* keep going */
 775     }
 776 }
 777
 778 static void unblock_signal(int sig)
 779 {
 780     sigset_t sig_mask;
 781
 782     sigemptyset(&sig_mask);
 783     sigaddset(&sig_mask, sig);
 784 #if defined(SIGPROCMASK_SETS_THREAD_MASK)
 785     sigprocmask(SIG_UNBLOCK, &sig_mask, NULL);
 786 #else
 787     pthread_sigmask(SIG_UNBLOCK, &sig_mask, NULL);
 788 #endif
 789 }
 790
 791 static void dummy_signal_handler(int sig)
 792 {
 793     /* XXX If specifying SIG_IGN is guaranteed to unblock a syscall,
 794      *     then we don't need this goofy function.
 795      */
 796 }
 797
 798
 799 #if HAVE_SERF
 800 static apr_status_t s_socket_add(void *user_baton,
 801                                  apr_pollfd_t *pfd,
 802                                  void *serf_baton)
 803 {
 804     s_baton_t *s = (s_baton_t*)user_baton;
 805     /* XXXXX: recycle listener_poll_types */
 806     listener_poll_type *pt = malloc(sizeof(*pt));
 807     pt->type = PT_SERF;
 808     pt->baton = serf_baton;
 809     pfd->client_data = pt;
 810     return apr_pollset_add(s->pollset, pfd);
 811 }
 812
 813 static apr_status_t s_socket_remove(void *user_baton,
 814                                     apr_pollfd_t *pfd,
 815                                     void *serf_baton)
 816 {
 817     s_baton_t *s = (s_baton_t*)user_baton;
 818     listener_poll_type *pt = pfd->client_data;
 819     free(pt);
 820     return apr_pollset_remove(s->pollset, pfd);
 821 }
 822 #endif
 823
 824 static apr_status_t init_pollset(apr_pool_t *p)
 825 {
 826 #if HAVE_SERF
 827     s_baton_t *baton = NULL;
 828 #endif
 829     apr_status_t rv;
 830     ap_listen_rec *lr;
 831     listener_poll_type *pt;
 832
 833     rv = apr_thread_mutex_create(&timeout_mutex, APR_THREAD_MUTEX_DEFAULT, p);
 834     if (rv != APR_SUCCESS) {
 835         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
 836                      "creation of the timeout mutex failed.");
 837         return rv;
 838     }
 839
 840     APR_RING_INIT(&timeout_head, conn_state_t, timeout_list);
 841     APR_RING_INIT(&keepalive_timeout_head, conn_state_t, timeout_list);
 842
 843     /* Create the main pollset */
 844     rv = apr_pollset_create(&event_pollset,
 845                             threads_per_child,
 846                             p, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
 847     if (rv != APR_SUCCESS) {
 848         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
 849                      "apr_pollset_create with Thread Safety failed.");
 850         return rv;
 851     }
 852
 853     for (lr = ap_listeners; lr != NULL; lr = lr->next) {
 854         apr_pollfd_t *pfd = apr_palloc(p, sizeof(*pfd));
 855         pt = apr_pcalloc(p, sizeof(*pt));
 856         pfd->desc_type = APR_POLL_SOCKET;
 857         pfd->desc.s = lr->sd;
 858         pfd->reqevents = APR_POLLIN;
 859
 860         pt->type = PT_ACCEPT;
 861         pt->baton = lr;
 862
 863         pfd->client_data = pt;
 864
 865         apr_socket_opt_set(pfd->desc.s, APR_SO_NONBLOCK, 1);
 866         apr_pollset_add(event_pollset, pfd);
 867
 868         lr->accept_func = ap_unixd_accept;
 869     }
 870
 871 #if HAVE_SERF
 872     baton = apr_pcalloc(p, sizeof(*baton));
 873     baton->pollset = event_pollset;
 874     /* TODO: subpools, threads, reuse, etc.  -- currently use malloc() inside :( */
 875     baton->pool = p;
 876
 877     g_serf = serf_context_create_ex(baton,
 878                                     s_socket_add,
 879                                     s_socket_remove, p);
 880
 881     ap_register_provider(p, "mpm_serf",
 882                          "instance", "0", g_serf);
 883
 884 #endif
 885
 886     return APR_SUCCESS;
 887 }
 888
 889 static apr_status_t push_timer2worker(timer_event_t* te)
 890 {
 891     return ap_queue_push_timer(worker_queue, te);
 892 }
 893
 894 static apr_status_t push2worker(const apr_pollfd_t * pfd,
 895                                 apr_pollset_t * pollset)
 896 {
 897     listener_poll_type *pt = (listener_poll_type *) pfd->client_data;
 898     conn_state_t *cs = (conn_state_t *) pt->baton;
 899     apr_status_t rc;
 900
 901     if (pt->bypass_push) {
 902         return APR_SUCCESS;
 903     }
 904
 905     pt->bypass_push = 1;
 906
 907     rc = apr_pollset_remove(pollset, pfd);
 908
 909     /*
 910      * Some of the pollset backends, like KQueue or Epoll
 911      * automagically remove the FD if the socket is closed,
 912      * therefore, we can accept _SUCCESS or _NOTFOUND,
 913      * and we still want to keep going
 914      */
 915     if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
 916         cs->state = CONN_STATE_LINGER;
 917     }
 918
 919     rc = ap_queue_push(worker_queue, cs->pfd.desc.s, cs, cs->p);
 920     if (rc != APR_SUCCESS) {
 921         /* trash the connection; we couldn't queue the connected
 922          * socket to a worker
 923          */
 924         apr_bucket_alloc_destroy(cs->bucket_alloc);
 925         apr_socket_close(cs->pfd.desc.s);
 926         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
 927                      ap_server_conf, "push2worker: ap_queue_push failed");
 928         apr_pool_clear(cs->p);
 929         ap_push_pool(worker_queue_info, cs->p);
 930     }
 931
 932     return rc;
 933 }
 934
 935 /* get_worker:
 936  *     reserve a worker thread, block if all are currently busy.
 937  *     this prevents the worker queue from overflowing and lets
 938  *     other processes accept new connections in the mean time.
 939  */
 940 static int get_worker(int *have_idle_worker_p)
 941 {
 942     apr_status_t rc;
 943
 944     if (!*have_idle_worker_p) {
 945         rc = ap_queue_info_wait_for_idler(worker_queue_info);
 946
 947         if (rc == APR_SUCCESS) {
 948             *have_idle_worker_p = 1;
 949             return 1;
 950         }
 951         else {
 952             if (!APR_STATUS_IS_EOF(rc)) {
 953                 ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
 954                              "ap_queue_info_wait_for_idler failed.  "
 955                              "Attempting to shutdown process gracefully");
 956                 signal_threads(ST_GRACEFUL);
 957             }
 958             return 0;
 959         }
 960     }
 961     else {
 962         /* already reserved a worker thread - must have hit a
 963          * transient error on a previous pass
 964          */
 965         return 1;
 966     }
 967 }
 968
 969 /* XXXXXX: Convert to skiplist or other better data structure
 970  * (yes, this is VERY VERY VERY VERY BAD)
 971  */
 972
 973 /* Structures to reuse */
 974 static APR_RING_HEAD(timer_free_ring_t, timer_event_t) timer_free_ring;
 975 /* Active timers */
 976 static APR_RING_HEAD(timer_ring_t, timer_event_t) timer_ring;
 977
 978 static apr_thread_mutex_t *g_timer_ring_mtx;
 979
 980 static apr_status_t event_register_timed_callback(apr_time_t t,
 981                                                   ap_mpm_callback_fn_t *cbfn,
 982                                                   void *baton)
 983 {
 984     int inserted = 0;
 985     timer_event_t *ep;
 986     timer_event_t *te;
 987     /* oh yeah, and make locking smarter/fine grained. */
 988     apr_thread_mutex_lock(g_timer_ring_mtx);
 989
 990     if (!APR_RING_EMPTY(&timer_free_ring, timer_event_t, link)) {
 991         te = APR_RING_FIRST(&timer_free_ring);
 992         APR_RING_REMOVE(te, link);
 993     }
 994     else {
 995         /* XXXXX: lol, pool allocation without a context from any thread.Yeah. Right. MPMs Suck. */
 996         te = malloc(sizeof(timer_event_t));
 997         APR_RING_ELEM_INIT(te, link);
 998     }
 999
1000     te->cbfunc = cbfn;
1001     te->baton = baton;
1002     /* XXXXX: optimize */
1003     te->when = t + apr_time_now();
1004
1005     /* Okay, insert sorted by when.. */
1006     for (ep = APR_RING_FIRST(&timer_ring);
1007          ep != APR_RING_SENTINEL(&timer_ring,
1008                                  timer_event_t, link);
1009          ep = APR_RING_NEXT(ep, link))
1010     {
1011         if (ep->when > te->when) {
1012             inserted = 1;
1013             APR_RING_INSERT_BEFORE(ep, te, link);
1014             break;
1015         }
1016     }
1017
1018     if (!inserted) {
1019         APR_RING_INSERT_TAIL(&timer_ring, te, timer_event_t, link);
1020     }
1021
1022     apr_thread_mutex_unlock(g_timer_ring_mtx);
1023
1024     return APR_SUCCESS;
1025 }
1026
1027 static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
1028 {
1029     timer_event_t *ep;
1030     timer_event_t *te;
1031     apr_status_t rc;
1032     proc_info *ti = dummy;
1033     int process_slot = ti->pid;
1034     apr_pool_t *tpool = apr_thread_pool_get(thd);
1035     void *csd = NULL;
1036     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1037     ap_listen_rec *lr;
1038     int have_idle_worker = 0;
1039     conn_state_t *cs;
1040     const apr_pollfd_t *out_pfd;
1041     apr_int32_t num = 0;
1042     apr_time_t time_now = 0;
1043     apr_interval_time_t timeout_interval;
1044     apr_time_t timeout_time;
1045     listener_poll_type *pt;
1046
1047     free(ti);
1048
1049     /* the following times out events that are really close in the future
1050      *   to prevent extra poll calls
1051      *
1052      * current value is .1 second
1053      */
1054 #define TIMEOUT_FUDGE_FACTOR 100000
1055 #define EVENT_FUDGE_FACTOR 10000
1056
1057     rc = init_pollset(tpool);
1058     if (rc != APR_SUCCESS) {
1059         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1060                      "failed to initialize pollset, "
1061                      "attempting to shutdown process gracefully");
1062         signal_threads(ST_GRACEFUL);
1063         return NULL;
1064     }
1065
1066     /* Unblock the signal used to wake this thread up, and set a handler for
1067      * it.
1068      */
1069     unblock_signal(LISTENER_SIGNAL);
1070     apr_signal(LISTENER_SIGNAL, dummy_signal_handler);
1071
1072     while (!listener_may_exit) {
1073
1074         if (requests_this_child <= 0) {
1075             check_infinite_requests();
1076         }
1077
1078
1079         {
1080             apr_time_t now = apr_time_now();
1081             apr_thread_mutex_lock(g_timer_ring_mtx);
1082
1083             if (!APR_RING_EMPTY(&timer_ring, timer_event_t, link)) {
1084                 te = APR_RING_FIRST(&timer_ring);
1085                 if (te->when > now) {
1086                     timeout_interval = te->when - now;
1087                 }
1088                 else {
1089                     timeout_interval = 1;
1090                 }
1091             }
1092             else {
1093                 timeout_interval = apr_time_from_msec(100);
1094             }
1095             apr_thread_mutex_unlock(g_timer_ring_mtx);
1096         }
1097
1098 #if HAVE_SERF
1099         rc = serf_context_prerun(g_serf);
1100         if (rc != APR_SUCCESS) {
1101             /* TOOD: what should do here? ugh. */
1102         }
1103
1104 #endif
1105         rc = apr_pollset_poll(event_pollset, timeout_interval, &num,
1106                               &out_pfd);
1107
1108         if (rc != APR_SUCCESS) {
1109             if (APR_STATUS_IS_EINTR(rc)) {
1110                 continue;
1111             }
1112             if (!APR_STATUS_IS_TIMEUP(rc)) {
1113                 ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1114                              "apr_pollset_poll failed.  Attempting to "
1115                              "shutdown process gracefully");
1116                 signal_threads(ST_GRACEFUL);
1117             }
1118         }
1119
1120         if (listener_may_exit)
1121             break;
1122
1123         {
1124             apr_time_t now = apr_time_now();
1125             apr_thread_mutex_lock(g_timer_ring_mtx);
1126             for (ep = APR_RING_FIRST(&timer_ring);
1127                  ep != APR_RING_SENTINEL(&timer_ring,
1128                                          timer_event_t, link);
1129                  ep = APR_RING_FIRST(&timer_ring))
1130             {
1131                 if (ep->when < now + EVENT_FUDGE_FACTOR) {
1132                     APR_RING_REMOVE(ep, link);
1133                     push_timer2worker(ep);
1134                 }
1135                 else {
1136                     break;
1137                 }
1138             }
1139             apr_thread_mutex_unlock(g_timer_ring_mtx);
1140         }
1141
1142         while (num && get_worker(&have_idle_worker)) {
1143             pt = (listener_poll_type *) out_pfd->client_data;
1144             if (pt->type == PT_CSD) {
1145                 /* one of the sockets is readable */
1146                 cs = (conn_state_t *) pt->baton;
1147                 switch (cs->state) {
1148                 case CONN_STATE_CHECK_REQUEST_LINE_READABLE:
1149                     cs->state = CONN_STATE_READ_REQUEST_LINE;
1150                     break;
1151                 case CONN_STATE_WRITE_COMPLETION:
1152                     break;
1153                 default:
1154                     ap_log_error(APLOG_MARK, APLOG_ERR, rc,
1155                                  ap_server_conf,
1156                                  "event_loop: unexpected state %d",
1157                                  cs->state);
1158                 }
1159
1160                 apr_thread_mutex_lock(timeout_mutex);
1161                 APR_RING_REMOVE(cs, timeout_list);
1162                 apr_thread_mutex_unlock(timeout_mutex);
1163                 APR_RING_ELEM_INIT(cs, timeout_list);
1164
1165                 rc = push2worker(out_pfd, event_pollset);
1166                 if (rc != APR_SUCCESS) {
1167                     ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1168                                  ap_server_conf, "push2worker failed");
1169                 }
1170                 else {
1171                     have_idle_worker = 0;
1172                 }
1173             }
1174             else if (pt->type == PT_ACCEPT) {
1175                 /* A Listener Socket is ready for an accept() */
1176
1177                 lr = (ap_listen_rec *) pt->baton;
1178
1179                 ap_pop_pool(&ptrans, worker_queue_info);
1180
1181                 if (ptrans == NULL) {
1182                     /* create a new transaction pool for each accepted socket */
1183                     apr_allocator_t *allocator;
1184
1185                     apr_allocator_create(&allocator);
1186                     apr_allocator_max_free_set(allocator,
1187                                                ap_max_mem_free);
1188                     apr_pool_create_ex(&ptrans, pconf, NULL, allocator);
1189                     apr_allocator_owner_set(allocator, ptrans);
1190                     if (ptrans == NULL) {
1191                         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1192                                      ap_server_conf,
1193                                      "Failed to create transaction pool");
1194                         signal_threads(ST_GRACEFUL);
1195                         return NULL;
1196                     }
1197                 }
1198                 apr_pool_tag(ptrans, "transaction");
1199
1200                 rc = lr->accept_func(&csd, lr, ptrans);
1201
1202                 /* later we trash rv and rely on csd to indicate
1203                  * success/failure
1204                  */
1205                 AP_DEBUG_ASSERT(rc == APR_SUCCESS || !csd);
1206
1207                 if (rc == APR_EGENERAL) {
1208                     /* E[NM]FILE, ENOMEM, etc */
1209                     resource_shortage = 1;
1210                     signal_threads(ST_GRACEFUL);
1211                 }
1212
1213                 if (csd != NULL) {
1214                     rc = ap_queue_push(worker_queue, csd, NULL, ptrans);
1215                     if (rc != APR_SUCCESS) {
1216                         /* trash the connection; we couldn't queue the connected
1217                          * socket to a worker
1218                          */
1219                         apr_socket_close(csd);
1220                         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1221                                      ap_server_conf,
1222                                      "ap_queue_push failed");
1223                         apr_pool_clear(ptrans);
1224                         ap_push_pool(worker_queue_info, ptrans);
1225                     }
1226                     else {
1227                         have_idle_worker = 0;
1228                     }
1229                 }
1230                 else {
1231                     apr_pool_clear(ptrans);
1232                     ap_push_pool(worker_queue_info, ptrans);
1233                 }
1234             }               /* if:else on pt->type */
1235 #if HAVE_SERF
1236             else if (pt->type == PT_SERF) {
1237                 /* send socket to serf. */
1238                 /* XXXX: this doesn't require get_worker(&have_idle_worker) */
1239                 serf_event_trigger(g_serf, pt->baton, out_pfd);
1240             }
1241 #endif
1242             out_pfd++;
1243             num--;
1244         }                   /* while for processing poll */
1245
1246         /* XXX possible optimization: stash the current time for use as
1247          * r->request_time for new requests
1248          */
1249         time_now = apr_time_now();
1250
1251         /* handle timed out sockets */
1252         apr_thread_mutex_lock(timeout_mutex);
1253
1254         /* Step 1: keepalive timeouts */
1255         cs = APR_RING_FIRST(&keepalive_timeout_head);
1256         timeout_time = time_now + TIMEOUT_FUDGE_FACTOR;
1257         while (!APR_RING_EMPTY(&keepalive_timeout_head, conn_state_t, timeout_list)
1258                && cs->expiration_time < timeout_time) {
1259
1260             cs->state = CONN_STATE_LINGER;
1261
1262             APR_RING_REMOVE(cs, timeout_list);
1263             apr_thread_mutex_unlock(timeout_mutex);
1264
1265             if (!get_worker(&have_idle_worker)) {
1266                 apr_thread_mutex_lock(timeout_mutex);
1267                 APR_RING_INSERT_HEAD(&keepalive_timeout_head, cs,
1268                                      conn_state_t, timeout_list);
1269                 break;
1270             }
1271
1272             rc = push2worker(&cs->pfd, event_pollset);
1273
1274             if (rc != APR_SUCCESS) {
1275                 return NULL;
1276                 /* XXX return NULL looks wrong - not an init failure
1277                  * that bypasses all the cleanup outside the main loop
1278                  * break seems more like it
1279                  * need to evaluate seriousness of push2worker failures
1280                  */
1281             }
1282             have_idle_worker = 0;
1283             apr_thread_mutex_lock(timeout_mutex);
1284             cs = APR_RING_FIRST(&keepalive_timeout_head);
1285         }
1286
1287         /* Step 2: write completion timeouts */
1288         cs = APR_RING_FIRST(&timeout_head);
1289         while (!APR_RING_EMPTY(&timeout_head, conn_state_t, timeout_list)
1290                && cs->expiration_time < timeout_time) {
1291
1292             cs->state = CONN_STATE_LINGER;
1293             APR_RING_REMOVE(cs, timeout_list);
1294             apr_thread_mutex_unlock(timeout_mutex);
1295
1296             if (!get_worker(&have_idle_worker)) {
1297                 apr_thread_mutex_lock(timeout_mutex);
1298                 APR_RING_INSERT_HEAD(&timeout_head, cs,
1299                                      conn_state_t, timeout_list);
1300                 break;
1301             }
1302
1303             rc = push2worker(&cs->pfd, event_pollset);
1304             if (rc != APR_SUCCESS) {
1305                 return NULL;
1306             }
1307             have_idle_worker = 0;
1308             apr_thread_mutex_lock(timeout_mutex);
1309             cs = APR_RING_FIRST(&timeout_head);
1310         }
1311
1312         apr_thread_mutex_unlock(timeout_mutex);
1313
1314     }     /* listener main loop */
1315
1316     ap_close_listeners();
1317     ap_queue_term(worker_queue);
1318     dying = 1;
1319     ap_scoreboard_image->parent[process_slot].quiescing = 1;
1320
1321     /* wake up the main thread */
1322     kill(ap_my_pid, SIGTERM);
1323
1324     apr_thread_exit(thd, APR_SUCCESS);
1325     return NULL;
1326 }
1327
1328 /* XXX For ungraceful termination/restart, we definitely don't want to
1329  *     wait for active connections to finish but we may want to wait
1330  *     for idle workers to get out of the queue code and release mutexes,
1331  *     since those mutexes are cleaned up pretty soon and some systems
1332  *     may not react favorably (i.e., segfault) if operations are attempted
1333  *     on cleaned-up mutexes.
1334  */
1335 static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
1336 {
1337     proc_info *ti = dummy;
1338     int process_slot = ti->pid;
1339     int thread_slot = ti->tid;
1340     apr_socket_t *csd = NULL;
1341     conn_state_t *cs;
1342     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1343     apr_status_t rv;
1344     int is_idle = 0;
1345     timer_event_t *te = NULL;
1346
1347     free(ti);
1348
1349     ap_scoreboard_image->servers[process_slot][thread_slot].pid = ap_my_pid;
1350     ap_scoreboard_image->servers[process_slot][thread_slot].tid = apr_os_thread_current();
1351     ap_scoreboard_image->servers[process_slot][thread_slot].generation = my_generation;
1352     ap_update_child_status_from_indexes(process_slot, thread_slot,
1353                                         SERVER_STARTING, NULL);
1354
1355     while (!workers_may_exit) {
1356         if (!is_idle) {
1357             rv = ap_queue_info_set_idle(worker_queue_info, NULL);
1358             if (rv != APR_SUCCESS) {
1359                 ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
1360                              "ap_queue_info_set_idle failed. Attempting to "
1361                              "shutdown process gracefully.");
1362                 signal_threads(ST_GRACEFUL);
1363                 break;
1364             }
1365             is_idle = 1;
1366         }
1367
1368         ap_update_child_status_from_indexes(process_slot, thread_slot,
1369                                             SERVER_READY, NULL);
1370       worker_pop:
1371         if (workers_may_exit) {
1372             break;
1373         }
1374
1375         te = NULL;
1376
1377         rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te);
1378
1379         if (rv != APR_SUCCESS) {
1380             /* We get APR_EOF during a graceful shutdown once all the
1381              * connections accepted by this server process have been handled.
1382              */
1383             if (APR_STATUS_IS_EOF(rv)) {
1384                 break;
1385             }
1386             /* We get APR_EINTR whenever ap_queue_pop() has been interrupted
1387              * from an explicit call to ap_queue_interrupt_all(). This allows
1388              * us to unblock threads stuck in ap_queue_pop() when a shutdown
1389              * is pending.
1390              *
1391              * If workers_may_exit is set and this is ungraceful termination/
1392              * restart, we are bound to get an error on some systems (e.g.,
1393              * AIX, which sanity-checks mutex operations) since the queue
1394              * may have already been cleaned up.  Don't log the "error" if
1395              * workers_may_exit is set.
1396              */
1397             else if (APR_STATUS_IS_EINTR(rv)) {
1398                 goto worker_pop;
1399             }
1400             /* We got some other error. */
1401             else if (!workers_may_exit) {
1402                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1403                              "ap_queue_pop failed");
1404             }
1405             continue;
1406         }
1407         if (te != NULL) {
1408
1409             te->cbfunc(te->baton);
1410
1411             {
1412                 apr_thread_mutex_lock(g_timer_ring_mtx);
1413                 APR_RING_INSERT_TAIL(&timer_free_ring, te, timer_event_t, link);
1414                 apr_thread_mutex_unlock(g_timer_ring_mtx);
1415             }
1416         }
1417         else {
1418             is_idle = 0;
1419             worker_sockets[thread_slot] = csd;
1420             rv = process_socket(thd, ptrans, csd, cs, process_slot, thread_slot);
1421             if (!rv) {
1422                 requests_this_child--;
1423             }
1424             worker_sockets[thread_slot] = NULL;
1425         }
1426     }
1427
1428     ap_update_child_status_from_indexes(process_slot, thread_slot,
1429                                         (dying) ? SERVER_DEAD :
1430                                         SERVER_GRACEFUL,
1431                                         (request_rec *) NULL);
1432
1433     apr_thread_exit(thd, APR_SUCCESS);
1434     return NULL;
1435 }
1436
/* check_signal:
 *     returns 1 when signum is SIGTERM or SIGINT, and 0 for any other
 *     signal number.
 */
static int check_signal(int signum)
{
    return (signum == SIGTERM || signum == SIGINT) ? 1 : 0;
}
1446
1447
1448
1449 static void create_listener_thread(thread_starter * ts)
1450 {
1451     int my_child_num = ts->child_num_arg;
1452     apr_threadattr_t *thread_attr = ts->threadattr;
1453     proc_info *my_info;
1454     apr_status_t rv;
1455
1456     my_info = (proc_info *) malloc(sizeof(proc_info));
1457     my_info->pid = my_child_num;
1458     my_info->tid = -1;          /* listener thread doesn't have a thread slot */
1459     my_info->sd = 0;
1460     rv = apr_thread_create(&ts->listener, thread_attr, listener_thread,
1461                            my_info, pchild);
1462     if (rv != APR_SUCCESS) {
1463         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1464                      "apr_thread_create: unable to create listener thread");
1465         /* let the parent decide how bad this really is */
1466         clean_child_exit(APEXIT_CHILDSICK);
1467     }
1468     apr_os_thread_get(&listener_os_thread, ts->listener);
1469 }
1470
1471 /* XXX under some circumstances not understood, children can get stuck
1472  *     in start_threads forever trying to take over slots which will
1473  *     never be cleaned up; for now there is an APLOG_DEBUG message issued
1474  *     every so often when this condition occurs
1475  */
1476 static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
1477 {
1478     thread_starter *ts = dummy;
1479     apr_thread_t **threads = ts->threads;
1480     apr_threadattr_t *thread_attr = ts->threadattr;
1481     int child_num_arg = ts->child_num_arg;
1482     int my_child_num = child_num_arg;
1483     proc_info *my_info;
1484     apr_status_t rv;
1485     int i;
1486     int threads_created = 0;
1487     int listener_started = 0;
1488     int loops;
1489     int prev_threads_created;
1490
1491     /* We must create the fd queues before we start up the listener
1492      * and worker threads. */
1493     worker_queue = apr_pcalloc(pchild, sizeof(*worker_queue));
1494     rv = ap_queue_init(worker_queue, threads_per_child, pchild);
1495     if (rv != APR_SUCCESS) {
1496         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1497                      "ap_queue_init() failed");
1498         clean_child_exit(APEXIT_CHILDFATAL);
1499     }
1500
1501     rv = ap_queue_info_create(&worker_queue_info, pchild,
1502                               threads_per_child);
1503     if (rv != APR_SUCCESS) {
1504         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1505                      "ap_queue_info_create() failed");
1506         clean_child_exit(APEXIT_CHILDFATAL);
1507     }
1508
1509     worker_sockets = apr_pcalloc(pchild, threads_per_child
1510                                  * sizeof(apr_socket_t *));
1511
1512     loops = prev_threads_created = 0;
1513     while (1) {
1514         /* threads_per_child does not include the listener thread */
1515         for (i = 0; i < threads_per_child; i++) {
1516             int status =
1517                 ap_scoreboard_image->servers[child_num_arg][i].status;
1518
1519             if (status != SERVER_GRACEFUL && status != SERVER_DEAD) {
1520                 continue;
1521             }
1522
1523             my_info = (proc_info *) malloc(sizeof(proc_info));
1524             if (my_info == NULL) {
1525                 ap_log_error(APLOG_MARK, APLOG_ALERT, errno, ap_server_conf,
1526                              "malloc: out of memory");
1527                 clean_child_exit(APEXIT_CHILDFATAL);
1528             }
1529             my_info->pid = my_child_num;
1530             my_info->tid = i;
1531             my_info->sd = 0;
1532
1533             /* We are creating threads right now */
1534             ap_update_child_status_from_indexes(my_child_num, i,
1535                                                 SERVER_STARTING, NULL);
1536             /* We let each thread update its own scoreboard entry.  This is
1537              * done because it lets us deal with tid better.
1538              */
1539             rv = apr_thread_create(&threads[i], thread_attr,
1540                                    worker_thread, my_info, pchild);
1541             if (rv != APR_SUCCESS) {
1542                 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1543                              "apr_thread_create: unable to create worker thread");
1544                 /* let the parent decide how bad this really is */
1545                 clean_child_exit(APEXIT_CHILDSICK);
1546             }
1547             threads_created++;
1548         }
1549
1550         /* Start the listener only when there are workers available */
1551         if (!listener_started && threads_created) {
1552             create_listener_thread(ts);
1553             listener_started = 1;
1554         }
1555
1556
1557         if (start_thread_may_exit || threads_created == threads_per_child) {
1558             break;
1559         }
1560         /* wait for previous generation to clean up an entry */
1561         apr_sleep(apr_time_from_sec(1));
1562         ++loops;
1563         if (loops % 120 == 0) { /* every couple of minutes */
1564             if (prev_threads_created == threads_created) {
1565                 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1566                              "child %" APR_PID_T_FMT " isn't taking over "
1567                              "slots very quickly (%d of %d)",
1568                              ap_my_pid, threads_created,
1569                              threads_per_child);
1570             }
1571             prev_threads_created = threads_created;
1572         }
1573     }
1574
1575     /* What state should this child_main process be listed as in the
1576      * scoreboard...?
1577      *  ap_update_child_status_from_indexes(my_child_num, i, SERVER_STARTING,
1578      *                                      (request_rec *) NULL);
1579      *
1580      *  This state should be listed separately in the scoreboard, in some kind
1581      *  of process_status, not mixed in with the worker threads' status.
1582      *  "life_status" is almost right, but it's in the worker's structure, and
1583      *  the name could be clearer.   gla
1584      */
1585     apr_thread_exit(thd, APR_SUCCESS);
1586     return NULL;
1587 }
1588
1589 static void join_workers(apr_thread_t * listener, apr_thread_t ** threads)
1590 {
1591     int i;
1592     apr_status_t rv, thread_rv;
1593
1594     if (listener) {
1595         int iter;
1596
1597         /* deal with a rare timing window which affects waking up the
1598          * listener thread...  if the signal sent to the listener thread
1599          * is delivered between the time it verifies that the
1600          * listener_may_exit flag is clear and the time it enters a
1601          * blocking syscall, the signal didn't do any good...  work around
1602          * that by sleeping briefly and sending it again
1603          */
1604
1605         iter = 0;
1606         while (iter < 10 &&
1607 #ifdef HAVE_PTHREAD_KILL
1608                pthread_kill(*listener_os_thread, 0)
1609 #else
1610                kill(ap_my_pid, 0)
1611 #endif
1612                == 0) {
1613             /* listener not dead yet */
1614             apr_sleep(apr_time_make(0, 500000));
1615             wakeup_listener();
1616             ++iter;
1617         }
1618         if (iter >= 10) {
1619             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1620                          "the listener thread didn't exit");
1621         }
1622         else {
1623             rv = apr_thread_join(&thread_rv, listener);
1624             if (rv != APR_SUCCESS) {
1625                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1626                              "apr_thread_join: unable to join listener thread");
1627             }
1628         }
1629     }
1630
1631     for (i = 0; i < threads_per_child; i++) {
1632         if (threads[i]) {       /* if we ever created this thread */
1633             rv = apr_thread_join(&thread_rv, threads[i]);
1634             if (rv != APR_SUCCESS) {
1635                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1636                              "apr_thread_join: unable to join worker "
1637                              "thread %d", i);
1638             }
1639         }
1640     }
1641 }
1642
1643 static void join_start_thread(apr_thread_t * start_thread_id)
1644 {
1645     apr_status_t rv, thread_rv;
1646
1647     start_thread_may_exit = 1;  /* tell it to give up in case it is still
1648                                  * trying to take over slots from a
1649                                  * previous generation
1650                                  */
1651     rv = apr_thread_join(&thread_rv, start_thread_id);
1652     if (rv != APR_SUCCESS) {
1653         ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1654                      "apr_thread_join: unable to join the start " "thread");
1655     }
1656 }
1657
1658 static void child_main(int child_num_arg)
1659 {
1660     apr_thread_t **threads;
1661     apr_status_t rv;
1662     thread_starter *ts;
1663     apr_threadattr_t *thread_attr;
1664     apr_thread_t *start_thread_id;
1665
1666     mpm_state = AP_MPMQ_STARTING;       /* for benefit of any hooks that run as this
1667                                          * child initializes
1668                                          */
1669     ap_my_pid = getpid();
1670     ap_fatal_signal_child_setup(ap_server_conf);
1671     apr_pool_create(&pchild, pconf);
1672
1673     /*stuff to do before we switch id's, so we have permissions. */
1674     ap_reopen_scoreboard(pchild, NULL, 0);
1675
1676     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
1677         clean_child_exit(APEXIT_CHILDFATAL);
1678     }
1679
1680     apr_thread_mutex_create(&g_timer_ring_mtx, APR_THREAD_MUTEX_DEFAULT, pchild);
1681     APR_RING_INIT(&timer_free_ring, timer_event_t, link);
1682     APR_RING_INIT(&timer_ring, timer_event_t, link);
1683
1684     ap_run_child_init(pchild, ap_server_conf);
1685
1686     /* done with init critical section */
1687
1688     /* Just use the standard apr_setup_signal_thread to block all signals
1689      * from being received.  The child processes no longer use signals for
1690      * any communication with the parent process.
1691      */
1692     rv = apr_setup_signal_thread();
1693     if (rv != APR_SUCCESS) {
1694         ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
1695                      "Couldn't initialize signal thread");
1696         clean_child_exit(APEXIT_CHILDFATAL);
1697     }
1698
1699     if (ap_max_requests_per_child) {
1700         requests_this_child = ap_max_requests_per_child;
1701     }
1702     else {
1703         /* coding a value of zero means infinity */
1704         requests_this_child = INT_MAX;
1705     }
1706
1707     /* Setup worker threads */
1708
1709     /* clear the storage; we may not create all our threads immediately,
1710      * and we want a 0 entry to indicate a thread which was not created
1711      */
1712     threads = (apr_thread_t **) calloc(1,
1713                                        sizeof(apr_thread_t *) *
1714                                        threads_per_child);
1715     if (threads == NULL) {
1716         ap_log_error(APLOG_MARK, APLOG_ALERT, errno, ap_server_conf,
1717                      "malloc: out of memory");
1718         clean_child_exit(APEXIT_CHILDFATAL);
1719     }
1720
1721     ts = (thread_starter *) apr_palloc(pchild, sizeof(*ts));
1722
1723     apr_threadattr_create(&thread_attr, pchild);
1724     /* 0 means PTHREAD_CREATE_JOINABLE */
1725     apr_threadattr_detach_set(thread_attr, 0);
1726
1727     if (ap_thread_stacksize != 0) {
1728         apr_threadattr_stacksize_set(thread_attr, ap_thread_stacksize);
1729     }
1730
1731     ts->threads = threads;
1732     ts->listener = NULL;
1733     ts->child_num_arg = child_num_arg;
1734     ts->threadattr = thread_attr;
1735
1736     rv = apr_thread_create(&start_thread_id, thread_attr, start_threads,
1737                            ts, pchild);
1738     if (rv != APR_SUCCESS) {
1739         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1740                      "apr_thread_create: unable to create worker thread");
1741         /* let the parent decide how bad this really is */
1742         clean_child_exit(APEXIT_CHILDSICK);
1743     }
1744
1745     mpm_state = AP_MPMQ_RUNNING;
1746
1747     /* If we are only running in one_process mode, we will want to
1748      * still handle signals. */
1749     if (one_process) {
1750         /* Block until we get a terminating signal. */
1751         apr_signal_thread(check_signal);
1752         /* make sure the start thread has finished; signal_threads()
1753          * and join_workers() depend on that
1754          */
1755         /* XXX join_start_thread() won't be awakened if one of our
1756          *     threads encounters a critical error and attempts to
1757          *     shutdown this child
1758          */
1759         join_start_thread(start_thread_id);
1760
1761         /* helps us terminate a little more quickly than the dispatch of the
1762          * signal thread; beats the Pipe of Death and the browsers
1763          */
1764         signal_threads(ST_UNGRACEFUL);
1765
1766         /* A terminating signal was received. Now join each of the
1767          * workers to clean them up.
1768          *   If the worker already exited, then the join frees
1769          *   their resources and returns.
1770          *   If the worker hasn't exited, then this blocks until
1771          *   they have (then cleans up).
1772          */
1773         join_workers(ts->listener, threads);
1774     }
1775     else {                      /* !one_process */
1776         /* remove SIGTERM from the set of blocked signals...  if one of
1777          * the other threads in the process needs to take us down
1778          * (e.g., for MaxRequestsPerChild) it will send us SIGTERM
1779          */
1780         unblock_signal(SIGTERM);
1781         apr_signal(SIGTERM, dummy_signal_handler);
1782         /* Watch for any messages from the parent over the POD */
1783         while (1) {
1784             rv = ap_event_pod_check(pod);
1785             if (rv == AP_NORESTART) {
1786                 /* see if termination was triggered while we slept */
1787                 switch (terminate_mode) {
1788                 case ST_GRACEFUL:
1789                     rv = AP_GRACEFUL;
1790                     break;
1791                 case ST_UNGRACEFUL:
1792                     rv = AP_RESTART;
1793                     break;
1794                 }
1795             }
1796             if (rv == AP_GRACEFUL || rv == AP_RESTART) {
1797                 /* make sure the start thread has finished;
1798                  * signal_threads() and join_workers depend on that
1799                  */
1800                 join_start_thread(start_thread_id);
1801                 signal_threads(rv ==
1802                                AP_GRACEFUL ? ST_GRACEFUL : ST_UNGRACEFUL);
1803                 break;
1804             }
1805         }
1806
1807         /* A terminating signal was received. Now join each of the
1808          * workers to clean them up.
1809          *   If the worker already exited, then the join frees
1810          *   their resources and returns.
1811          *   If the worker hasn't exited, then this blocks until
1812          *   they have (then cleans up).
1813          */
1814         join_workers(ts->listener, threads);
1815     }
1816
1817     free(threads);
1818
1819     clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
1820 }
1821
1822 static int make_child(server_rec * s, int slot)
1823 {
1824     int pid;
1825
1826     if (slot + 1 > max_daemons_limit) {
1827         max_daemons_limit = slot + 1;
1828     }
1829
1830     if (one_process) {
1831         set_signals();
1832         ap_scoreboard_image->parent[slot].pid = getpid();
1833         child_main(slot);
1834     }
1835
1836     if ((pid = fork()) == -1) {
1837         ap_log_error(APLOG_MARK, APLOG_ERR, errno, s,
1838                      "fork: Unable to fork new process");
1839
1840         /* fork didn't succeed.  There's no need to touch the scoreboard;
1841          * if we were trying to replace a failed child process, then
1842          * server_main_loop() marked its workers SERVER_DEAD, and if
1843          * we were trying to replace a child process that exited normally,
1844          * its worker_thread()s left SERVER_DEAD or SERVER_GRACEFUL behind.
1845          */
1846
1847         /* In case system resources are maxxed out, we don't want
1848            Apache running away with the CPU trying to fork over and
1849            over and over again. */
1850         apr_sleep(apr_time_from_sec(10));
1851
1852         return -1;
1853     }
1854
1855     if (!pid) {
1856 #ifdef HAVE_BINDPROCESSOR
1857         /* By default, AIX binds to a single processor.  This bit unbinds
1858          * children which will then bind to another CPU.
1859          */
1860         int status = bindprocessor(BINDPROCESS, (int) getpid(),
1861                                    PROCESSOR_CLASS_ANY);
1862         if (status != OK)
1863             ap_log_error(APLOG_MARK, APLOG_DEBUG, errno,
1864                          ap_server_conf,
1865                          "processor unbind failed");
1866 #endif
1867         RAISE_SIGSTOP(MAKE_CHILD);
1868
1869         apr_signal(SIGTERM, just_die);
1870         child_main(slot);
1871
1872         clean_child_exit(0);
1873     }
1874     /* else */
1875     if (ap_scoreboard_image->parent[slot].pid != 0) {
1876         /* This new child process is squatting on the scoreboard
1877          * entry owned by an exiting child process, which cannot
1878          * exit until all active requests complete.
1879          * Don't forget about this exiting child process, or we
1880          * won't be able to kill it if it doesn't exit by the
1881          * time the server is shut down.
1882          */
1883         ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1884                      "taking over scoreboard slot from %" APR_PID_T_FMT "%s",
1885                      ap_scoreboard_image->parent[slot].pid,
1886                      ap_scoreboard_image->parent[slot].quiescing ?
1887                          " (quiescing)" : "");
1888         ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid);
1889     }
1890     ap_scoreboard_image->parent[slot].quiescing = 0;
1891     ap_scoreboard_image->parent[slot].pid = pid;
1892     return 0;
1893 }
1894
1895 /* start up a bunch of children */
1896 static void startup_children(int number_to_start)
1897 {
1898     int i;
1899
1900     for (i = 0; number_to_start && i < ap_daemons_limit; ++i) {
1901         if (ap_scoreboard_image->parent[i].pid != 0) {
1902             continue;
1903         }
1904         if (make_child(ap_server_conf, i) < 0) {
1905             break;
1906         }
1907         --number_to_start;
1908     }
1909 }
1910
1911
/*
 * Upper bound for idle_spawn_rate; it also sizes the free-slot list built
 * by perform_idle_server_maintenance().  May be overridden at build time.
 */
#ifndef MAX_SPAWN_RATE
#define MAX_SPAWN_RATE        (32)
#endif

/*
 * Number of children to spawn on the next maintenance cycle when there
 * are not enough idle servers.  It is doubled up to MAX_SPAWN_RATE and
 * goes back to 1 only when a cycle passes without the need to spawn.
 */
static int idle_spawn_rate = 1;

/*
 * While this counter is non-zero, a spawning cycle decrements it instead
 * of doubling idle_spawn_rate.
 */
static int hold_off_on_exponential_spawning = 0;
1923
1924 static void perform_idle_server_maintenance(void)
1925 {
1926     int i, j;
1927     int idle_thread_count;
1928     worker_score *ws;
1929     process_score *ps;
1930     int free_length;
1931     int totally_free_length = 0;
1932     int free_slots[MAX_SPAWN_RATE];
1933     int last_non_dead;
1934     int total_non_dead;
1935     int active_thread_count = 0;
1936
1937     /* initialize the free_list */
1938     free_length = 0;
1939
1940     idle_thread_count = 0;
1941     last_non_dead = -1;
1942     total_non_dead = 0;
1943
1944     for (i = 0; i < ap_daemons_limit; ++i) {
1945         /* Initialization to satisfy the compiler. It doesn't know
1946          * that threads_per_child is always > 0 */
1947         int status = SERVER_DEAD;
1948         int any_dying_threads = 0;
1949         int any_dead_threads = 0;
1950         int all_dead_threads = 1;
1951
1952         if (i >= max_daemons_limit
1953             && totally_free_length == idle_spawn_rate)
1954             /* short cut if all active processes have been examined and
1955              * enough empty scoreboard slots have been found
1956              */
1957
1958             break;
1959         ps = &ap_scoreboard_image->parent[i];
1960         for (j = 0; j < threads_per_child; j++) {
1961             ws = &ap_scoreboard_image->servers[i][j];
1962             status = ws->status;
1963
1964             /* XXX any_dying_threads is probably no longer needed    GLA */
1965             any_dying_threads = any_dying_threads ||
1966                 (status == SERVER_GRACEFUL);
1967             any_dead_threads = any_dead_threads || (status == SERVER_DEAD);
1968             all_dead_threads = all_dead_threads &&
1969                 (status == SERVER_DEAD || status == SERVER_GRACEFUL);
1970
1971             /* We consider a starting server as idle because we started it
1972              * at least a cycle ago, and if it still hasn't finished starting
1973              * then we're just going to swamp things worse by forking more.
1974              * So we hopefully won't need to fork more if we count it.
1975              * This depends on the ordering of SERVER_READY and SERVER_STARTING.
1976              */
1977             if (ps->pid != 0) { /* XXX just set all_dead_threads in outer
1978                                    for loop if no pid?  not much else matters */
1979                 if (status <= SERVER_READY &&
1980                         !ps->quiescing && ps->generation == my_generation) {
1981                     ++idle_thread_count;
1982                 }
1983                 if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
1984                     ++active_thread_count;
1985                 }
1986             }
1987         }
1988         if (any_dead_threads
1989             && totally_free_length < idle_spawn_rate
1990             && free_length < MAX_SPAWN_RATE
1991             && (!ps->pid      /* no process in the slot */
1992                   || ps->quiescing)) {  /* or at least one is going away */
1993             if (all_dead_threads) {
1994                 /* great! we prefer these, because the new process can
1995                  * start more threads sooner.  So prioritize this slot
1996                  * by putting it ahead of any slots with active threads.
1997                  *
1998                  * first, make room by moving a slot that's potentially still
1999                  * in use to the end of the array
2000                  */
2001                 free_slots[free_length] = free_slots[totally_free_length];
2002                 free_slots[totally_free_length++] = i;
2003             }
2004             else {
2005                 /* slot is still in use - back of the bus
2006                  */
2007                 free_slots[free_length] = i;
2008             }
2009             ++free_length;
2010         }
2011         /* XXX if (!ps->quiescing)     is probably more reliable  GLA */
2012         if (!any_dying_threads) {
2013             last_non_dead = i;
2014             ++total_non_dead;
2015         }
2016     }
2017
2018     if (sick_child_detected) {
2019         if (active_thread_count > 0) {
2020             /* some child processes appear to be working.  don't kill the
2021              * whole server.
2022              */
2023             sick_child_detected = 0;
2024         }
2025         else {
2026             /* looks like a basket case.  give up.
2027              */
2028             shutdown_pending = 1;
2029             child_fatal = 1;
2030             ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
2031                          ap_server_conf,
2032                          "No active workers found..."
2033                          " Apache is exiting!");
2034             /* the child already logged the failure details */
2035             return;
2036         }
2037     }
2038
2039     max_daemons_limit = last_non_dead + 1;
2040
2041     if (idle_thread_count > max_spare_threads) {
2042         /* Kill off one child */
2043         ap_event_pod_signal(pod, TRUE);
2044         idle_spawn_rate = 1;
2045     }
2046     else if (idle_thread_count < min_spare_threads) {
2047         /* terminate the free list */
2048         if (free_length == 0) { /* scoreboard is full, can't fork */
2049
2050             if (active_thread_count >= ap_daemons_limit * threads_per_child) {
2051                 static int reported = 0;
2052                 if (!reported) {
2053                     /* only report this condition once */
2054                     ap_log_error(APLOG_MARK, APLOG_ERR, 0,
2055                                  ap_server_conf,
2056                                  "server reached MaxClients setting, consider"
2057                                  " raising the MaxClients setting");
2058                     reported = 1;
2059                 }
2060             }
2061             else {
2062                 ap_log_error(APLOG_MARK, APLOG_ERR, 0,
2063                              ap_server_conf,
2064                              "scoreboard is full, not at MaxClients");
2065             }
2066             idle_spawn_rate = 1;
2067         }
2068         else {
2069             if (free_length > idle_spawn_rate) {
2070                 free_length = idle_spawn_rate;
2071             }
2072             if (idle_spawn_rate >= 8) {
2073                 ap_log_error(APLOG_MARK, APLOG_INFO, 0,
2074                              ap_server_conf,
2075                              "server seems busy, (you may need "
2076                              "to increase StartServers, ThreadsPerChild "
2077                              "or Min/MaxSpareThreads), "
2078                              "spawning %d children, there are around %d idle "
2079                              "threads, and %d total children", free_length,
2080                              idle_thread_count, total_non_dead);
2081             }
2082             for (i = 0; i < free_length; ++i) {
2083                 make_child(ap_server_conf, free_slots[i]);
2084             }
2085             /* the next time around we want to spawn twice as many if this
2086              * wasn't good enough, but not if we've just done a graceful
2087              */
2088             if (hold_off_on_exponential_spawning) {
2089                 --hold_off_on_exponential_spawning;
2090             }
2091             else if (idle_spawn_rate < MAX_SPAWN_RATE) {
2092                 idle_spawn_rate *= 2;
2093             }
2094         }
2095     }
2096     else {
2097         idle_spawn_rate = 1;
2098     }
2099 }
2100
2101 static void server_main_loop(int remaining_children_to_start)
2102 {
2103     int child_slot;
2104     apr_exit_why_e exitwhy;
2105     int status, processed_status;
2106     apr_proc_t pid;
2107     int i;
2108
2109     while (!restart_pending && !shutdown_pending) {
2110         ap_wait_or_timeout(&exitwhy, &status, &pid, pconf, ap_server_conf);
2111
2112         if (pid.pid != -1) {
2113             processed_status = ap_process_child_status(&pid, exitwhy, status);
2114             if (processed_status == APEXIT_CHILDFATAL) {
2115                 shutdown_pending = 1;
2116                 child_fatal = 1;
2117                 return;
2118             }
2119             else if (processed_status == APEXIT_CHILDSICK) {
2120                 /* tell perform_idle_server_maintenance to check into this
2121                  * on the next timer pop
2122                  */
2123                 sick_child_detected = 1;
2124             }
2125             /* non-fatal death... note that it's gone in the scoreboard. */
2126             child_slot = ap_find_child_by_pid(&pid);
2127             if (child_slot >= 0) {
2128                 for (i = 0; i < threads_per_child; i++)
2129                     ap_update_child_status_from_indexes(child_slot, i,
2130                                                         SERVER_DEAD,
2131                                                         (request_rec *) NULL);
2132
2133                 ap_scoreboard_image->parent[child_slot].pid = 0;
2134                 ap_scoreboard_image->parent[child_slot].quiescing = 0;
2135                 if (processed_status == APEXIT_CHILDSICK) {
2136                     /* resource shortage, minimize the fork rate */
2137                     idle_spawn_rate = 1;
2138                 }
2139                 else if (remaining_children_to_start
2140                          && child_slot < ap_daemons_limit) {
2141                     /* we're still doing a 1-for-1 replacement of dead
2142                      * children with new children
2143                      */
2144                     make_child(ap_server_conf, child_slot);
2145                     --remaining_children_to_start;
2146                 }
2147             }
2148             else if (ap_unregister_extra_mpm_process(pid.pid) == 1) {
2149                 /* handled */
2150 #if APR_HAS_OTHER_CHILD
2151             }
2152             else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
2153                                                 status) == 0) {
2154                 /* handled */
2155 #endif
2156             }
2157             else if (is_graceful) {
2158                 /* Great, we've probably just lost a slot in the
2159                  * scoreboard.  Somehow we don't know about this child.
2160                  */
2161                 ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
2162                              ap_server_conf,
2163                              "long lost child came home! (pid %ld)",
2164                              (long) pid.pid);
2165             }
2166             /* Don't perform idle maintenance when a child dies,
2167              * only do it when there's a timeout.  Remember only a
2168              * finite number of children can die, and it's pretty
2169              * pathological for a lot to die suddenly.
2170              */
2171             continue;
2172         }
2173         else if (remaining_children_to_start) {
2174             /* we hit a 1 second timeout in which none of the previous
2175              * generation of children needed to be reaped... so assume
2176              * they're all done, and pick up the slack if any is left.
2177              */
2178             startup_children(remaining_children_to_start);
2179             remaining_children_to_start = 0;
2180             /* In any event we really shouldn't do the code below because
2181              * few of the servers we just started are in the IDLE state
2182              * yet, so we'd mistakenly create an extra server.
2183              */
2184             continue;
2185         }
2186
2187         perform_idle_server_maintenance();
2188     }
2189 }
2190
2191 static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
2192 {
2193     int remaining_children_to_start;
2194
2195     ap_log_pid(pconf, ap_pid_fname);
2196
2197     if (!is_graceful) {
2198         if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
2199             mpm_state = AP_MPMQ_STOPPING;
2200             return DONE;
2201         }
2202         /* fix the generation number in the global score; we just got a new,
2203          * cleared scoreboard
2204          */
2205         ap_scoreboard_image->global->running_generation = my_generation;
2206     }
2207
2208     set_signals();
2209     /* Don't thrash... */
2210     if (max_spare_threads < min_spare_threads + threads_per_child)
2211         max_spare_threads = min_spare_threads + threads_per_child;
2212
2213     /* If we're doing a graceful_restart then we're going to see a lot
2214      * of children exiting immediately when we get into the main loop
2215      * below (because we just sent them AP_SIG_GRACEFUL).  This happens pretty
2216      * rapidly... and for each one that exits we may start a new one, until
2217      * there are at least min_spare_threads idle threads, counting across
2218      * all children.  But we may be permitted to start more children than
2219      * that, so we'll just keep track of how many we're
2220      * supposed to start up without the 1 second penalty between each fork.
2221      */
2222     remaining_children_to_start = ap_daemons_to_start;
2223     if (remaining_children_to_start > ap_daemons_limit) {
2224         remaining_children_to_start = ap_daemons_limit;
2225     }
2226     if (!is_graceful) {
2227         startup_children(remaining_children_to_start);
2228         remaining_children_to_start = 0;
2229     }
2230     else {
2231         /* give the system some time to recover before kicking into
2232          * exponential mode */
2233         hold_off_on_exponential_spawning = 10;
2234     }
2235
2236     ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2237                  "%s configured -- resuming normal operations",
2238                  ap_get_server_description());
2239     ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf,
2240                  "Server built: %s", ap_get_server_built());
2241     ap_log_command_line(plog, s);
2242
2243     restart_pending = shutdown_pending = 0;
2244     mpm_state = AP_MPMQ_RUNNING;
2245
2246     server_main_loop(remaining_children_to_start);
2247     mpm_state = AP_MPMQ_STOPPING;
2248
2249     if (shutdown_pending && !is_graceful) {
2250         /* Time to shut down:
2251          * Kill child processes, tell them to call child_exit, etc...
2252          */
2253         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2254         ap_reclaim_child_processes(1);  /* Start with SIGTERM */
2255
2256         if (!child_fatal) {
2257             /* cleanup pid file on normal shutdown */
2258             const char *pidfile = NULL;
2259             pidfile = ap_server_root_relative(pconf, ap_pid_fname);
2260             if (pidfile != NULL && unlink(pidfile) == 0)
2261                 ap_log_error(APLOG_MARK, APLOG_INFO, 0,
2262                              ap_server_conf,
2263                              "removed PID file %s (pid=%ld)",
2264                              pidfile, (long) getpid());
2265
2266             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0,
2267                          ap_server_conf, "caught SIGTERM, shutting down");
2268         }
2269         return DONE;
2270     } else if (shutdown_pending) {
2271         /* Time to gracefully shut down:
2272          * Kill child processes, tell them to call child_exit, etc...
2273          */
2274         int active_children;
2275         int index;
2276         apr_time_t cutoff = 0;
2277
2278         /* Close our listeners, and then ask our children to do same */
2279         ap_close_listeners();
2280         ap_event_pod_killpg(pod, ap_daemons_limit, TRUE);
2281         ap_relieve_child_processes();
2282
2283         if (!child_fatal) {
2284             /* cleanup pid file on normal shutdown */
2285             const char *pidfile = NULL;
2286             pidfile = ap_server_root_relative (pconf, ap_pid_fname);
2287             if ( pidfile != NULL && unlink(pidfile) == 0)
2288                 ap_log_error(APLOG_MARK, APLOG_INFO, 0,
2289                              ap_server_conf,
2290                              "removed PID file %s (pid=%ld)",
2291                              pidfile, (long)getpid());
2292
2293             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2294                          "caught " AP_SIG_GRACEFUL_STOP_STRING
2295                          ", shutting down gracefully");
2296         }
2297
2298         if (ap_graceful_shutdown_timeout) {
2299             cutoff = apr_time_now() +
2300                      apr_time_from_sec(ap_graceful_shutdown_timeout);
2301         }
2302
2303         /* Don't really exit until each child has finished */
2304         shutdown_pending = 0;
2305         do {
2306             /* Pause for a second */
2307             apr_sleep(apr_time_from_sec(1));
2308
2309             /* Relieve any children which have now exited */
2310             ap_relieve_child_processes();
2311
2312             active_children = 0;
2313             for (index = 0; index < ap_daemons_limit; ++index) {
2314                 if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
2315                     active_children = 1;
2316                     /* Having just one child is enough to stay around */
2317                     break;
2318                 }
2319             }
2320         } while (!shutdown_pending && active_children &&
2321                  (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));
2322
2323         /* We might be here because we received SIGTERM, either
2324          * way, try and make sure that all of our processes are
2325          * really dead.
2326          */
2327         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2328         ap_reclaim_child_processes(1);
2329
2330         return DONE;
2331     }
2332
2333     /* we've been told to restart */
2334     apr_signal(SIGHUP, SIG_IGN);
2335
2336     if (one_process) {
2337         /* not worth thinking about */
2338         return DONE;
2339     }
2340
2341     /* advance to the next generation */
2342     /* XXX: we really need to make sure this new generation number isn't in
2343      * use by any of the children.
2344      */
2345     ++my_generation;
2346     ap_scoreboard_image->global->running_generation = my_generation;
2347
2348     if (is_graceful) {
2349         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2350                      AP_SIG_GRACEFUL_STRING
2351                      " received.  Doing graceful restart");
2352         /* wake up the children...time to die.  But we'll have more soon */
2353         ap_event_pod_killpg(pod, ap_daemons_limit, TRUE);
2354
2355
2356         /* This is mostly for debugging... so that we know what is still
2357          * gracefully dealing with existing request.
2358          */
2359
2360     }
2361     else {
2362         /* Kill 'em all.  Since the child acts the same on the parents SIGTERM
2363          * and a SIGHUP, we may as well use the same signal, because some user
2364          * pthreads are stealing signals from us left and right.
2365          */
2366         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2367
2368         ap_reclaim_child_processes(1);  /* Start with SIGTERM */
2369         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2370                      "SIGHUP received.  Attempting to restart");
2371     }
2372
2373     return OK;
2374 }
2375
2376 /* This really should be a post_config hook, but the error log is already
2377  * redirected by that point, so we need to do this in the open_logs phase.
2378  */
2379 static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
2380                            apr_pool_t * ptemp, server_rec * s)
2381 {
2382     int startup = 0;
2383     int level_flags = 0;
2384     apr_status_t rv;
2385
2386     pconf = p;
2387
2388     /* the reverse of pre_config, we want this only the first time around */
2389     if (retained->module_loads == 1) {
2390         startup = 1;
2391         level_flags |= APLOG_STARTUP;
2392     }
2393
2394     if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
2395         ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
2396                      (startup ? NULL : s),
2397                      "no listening sockets available, shutting down");
2398         return DONE;
2399     }
2400
2401     if (!one_process) {
2402         if ((rv = ap_event_pod_open(pconf, &pod))) {
2403             ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
2404                          (startup ? NULL : s),
2405                          "could not open pipe-of-death");
2406             return DONE;
2407         }
2408     }
2409     return OK;
2410 }
2411
2412 static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
2413                             apr_pool_t * ptemp)
2414 {
2415     int no_detach, debug, foreground;
2416     apr_status_t rv;
2417     const char *userdata_key = "mpm_event_module";
2418
2419     mpm_state = AP_MPMQ_STARTING;
2420
2421     debug = ap_exists_config_define("DEBUG");
2422
2423     if (debug) {
2424         foreground = one_process = 1;
2425         no_detach = 0;
2426     }
2427     else {
2428         one_process = ap_exists_config_define("ONE_PROCESS");
2429         no_detach = ap_exists_config_define("NO_DETACH");
2430         foreground = ap_exists_config_define("FOREGROUND");
2431     }
2432
2433     /* sigh, want this only the second time around */
2434     retained = ap_retained_data_get(userdata_key);
2435     if (!retained) {
2436         retained = ap_retained_data_create(userdata_key, sizeof(*retained));
2437     }
2438     ++retained->module_loads;
2439     if (retained->module_loads == 2) {
2440         is_graceful = 0;
2441         rv = apr_pollset_create(&event_pollset, 1, plog,
2442                                 APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
2443         if (rv != APR_SUCCESS) {
2444             ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
2445                          "Couldn't create a Thread Safe Pollset. "
2446                          "Is it supported on your platform?"
2447                          "Also check system or user limits!");
2448             return HTTP_INTERNAL_SERVER_ERROR;
2449         }
2450         apr_pollset_destroy(event_pollset);
2451
2452         if (!one_process && !foreground) {
2453             rv = apr_proc_detach(no_detach ? APR_PROC_DETACH_FOREGROUND
2454                                  : APR_PROC_DETACH_DAEMONIZE);
2455             if (rv != APR_SUCCESS) {
2456                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
2457                              "apr_proc_detach failed");
2458                 return HTTP_INTERNAL_SERVER_ERROR;
2459             }
2460         }
2461         parent_pid = ap_my_pid = getpid();
2462     }
2463
2464     ap_listen_pre_config();
2465     ap_daemons_to_start = DEFAULT_START_DAEMON;
2466     min_spare_threads = DEFAULT_MIN_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
2467     max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
2468     server_limit = DEFAULT_SERVER_LIMIT;
2469     thread_limit = DEFAULT_THREAD_LIMIT;
2470     ap_daemons_limit = server_limit;
2471     threads_per_child = DEFAULT_THREADS_PER_CHILD;
2472     max_clients = ap_daemons_limit * threads_per_child;
2473     ap_pid_fname = DEFAULT_PIDLOG;
2474     ap_max_requests_per_child = DEFAULT_MAX_REQUESTS_PER_CHILD;
2475     ap_extended_status = 0;
2476     ap_max_mem_free = APR_ALLOCATOR_MAX_FREE_UNLIMITED;
2477
2478     apr_cpystrn(ap_coredump_dir, ap_server_root, sizeof(ap_coredump_dir));
2479
2480     return OK;
2481 }
2482
2483 static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
2484                               apr_pool_t *ptemp, server_rec *s)
2485 {
2486     int startup = 0;
2487
2488     /* the reverse of pre_config, we want this only the first time around */
2489     if (retained->module_loads == 1) {
2490         startup = 1;
2491     }
2492
2493     if (server_limit > MAX_SERVER_LIMIT) {
2494         if (startup) {
2495             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2496                          "WARNING: ServerLimit of %d exceeds compile-time "
2497                          "limit of", server_limit);
2498             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2499                          " %d servers, decreasing to %d.",
2500                          MAX_SERVER_LIMIT, MAX_SERVER_LIMIT);
2501         } else {
2502             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2503                          "ServerLimit of %d exceeds compile-time limit "
2504                          "of %d, decreasing to match",
2505                          server_limit, MAX_SERVER_LIMIT);
2506         }
2507         server_limit = MAX_SERVER_LIMIT;
2508     }
2509     else if (server_limit < 1) {
2510         if (startup) {
2511             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2512                          "WARNING: ServerLimit of %d not allowed, "
2513                          "increasing to 1.", server_limit);
2514         } else {
2515             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2516                          "ServerLimit of %d not allowed, increasing to 1",
2517                          server_limit);
2518         }
2519         server_limit = 1;
2520     }
2521
2522     /* you cannot change ServerLimit across a restart; ignore
2523      * any such attempts
2524      */
2525     if (!retained->first_server_limit) {
2526         retained->first_server_limit = server_limit;
2527     }
2528     else if (server_limit != retained->first_server_limit) {
2529         /* don't need a startup console version here */
2530         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2531                      "changing ServerLimit to %d from original value of %d "
2532                      "not allowed during restart",
2533                      server_limit, retained->first_server_limit);
2534         server_limit = retained->first_server_limit;
2535     }
2536
2537     if (thread_limit > MAX_THREAD_LIMIT) {
2538         if (startup) {
2539             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2540                          "WARNING: ThreadLimit of %d exceeds compile-time "
2541                          "limit of", thread_limit);
2542             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2543                          " %d threads, decreasing to %d.",
2544                          MAX_THREAD_LIMIT, MAX_THREAD_LIMIT);
2545         } else {
2546             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2547                          "ThreadLimit of %d exceeds compile-time limit "
2548                          "of %d, decreasing to match",
2549                          thread_limit, MAX_THREAD_LIMIT);
2550         }
2551         thread_limit = MAX_THREAD_LIMIT;
2552     }
2553     else if (thread_limit < 1) {
2554         if (startup) {
2555             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2556                          "WARNING: ThreadLimit of %d not allowed, "
2557                          "increasing to 1.", thread_limit);
2558         } else {
2559             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2560                          "ThreadLimit of %d not allowed, increasing to 1",
2561                          thread_limit);
2562         }
2563         thread_limit = 1;
2564     }
2565
2566     /* you cannot change ThreadLimit across a restart; ignore
2567      * any such attempts
2568      */
2569     if (!retained->first_thread_limit) {
2570         retained->first_thread_limit = thread_limit;
2571     }
2572     else if (thread_limit != retained->first_thread_limit) {
2573         /* don't need a startup console version here */
2574         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2575                      "changing ThreadLimit to %d from original value of %d "
2576                      "not allowed during restart",
2577                      thread_limit, retained->first_thread_limit);
2578         thread_limit = retained->first_thread_limit;
2579     }
2580
2581     if (threads_per_child > thread_limit) {
2582         if (startup) {
2583             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2584                          "WARNING: ThreadsPerChild of %d exceeds ThreadLimit "
2585                          "of", threads_per_child);
2586             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2587                          " %d threads, decreasing to %d.",
2588                          thread_limit, thread_limit);
2589             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2590                          " To increase, please see the ThreadLimit "
2591                          "directive.");
2592         } else {
2593             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2594                          "ThreadsPerChild of %d exceeds ThreadLimit "
2595                          "of %d, decreasing to match",
2596                          threads_per_child, thread_limit);
2597         }
2598         threads_per_child = thread_limit;
2599     }
2600     else if (threads_per_child < 1) {
2601         if (startup) {
2602             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2603                          "WARNING: ThreadsPerChild of %d not allowed, "
2604                          "increasing to 1.", threads_per_child);
2605         } else {
2606             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2607                          "ThreadsPerChild of %d not allowed, increasing to 1",
2608                          threads_per_child);
2609         }
2610         threads_per_child = 1;
2611     }
2612
2613     if (max_clients < threads_per_child) {
2614         if (startup) {
2615             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2616                          "WARNING: MaxClients of %d is less than "
2617                          "ThreadsPerChild of", max_clients);
2618             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2619                          " %d, increasing to %d.  MaxClients must be at "
2620                          "least as large",
2621                          threads_per_child, threads_per_child);
2622             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2623                          " as the number of threads in a single server.");
2624         } else {
2625             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2626                          "MaxClients of %d is less than ThreadsPerChild "
2627                          "of %d, increasing to match",
2628                          max_clients, threads_per_child);
2629         }
2630         max_clients = threads_per_child;
2631     }
2632
2633     ap_daemons_limit = max_clients / threads_per_child;
2634
2635     if (max_clients % threads_per_child) {
2636         int tmp_max_clients = ap_daemons_limit * threads_per_child;
2637
2638         if (startup) {
2639             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2640                          "WARNING: MaxClients of %d is not an integer "
2641                          "multiple of", max_clients);
2642             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2643                          " ThreadsPerChild of %d, decreasing to nearest "
2644                          "multiple %d,", threads_per_child,
2645                          tmp_max_clients);
2646             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2647                          " for a maximum of %d servers.",
2648                          ap_daemons_limit);
2649         } else {
2650             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2651                          "MaxClients of %d is not an integer multiple of "
2652                          "ThreadsPerChild of %d, decreasing to nearest "
2653                          "multiple %d", max_clients, threads_per_child,
2654                          tmp_max_clients);
2655         }
2656         max_clients = tmp_max_clients;
2657     }
2658
2659     if (ap_daemons_limit > server_limit) {
2660         if (startup) {
2661             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2662                          "WARNING: MaxClients of %d would require %d "
2663                          "servers and ", max_clients, ap_daemons_limit);
2664             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2665                          " would exceed ServerLimit of %d, decreasing to %d.",
2666                          server_limit, server_limit * threads_per_child);
2667             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2668                          " To increase, please see the ServerLimit "
2669                          "directive.");
2670         } else {
2671             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2672                          "MaxClients of %d would require %d servers and "
2673                          "exceed ServerLimit of %d, decreasing to %d",
2674                          max_clients, ap_daemons_limit, server_limit,
2675                          server_limit * threads_per_child);
2676         }
2677         ap_daemons_limit = server_limit;
2678     }
2679
2680     /* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */
2681     if (ap_daemons_to_start < 0) {
2682         if (startup) {
2683             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2684                          "WARNING: StartServers of %d not allowed, "
2685                          "increasing to 1.", ap_daemons_to_start);
2686         } else {
2687             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2688                          "StartServers of %d not allowed, increasing to 1",
2689                          ap_daemons_to_start);
2690         }
2691         ap_daemons_to_start = 1;
2692     }
2693
2694     if (min_spare_threads < 1) {
2695         if (startup) {
2696             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2697                          "WARNING: MinSpareThreads of %d not allowed, "
2698                          "increasing to 1", min_spare_threads);
2699             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2700                          " to avoid almost certain server failure.");
2701             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2702                          " Please read the documentation.");
2703         } else {
2704             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2705                          "MinSpareThreads of %d not allowed, increasing to 1",
2706                          min_spare_threads);
2707         }
2708         min_spare_threads = 1;
2709     }
2710
2711     /* max_spare_threads < min_spare_threads + threads_per_child
2712      * checked in ap_mpm_run()
2713      */
2714
2715     return OK;
2716 }
2717
2718 static void event_hooks(apr_pool_t * p)
2719 {
2720     /* Our open_logs hook function must run before the core's, or stderr
2721      * will be redirected to a file, and the messages won't print to the
2722      * console.
2723      */
2724     static const char *const aszSucc[] = { "core.c", NULL };
2725     one_process = 0;
2726
2727     ap_hook_open_logs(event_open_logs, NULL, aszSucc, APR_HOOK_REALLY_FIRST);
2728     /* we need to set the MPM state before other pre-config hooks use MPM query
2729      * to retrieve it, so register as REALLY_FIRST
2730      */
2731     ap_hook_pre_config(event_pre_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
2732     ap_hook_check_config(event_check_config, NULL, NULL, APR_HOOK_MIDDLE);
2733     ap_hook_mpm(event_run, NULL, NULL, APR_HOOK_MIDDLE);
2734     ap_hook_mpm_query(event_query, NULL, NULL, APR_HOOK_MIDDLE);
2735     ap_hook_mpm_note_child_killed(event_note_child_killed, NULL, NULL, APR_HOOK_MIDDLE);
2736     ap_hook_mpm_register_timed_callback(event_register_timed_callback, NULL, NULL,
2737                                         APR_HOOK_MIDDLE);
2738     ap_hook_mpm_get_name(event_get_name, NULL, NULL, APR_HOOK_MIDDLE);
2739 }
2740
2741 static const char *set_daemons_to_start(cmd_parms *cmd, void *dummy,
2742                                         const char *arg)
2743 {
2744     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2745     if (err != NULL) {
2746         return err;
2747     }
2748
2749     ap_daemons_to_start = atoi(arg);
2750     return NULL;
2751 }
2752
2753 static const char *set_min_spare_threads(cmd_parms * cmd, void *dummy,
2754                                          const char *arg)
2755 {
2756     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2757     if (err != NULL) {
2758         return err;
2759     }
2760
2761     min_spare_threads = atoi(arg);
2762     return NULL;
2763 }
2764
2765 static const char *set_max_spare_threads(cmd_parms * cmd, void *dummy,
2766                                          const char *arg)
2767 {
2768     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2769     if (err != NULL) {
2770         return err;
2771     }
2772
2773     max_spare_threads = atoi(arg);
2774     return NULL;
2775 }
2776
2777 static const char *set_max_clients(cmd_parms * cmd, void *dummy,
2778                                    const char *arg)
2779 {
2780     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2781     if (err != NULL) {
2782         return err;
2783     }
2784
2785     max_clients = atoi(arg);
2786     return NULL;
2787 }
2788
2789 static const char *set_threads_per_child(cmd_parms * cmd, void *dummy,
2790                                          const char *arg)
2791 {
2792     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2793     if (err != NULL) {
2794         return err;
2795     }
2796
2797     threads_per_child = atoi(arg);
2798     return NULL;
2799 }
2800 static const char *set_server_limit (cmd_parms *cmd, void *dummy, const char *arg)
2801 {
2802     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2803     if (err != NULL) {
2804         return err;
2805     }
2806
2807     server_limit = atoi(arg);
2808     return NULL;
2809 }
2810
2811 static const char *set_thread_limit(cmd_parms * cmd, void *dummy,
2812                                     const char *arg)
2813 {
2814     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2815     if (err != NULL) {
2816         return err;
2817     }
2818
2819     thread_limit = atoi(arg);
2820     return NULL;
2821 }
2822
2823 static const command_rec event_cmds[] = {
2824     LISTEN_COMMANDS,
2825     AP_INIT_TAKE1("StartServers", set_daemons_to_start, NULL, RSRC_CONF,
2826                   "Number of child processes launched at server startup"),
2827     AP_INIT_TAKE1("ServerLimit", set_server_limit, NULL, RSRC_CONF,
2828                   "Maximum number of child processes for this run of Apache"),
2829     AP_INIT_TAKE1("MinSpareThreads", set_min_spare_threads, NULL, RSRC_CONF,
2830                   "Minimum number of idle threads, to handle request spikes"),
2831     AP_INIT_TAKE1("MaxSpareThreads", set_max_spare_threads, NULL, RSRC_CONF,
2832                   "Maximum number of idle threads"),
2833     AP_INIT_TAKE1("MaxClients", set_max_clients, NULL, RSRC_CONF,
2834                   "Maximum number of threads alive at the same time"),
2835     AP_INIT_TAKE1("ThreadsPerChild", set_threads_per_child, NULL, RSRC_CONF,
2836                   "Number of threads each child creates"),
2837     AP_INIT_TAKE1("ThreadLimit", set_thread_limit, NULL, RSRC_CONF,
2838                   "Maximum number of worker threads per child process for this "
2839                   "run of Apache - Upper limit for ThreadsPerChild"),
2840     AP_GRACEFUL_SHUTDOWN_TIMEOUT_COMMAND,
2841     {NULL}
2842 };
2843
2844 AP_DECLARE_MODULE(mpm_event) = {
2845     MPM20_MODULE_STUFF,
2846     NULL,                       /* hook to run before apache parses args */
2847     NULL,                       /* create per-directory config structure */
2848     NULL,                       /* merge per-directory config structures */
2849     NULL,                       /* create per-server config structure */
2850     NULL,                       /* merge per-server config structures */
2851     event_cmds,                 /* command apr_table_t */
2852     event_hooks                 /* register_hooks */
2853 };