granicus.if.org Git - apache/blob - server/mpm/event/event.c

   1 /* Licensed to the Apache Software Foundation (ASF) under one or more
   2  * contributor license agreements.  See the NOTICE file distributed with
   3  * this work for additional information regarding copyright ownership.
   4  * The ASF licenses this file to You under the Apache License, Version 2.0
   5  * (the "License"); you may not use this file except in compliance with
   6  * the License.  You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * This MPM tries to fix the 'keep alive problem' in HTTP.
  19  *
 * After a client completes the first request, the client can keep the
 * connection open to send more requests with the same socket.  This can save
 * significant overhead in creating TCP connections.  However, the major
 * disadvantage is that Apache traditionally keeps an entire child
 * process/thread waiting for data from the client.  To solve this problem,
 * this MPM has a dedicated thread for handling both the Listening sockets,
 * and all sockets that are in a Keep Alive status.
  27  *
  28  * The MPM assumes the underlying apr_pollset implementation is somewhat
  29  * threadsafe.  This currently is only compatible with KQueue and EPoll.  This
  30  * enables the MPM to avoid extra high level locking or having to wake up the
  31  * listener thread when a keep-alive socket needs to be sent to it.
  32  *
 * This MPM does not perform well on older platforms that do not have very
 * good threading, like Linux with a 2.4 kernel, but this does not matter,
 * since we require EPoll or KQueue.
  36  *
  37  * For FreeBSD, use 5.3.  It is possible to run this MPM on FreeBSD 5.2.1, if
  38  * you use libkse (see `man libmap.conf`).
  39  *
  40  * For NetBSD, use at least 2.0.
  41  *
  42  * For Linux, you should use a 2.6 kernel, and make sure your glibc has epoll
  43  * support compiled in.
  44  *
  45  */
  46
  47 #include "apr.h"
  48 #include "apr_portable.h"
  49 #include "apr_strings.h"
  50 #include "apr_file_io.h"
  51 #include "apr_thread_proc.h"
  52 #include "apr_signal.h"
  53 #include "apr_thread_mutex.h"
  54 #include "apr_poll.h"
  55 #include "apr_ring.h"
  56 #include "apr_queue.h"
  57 #define APR_WANT_STRFUNC
  58 #include "apr_want.h"
  59 #include "apr_version.h"
  60
  61 #if APR_HAVE_UNISTD_H
  62 #include <unistd.h>
  63 #endif
  64 #if APR_HAVE_SYS_SOCKET_H
  65 #include <sys/socket.h>
  66 #endif
  67 #if APR_HAVE_SYS_WAIT_H
  68 #include <sys/wait.h>
  69 #endif
  70 #ifdef HAVE_SYS_PROCESSOR_H
  71 #include <sys/processor.h>      /* for bindprocessor() */
  72 #endif
  73
  74 #if !APR_HAS_THREADS
  75 #error The Event MPM requires APR threads, but they are unavailable.
  76 #endif
  77
  78 #include "ap_config.h"
  79 #include "httpd.h"
  80 #include "http_main.h"
  81 #include "http_log.h"
  82 #include "http_config.h"        /* for read_config */
  83 #include "http_core.h"          /* for get_remote_host */
  84 #include "http_connection.h"
  85 #include "ap_mpm.h"
  86 #include "pod.h"
  87 #include "mpm_common.h"
  88 #include "ap_listen.h"
  89 #include "scoreboard.h"
  90 #include "fdqueue.h"
  91 #include "mpm_default.h"
  92 #include "http_vhost.h"
  93 #include "unixd.h"
  94
  95 #include <signal.h>
  96 #include <limits.h>             /* for INT_MAX */
  97
  98
  99 #if HAVE_SERF
 100 #include "mod_serf.h"
 101 #include "serf.h"
 102 #endif
 103
 104 /* Limit on the total --- clients will be locked out if more servers than
 105  * this are needed.  It is intended solely to keep the server from crashing
 106  * when things get out of hand.
 107  *
 108  * We keep a hard maximum number of servers, for two reasons --- first off,
 109  * in case something goes seriously wrong, we want to stop the fork bomb
 110  * short of actually crashing the machine we're running on by filling some
 111  * kernel table.  Secondly, it keeps the size of the scoreboard file small
 112  * enough that we can read the whole thing without worrying too much about
 113  * the overhead.
 114  */
 115 #ifndef DEFAULT_SERVER_LIMIT
 116 #define DEFAULT_SERVER_LIMIT 16
 117 #endif
 118
 119 /* Admin can't tune ServerLimit beyond MAX_SERVER_LIMIT.  We want
 120  * some sort of compile-time limit to help catch typos.
 121  */
 122 #ifndef MAX_SERVER_LIMIT
 123 #define MAX_SERVER_LIMIT 20000
 124 #endif
 125
 126 /* Limit on the threads per process.  Clients will be locked out if more than
 127  * this are needed.
 128  *
 129  * We keep this for one reason it keeps the size of the scoreboard file small
 130  * enough that we can read the whole thing without worrying too much about
 131  * the overhead.
 132  */
 133 #ifndef DEFAULT_THREAD_LIMIT
 134 #define DEFAULT_THREAD_LIMIT 64
 135 #endif
 136
 137 /* Admin can't tune ThreadLimit beyond MAX_THREAD_LIMIT.  We want
 138  * some sort of compile-time limit to help catch typos.
 139  */
 140 #ifndef MAX_THREAD_LIMIT
 141 #define MAX_THREAD_LIMIT 100000
 142 #endif
 143
 144 #define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
 145
 146 #if !APR_VERSION_AT_LEAST(1,4,0)
 147 #define apr_time_from_msec(x) (x * 1000)
 148 #endif
 149
 150 /*
 151  * Actual definitions of config globals
 152  */
 153
 154 static int threads_per_child = 0;   /* Worker threads per child */
 155 static int ap_daemons_to_start = 0;
 156 static int min_spare_threads = 0;
 157 static int max_spare_threads = 0;
 158 static int ap_daemons_limit = 0;
 159 static int max_clients = 0;
 160 static int server_limit = 0;
 161 static int thread_limit = 0;
 162 static int dying = 0;
 163 static int workers_may_exit = 0;
 164 static int start_thread_may_exit = 0;
 165 static int listener_may_exit = 0;
 166 static int requests_this_child;
 167 static int num_listensocks = 0;
 168 static int resource_shortage = 0;
 169 static fd_queue_t *worker_queue;
 170 static fd_queue_info_t *worker_queue_info;
 171 static int mpm_state = AP_MPMQ_STARTING;
 172
 173 static apr_thread_mutex_t *timeout_mutex;
 174 APR_RING_HEAD(timeout_head_t, conn_state_t);
 175 static struct timeout_head_t timeout_head, keepalive_timeout_head;
 176
 177 static apr_pollset_t *event_pollset;
 178
 179 #if HAVE_SERF
 180 typedef struct {
 181     apr_pollset_t *pollset;
 182     apr_pool_t *pool;
 183 } s_baton_t;
 184
 185 static serf_context_t *g_serf;
 186 #endif
 187
 188 /* The structure used to pass unique initialization info to each thread */
 189 typedef struct
 190 {
 191     int pid;
 192     int tid;
 193     int sd;
 194 } proc_info;
 195
 196 /* Structure used to pass information to the thread responsible for
 197  * creating the rest of the threads.
 198  */
 199 typedef struct
 200 {
 201     apr_thread_t **threads;
 202     apr_thread_t *listener;
 203     int child_num_arg;
 204     apr_threadattr_t *threadattr;
 205 } thread_starter;
 206
 207 typedef enum
 208 {
 209     PT_CSD,
 210     PT_ACCEPT
 211 #if HAVE_SERF
 212     , PT_SERF
 213 #endif
 214 } poll_type_e;
 215
 216 typedef struct
 217 {
 218     poll_type_e type;
 219     int bypass_push;
 220     void *baton;
 221 } listener_poll_type;
 222
 223 /* data retained by event across load/unload of the module
 224  * allocated on first call to pre-config hook; located on
 225  * subsequent calls to pre-config hook
 226  */
 227 typedef struct event_retained_data {
 228     int first_server_limit;
 229     int first_thread_limit;
 230     int module_loads;
 231     int sick_child_detected;
 232     ap_generation_t my_generation;
 233     int volatile is_graceful; /* set from signal handler */
 234     int maxclients_reported;
 235     /*
 236      * The max child slot ever assigned, preserved across restarts.  Necessary
 237      * to deal with MaxClients changes across AP_SIG_GRACEFUL restarts.  We
 238      * use this value to optimize routines that have to scan the entire
 239      * scoreboard.
 240      */
 241     int max_daemons_limit;
 242     /*
 243      * idle_spawn_rate is the number of children that will be spawned on the
 244      * next maintenance cycle if there aren't enough idle servers.  It is
 245      * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by
 246      * without the need to spawn.
 247      */
 248     int idle_spawn_rate;
 249 #ifndef MAX_SPAWN_RATE
 250 #define MAX_SPAWN_RATE        (32)
 251 #endif
 252     int hold_off_on_exponential_spawning;
 253 } event_retained_data;
 254 static event_retained_data *retained;
 255
 256 #define ID_FROM_CHILD_THREAD(c, t)    ((c * thread_limit) + t)
 257
 258 static ap_event_pod_t *pod;
 259
 260 /* The event MPM respects a couple of runtime flags that can aid
 261  * in debugging. Setting the -DNO_DETACH flag will prevent the root process
 262  * from detaching from its controlling terminal. Additionally, setting
 263  * the -DONE_PROCESS flag (which implies -DNO_DETACH) will get you the
 264  * child_main loop running in the process which originally started up.
 265  * This gives you a pretty nice debugging environment.  (You'll get a SIGHUP
 266  * early in standalone_main; just continue through.  This is the server
 267  * trying to kill off any child processes which it might have lying
 268  * around --- Apache doesn't keep track of their pids, it just sends
 269  * SIGHUP to the process group, ignoring it in the root process.
 270  * Continue through and you'll be fine.).
 271  */
 272
 273 static int one_process = 0;
 274
 275 #ifdef DEBUG_SIGSTOP
 276 int raise_sigstop_flags;
 277 #endif
 278
 279 static apr_pool_t *pconf;       /* Pool for config stuff */
 280 static apr_pool_t *pchild;      /* Pool for httpd child stuff */
 281
 282 static pid_t ap_my_pid;         /* Linux getpid() doesn't work except in main
 283                                    thread. Use this instead */
 284 static pid_t parent_pid;
 285 static apr_os_thread_t *listener_os_thread;
 286
 287 /* The LISTENER_SIGNAL signal will be sent from the main thread to the
 288  * listener thread to wake it up for graceful termination (what a child
 289  * process from an old generation does when the admin does "apachectl
 290  * graceful").  This signal will be blocked in all threads of a child
 291  * process except for the listener thread.
 292  */
 293 #define LISTENER_SIGNAL     SIGHUP
 294
 295 /* An array of socket descriptors in use by each thread used to
 296  * perform a non-graceful (forced) shutdown of the server.
 297  */
 298 static apr_socket_t **worker_sockets;
 299
 300 static void close_worker_sockets(void)
 301 {
 302     int i;
 303     for (i = 0; i < threads_per_child; i++) {
 304         if (worker_sockets[i]) {
 305             apr_socket_close(worker_sockets[i]);
 306             worker_sockets[i] = NULL;
 307         }
 308     }
 309 }
 310
 311 static void wakeup_listener(void)
 312 {
 313     listener_may_exit = 1;
 314     if (!listener_os_thread) {
 315         /* XXX there is an obscure path that this doesn't handle perfectly:
 316          *     right after listener thread is created but before
 317          *     listener_os_thread is set, the first worker thread hits an
 318          *     error and starts graceful termination
 319          */
 320         return;
 321     }
 322
 323     /* unblock the listener if it's waiting for a worker */
 324     ap_queue_info_term(worker_queue_info);
 325
 326     /*
 327      * we should just be able to "kill(ap_my_pid, LISTENER_SIGNAL)" on all
 328      * platforms and wake up the listener thread since it is the only thread
 329      * with SIGHUP unblocked, but that doesn't work on Linux
 330      */
 331 #ifdef HAVE_PTHREAD_KILL
 332     pthread_kill(*listener_os_thread, LISTENER_SIGNAL);
 333 #else
 334     kill(ap_my_pid, LISTENER_SIGNAL);
 335 #endif
 336 }
 337
 338 #define ST_INIT              0
 339 #define ST_GRACEFUL          1
 340 #define ST_UNGRACEFUL        2
 341
 342 static int terminate_mode = ST_INIT;
 343
 344 static void signal_threads(int mode)
 345 {
 346     if (terminate_mode == mode) {
 347         return;
 348     }
 349     terminate_mode = mode;
 350     mpm_state = AP_MPMQ_STOPPING;
 351
 352     /* in case we weren't called from the listener thread, wake up the
 353      * listener thread
 354      */
 355     wakeup_listener();
 356
 357     /* for ungraceful termination, let the workers exit now;
 358      * for graceful termination, the listener thread will notify the
 359      * workers to exit once it has stopped accepting new connections
 360      */
 361     if (mode == ST_UNGRACEFUL) {
 362         workers_may_exit = 1;
 363         ap_queue_interrupt_all(worker_queue);
 364         close_worker_sockets(); /* forcefully kill all current connections */
 365     }
 366 }
 367
 368 static int event_query(int query_code, int *result, apr_status_t *rv)
 369 {
 370     *rv = APR_SUCCESS;
 371     switch (query_code) {
 372     case AP_MPMQ_MAX_DAEMON_USED:
 373         *result = retained->max_daemons_limit;
 374         break;
 375     case AP_MPMQ_IS_THREADED:
 376         *result = AP_MPMQ_STATIC;
 377         break;
 378     case AP_MPMQ_IS_FORKED:
 379         *result = AP_MPMQ_DYNAMIC;
 380         break;
 381     case AP_MPMQ_IS_ASYNC:
 382         *result = 1;
 383         break;
 384     case AP_MPMQ_HAS_SERF:
 385         *result = 1;
 386         break;
 387     case AP_MPMQ_HARD_LIMIT_DAEMONS:
 388         *result = server_limit;
 389         break;
 390     case AP_MPMQ_HARD_LIMIT_THREADS:
 391         *result = thread_limit;
 392         break;
 393     case AP_MPMQ_MAX_THREADS:
 394         *result = threads_per_child;
 395         break;
 396     case AP_MPMQ_MIN_SPARE_DAEMONS:
 397         *result = 0;
 398         break;
 399     case AP_MPMQ_MIN_SPARE_THREADS:
 400         *result = min_spare_threads;
 401         break;
 402     case AP_MPMQ_MAX_SPARE_DAEMONS:
 403         *result = 0;
 404         break;
 405     case AP_MPMQ_MAX_SPARE_THREADS:
 406         *result = max_spare_threads;
 407         break;
 408     case AP_MPMQ_MAX_REQUESTS_DAEMON:
 409         *result = ap_max_requests_per_child;
 410         break;
 411     case AP_MPMQ_MAX_DAEMONS:
 412         *result = ap_daemons_limit;
 413         break;
 414     case AP_MPMQ_MPM_STATE:
 415         *result = mpm_state;
 416         break;
 417     case AP_MPMQ_GENERATION:
 418         *result = retained->my_generation;
 419         break;
 420     default:
 421         *rv = APR_ENOTIMPL;
 422         break;
 423     }
 424     return OK;
 425 }
 426
 427 static apr_status_t event_note_child_killed(int childnum)
 428 {
 429     ap_scoreboard_image->parent[childnum].pid = 0;
 430     return APR_SUCCESS;
 431 }
 432
 433 static const char *event_get_name(void)
 434 {
 435     return "event";
 436 }
 437
 438 /* a clean exit from a child with proper cleanup */
 439 static void clean_child_exit(int code) __attribute__ ((noreturn));
 440 static void clean_child_exit(int code)
 441 {
 442     mpm_state = AP_MPMQ_STOPPING;
 443     if (pchild) {
 444         apr_pool_destroy(pchild);
 445     }
 446     exit(code);
 447 }
 448
 449 static void just_die(int sig)
 450 {
 451     clean_child_exit(0);
 452 }
 453
 454 /*****************************************************************
 455  * Connection structures and accounting...
 456  */
 457
 458 static int child_fatal;
 459
 460 /* volatile because they're updated from a signal handler */
 461 static int volatile shutdown_pending;
 462 static int volatile restart_pending;
 463
/*
 * ap_start_shutdown() and ap_start_restart(), below, are a first stab at
 * functions to initiate shutdown or restart without relying on signals.
 * Previously this was initiated in sig_term() and restart() signal handlers,
 * but we want to be able to start a shutdown/restart from other sources --
 * e.g. on Win32, from the service manager. Now the service manager can
 * call ap_start_shutdown() or ap_start_restart() as appropriate.  Note that
 * these functions can also be called by the child processes, since global
 * variables are no longer used to pass on the required action to the parent.
 *
 * These should only be called from the parent process itself, since the
 * parent process will use the shutdown_pending and restart_pending variables
 * to determine whether to shutdown or restart. The child process should
 * call signal_parent() directly to tell the parent to die -- this will
 * cause neither of those variables to be set, which the parent will
 * assume means something serious is wrong (which it will be, for the
 * child to force an exit) and so do an exit anyway.
 */
 482
 483 static void ap_start_shutdown(int graceful)
 484 {
 485     mpm_state = AP_MPMQ_STOPPING;
 486     if (shutdown_pending == 1) {
 487         /* Um, is this _probably_ not an error, if the user has
 488          * tried to do a shutdown twice quickly, so we won't
 489          * worry about reporting it.
 490          */
 491         return;
 492     }
 493     shutdown_pending = 1;
 494     retained->is_graceful = graceful;
 495 }
 496
 497 /* do a graceful restart if graceful == 1 */
 498 static void ap_start_restart(int graceful)
 499 {
 500     mpm_state = AP_MPMQ_STOPPING;
 501     if (restart_pending == 1) {
 502         /* Probably not an error - don't bother reporting it */
 503         return;
 504     }
 505     restart_pending = 1;
 506     retained->is_graceful = graceful;
 507 }
 508
 509 static void sig_term(int sig)
 510 {
 511     ap_start_shutdown(sig == AP_SIG_GRACEFUL_STOP);
 512 }
 513
 514 static void restart(int sig)
 515 {
 516     ap_start_restart(sig == AP_SIG_GRACEFUL);
 517 }
 518
 519 static void set_signals(void)
 520 {
 521 #ifndef NO_USE_SIGACTION
 522     struct sigaction sa;
 523 #endif
 524
 525     if (!one_process) {
 526         ap_fatal_signal_setup(ap_server_conf, pconf);
 527     }
 528
 529 #ifndef NO_USE_SIGACTION
 530     sigemptyset(&sa.sa_mask);
 531     sa.sa_flags = 0;
 532
 533     sa.sa_handler = sig_term;
 534     if (sigaction(SIGTERM, &sa, NULL) < 0)
 535         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 536                      "sigaction(SIGTERM)");
 537 #ifdef AP_SIG_GRACEFUL_STOP
 538     if (sigaction(AP_SIG_GRACEFUL_STOP, &sa, NULL) < 0)
 539         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 540                      "sigaction(" AP_SIG_GRACEFUL_STOP_STRING ")");
 541 #endif
 542 #ifdef SIGINT
 543     if (sigaction(SIGINT, &sa, NULL) < 0)
 544         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 545                      "sigaction(SIGINT)");
 546 #endif
 547 #ifdef SIGXCPU
 548     sa.sa_handler = SIG_DFL;
 549     if (sigaction(SIGXCPU, &sa, NULL) < 0)
 550         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 551                      "sigaction(SIGXCPU)");
 552 #endif
 553 #ifdef SIGXFSZ
 554     /* For systems following the LFS standard, ignoring SIGXFSZ allows
 555      * a write() beyond the 2GB limit to fail gracefully with E2BIG
 556      * rather than terminate the process. */
 557     sa.sa_handler = SIG_IGN;
 558     if (sigaction(SIGXFSZ, &sa, NULL) < 0)
 559         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 560                      "sigaction(SIGXFSZ)");
 561 #endif
 562 #ifdef SIGPIPE
 563     sa.sa_handler = SIG_IGN;
 564     if (sigaction(SIGPIPE, &sa, NULL) < 0)
 565         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 566                      "sigaction(SIGPIPE)");
 567 #endif
 568
 569     /* we want to ignore HUPs and AP_SIG_GRACEFUL while we're busy
 570      * processing one */
 571     sigaddset(&sa.sa_mask, SIGHUP);
 572     sigaddset(&sa.sa_mask, AP_SIG_GRACEFUL);
 573     sa.sa_handler = restart;
 574     if (sigaction(SIGHUP, &sa, NULL) < 0)
 575         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 576                      "sigaction(SIGHUP)");
 577     if (sigaction(AP_SIG_GRACEFUL, &sa, NULL) < 0)
 578         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 579                      "sigaction(" AP_SIG_GRACEFUL_STRING ")");
 580 #else
 581     if (!one_process) {
 582 #ifdef SIGXCPU
 583         apr_signal(SIGXCPU, SIG_DFL);
 584 #endif /* SIGXCPU */
 585 #ifdef SIGXFSZ
 586         apr_signal(SIGXFSZ, SIG_IGN);
 587 #endif /* SIGXFSZ */
 588     }
 589
 590     apr_signal(SIGTERM, sig_term);
 591 #ifdef SIGHUP
 592     apr_signal(SIGHUP, restart);
 593 #endif /* SIGHUP */
 594 #ifdef AP_SIG_GRACEFUL
 595     apr_signal(AP_SIG_GRACEFUL, restart);
 596 #endif /* AP_SIG_GRACEFUL */
 597 #ifdef AP_SIG_GRACEFUL_STOP
 598      apr_signal(AP_SIG_GRACEFUL_STOP, sig_term);
 599 #endif /* AP_SIG_GRACEFUL_STOP */
 600 #ifdef SIGPIPE
 601     apr_signal(SIGPIPE, SIG_IGN);
 602 #endif /* SIGPIPE */
 603
 604 #endif
 605 }
 606
 607 /*****************************************************************
 608  * Child process main loop.
 609  */
 610
 611 static int process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock,
 612                           conn_state_t * cs, int my_child_num,
 613                           int my_thread_num)
 614 {
 615     conn_rec *c;
 616     listener_poll_type *pt;
 617     long conn_id = ID_FROM_CHILD_THREAD(my_child_num, my_thread_num);
 618     int rc;
 619     ap_sb_handle_t *sbh;
 620
 621     ap_create_sb_handle(&sbh, p, my_child_num, my_thread_num);
 622
 623     if (cs == NULL) {           /* This is a new connection */
 624
 625         cs = apr_pcalloc(p, sizeof(conn_state_t));
 626
 627         pt = apr_pcalloc(p, sizeof(*pt));
 628
 629         cs->bucket_alloc = apr_bucket_alloc_create(p);
 630         c = ap_run_create_connection(p, ap_server_conf, sock,
 631                                      conn_id, sbh, cs->bucket_alloc);
 632         c->current_thread = thd;
 633         cs->c = c;
 634         c->cs = cs;
 635         cs->p = p;
 636         cs->pfd.desc_type = APR_POLL_SOCKET;
 637         cs->pfd.reqevents = APR_POLLIN;
 638         cs->pfd.desc.s = sock;
 639         pt->type = PT_CSD;
 640         pt->bypass_push = 1;
 641         pt->baton = cs;
 642         cs->pfd.client_data = pt;
 643         APR_RING_ELEM_INIT(cs, timeout_list);
 644
 645         ap_update_vhost_given_ip(c);
 646
 647         rc = ap_run_pre_connection(c, sock);
 648         if (rc != OK && rc != DONE) {
 649             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
 650                          "process_socket: connection aborted");
 651             c->aborted = 1;
 652         }
 653
 654         /**
 655          * XXX If the platform does not have a usable way of bundling
 656          * accept() with a socket readability check, like Win32,
 657          * and there are measurable delays before the
 658          * socket is readable due to the first data packet arriving,
 659          * it might be better to create the cs on the listener thread
 660          * with the state set to CONN_STATE_CHECK_REQUEST_LINE_READABLE
 661          *
 662          * FreeBSD users will want to enable the HTTP accept filter
 663          * module in their kernel for the highest performance
 664          * When the accept filter is active, sockets are kept in the
 665          * kernel until a HTTP request is received.
 666          */
 667         cs->state = CONN_STATE_READ_REQUEST_LINE;
 668
 669     }
 670     else {
 671         c = cs->c;
 672         c->sbh = sbh;
 673         pt = cs->pfd.client_data;
 674         c->current_thread = thd;
 675     }
 676
 677     if (c->clogging_input_filters && !c->aborted) {
 678         /* Since we have an input filter which 'cloggs' the input stream,
 679          * like mod_ssl, lets just do the normal read from input filters,
 680          * like the Worker MPM does.
 681          */
 682         ap_run_process_connection(c);
 683         if (cs->state != CONN_STATE_SUSPENDED) {
 684             cs->state = CONN_STATE_LINGER;
 685         }
 686     }
 687
 688 read_request:
 689     if (cs->state == CONN_STATE_READ_REQUEST_LINE) {
 690         if (!c->aborted) {
 691             ap_run_process_connection(c);
 692
 693             /* state will be updated upon return
 694              * fall thru to either wait for readability/timeout or
 695              * do lingering close
 696              */
 697         }
 698         else {
 699             cs->state = CONN_STATE_LINGER;
 700         }
 701     }
 702
 703     if (cs->state == CONN_STATE_WRITE_COMPLETION) {
 704         ap_filter_t *output_filter = c->output_filters;
 705         apr_status_t rv;
 706         while (output_filter->next != NULL) {
 707             output_filter = output_filter->next;
 708         }
 709         rv = output_filter->frec->filter_func.out_func(output_filter, NULL);
 710         if (rv != APR_SUCCESS) {
 711             ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf,
 712                      "network write failure in core output filter");
 713             cs->state = CONN_STATE_LINGER;
 714         }
 715         else if (c->data_in_output_filters) {
 716             /* Still in WRITE_COMPLETION_STATE:
 717              * Set a write timeout for this connection, and let the
 718              * event thread poll for writeability.
 719              */
 720             cs->expiration_time = ap_server_conf->timeout + apr_time_now();
 721             apr_thread_mutex_lock(timeout_mutex);
 722             APR_RING_INSERT_TAIL(&timeout_head, cs, conn_state_t, timeout_list);
 723             apr_thread_mutex_unlock(timeout_mutex);
 724             pt->bypass_push = 0;
 725             cs->pfd.reqevents = APR_POLLOUT | APR_POLLHUP | APR_POLLERR;
 726             rc = apr_pollset_add(event_pollset, &cs->pfd);
 727             return 1;
 728         }
 729         else if (c->keepalive != AP_CONN_KEEPALIVE || c->aborted ||
 730             listener_may_exit) {
 731             c->cs->state = CONN_STATE_LINGER;
 732         }
 733         else if (c->data_in_input_filters) {
 734             cs->state = CONN_STATE_READ_REQUEST_LINE;
 735             goto read_request;
 736         }
 737         else {
 738             cs->state = CONN_STATE_CHECK_REQUEST_LINE_READABLE;
 739         }
 740     }
 741
 742     if (cs->state == CONN_STATE_LINGER) {
 743         ap_lingering_close(c);
 744         apr_pool_clear(p);
 745         ap_push_pool(worker_queue_info, p);
 746         return 0;
 747     }
 748     else if (cs->state == CONN_STATE_CHECK_REQUEST_LINE_READABLE) {
 749         apr_status_t rc;
 750         listener_poll_type *pt = (listener_poll_type *) cs->pfd.client_data;
 751
 752         /* It greatly simplifies the logic to use a single timeout value here
 753          * because the new element can just be added to the end of the list and
 754          * it will stay sorted in expiration time sequence.  If brand new
 755          * sockets are sent to the event thread for a readability check, this
 756          * will be a slight behavior change - they use the non-keepalive
 757          * timeout today.  With a normal client, the socket will be readable in
 758          * a few milliseconds anyway.
 759          */
 760         cs->expiration_time = ap_server_conf->keep_alive_timeout +
 761                               apr_time_now();
 762         apr_thread_mutex_lock(timeout_mutex);
 763         APR_RING_INSERT_TAIL(&keepalive_timeout_head, cs, conn_state_t, timeout_list);
 764         apr_thread_mutex_unlock(timeout_mutex);
 765
 766         pt->bypass_push = 0;
 767         /* Add work to pollset. */
 768         cs->pfd.reqevents = APR_POLLIN;
 769         rc = apr_pollset_add(event_pollset, &cs->pfd);
 770
 771         if (rc != APR_SUCCESS) {
 772             ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
 773                          "process_socket: apr_pollset_add failure");
 774             AP_DEBUG_ASSERT(rc == APR_SUCCESS);
 775         }
 776     }
 777     return 1;
 778 }
 779
 780 /* requests_this_child has gone to zero or below.  See if the admin coded
 781    "MaxConnectionsPerChild 0", and keep going in that case.  Doing it this way
 782    simplifies the hot path in worker_thread */
 783 static void check_infinite_requests(void)
 784 {
 785     if (ap_max_requests_per_child) {
 786         signal_threads(ST_GRACEFUL);
 787     }
 788     else {
 789         requests_this_child = INT_MAX;  /* keep going */
 790     }
 791 }
 792
 793 static void unblock_signal(int sig)
 794 {
 795     sigset_t sig_mask;
 796
 797     sigemptyset(&sig_mask);
 798     sigaddset(&sig_mask, sig);
 799 #if defined(SIGPROCMASK_SETS_THREAD_MASK)
 800     sigprocmask(SIG_UNBLOCK, &sig_mask, NULL);
 801 #else
 802     pthread_sigmask(SIG_UNBLOCK, &sig_mask, NULL);
 803 #endif
 804 }
 805
 806 static void dummy_signal_handler(int sig)
 807 {
 808     /* XXX If specifying SIG_IGN is guaranteed to unblock a syscall,
 809      *     then we don't need this goofy function.
 810      */
 811 }
 812
 813
 814 #if HAVE_SERF
 815 static apr_status_t s_socket_add(void *user_baton,
 816                                  apr_pollfd_t *pfd,
 817                                  void *serf_baton)
 818 {
 819     s_baton_t *s = (s_baton_t*)user_baton;
 820     /* XXXXX: recycle listener_poll_types */
 821     listener_poll_type *pt = malloc(sizeof(*pt));
 822     pt->type = PT_SERF;
 823     pt->baton = serf_baton;
 824     pfd->client_data = pt;
 825     return apr_pollset_add(s->pollset, pfd);
 826 }
 827
 828 static apr_status_t s_socket_remove(void *user_baton,
 829                                     apr_pollfd_t *pfd,
 830                                     void *serf_baton)
 831 {
 832     s_baton_t *s = (s_baton_t*)user_baton;
 833     listener_poll_type *pt = pfd->client_data;
 834     free(pt);
 835     return apr_pollset_remove(s->pollset, pfd);
 836 }
 837 #endif
 838
 839 static apr_status_t init_pollset(apr_pool_t *p)
 840 {
 841 #if HAVE_SERF
 842     s_baton_t *baton = NULL;
 843 #endif
 844     ap_listen_rec *lr;
 845     listener_poll_type *pt;
 846
 847     APR_RING_INIT(&timeout_head, conn_state_t, timeout_list);
 848     APR_RING_INIT(&keepalive_timeout_head, conn_state_t, timeout_list);
 849
 850     for (lr = ap_listeners; lr != NULL; lr = lr->next) {
 851         apr_pollfd_t *pfd = apr_palloc(p, sizeof(*pfd));
 852         pt = apr_pcalloc(p, sizeof(*pt));
 853         pfd->desc_type = APR_POLL_SOCKET;
 854         pfd->desc.s = lr->sd;
 855         pfd->reqevents = APR_POLLIN;
 856
 857         pt->type = PT_ACCEPT;
 858         pt->baton = lr;
 859
 860         pfd->client_data = pt;
 861
 862         apr_socket_opt_set(pfd->desc.s, APR_SO_NONBLOCK, 1);
 863         apr_pollset_add(event_pollset, pfd);
 864
 865         lr->accept_func = ap_unixd_accept;
 866     }
 867
 868 #if HAVE_SERF
 869     baton = apr_pcalloc(p, sizeof(*baton));
 870     baton->pollset = event_pollset;
 871     /* TODO: subpools, threads, reuse, etc.  -- currently use malloc() inside :( */
 872     baton->pool = p;
 873
 874     g_serf = serf_context_create_ex(baton,
 875                                     s_socket_add,
 876                                     s_socket_remove, p);
 877
 878     ap_register_provider(p, "mpm_serf",
 879                          "instance", "0", g_serf);
 880
 881 #endif
 882
 883     return APR_SUCCESS;
 884 }
 885
 886 static apr_status_t push_timer2worker(timer_event_t* te)
 887 {
 888     return ap_queue_push_timer(worker_queue, te);
 889 }
 890
 891 static apr_status_t push2worker(const apr_pollfd_t * pfd,
 892                                 apr_pollset_t * pollset)
 893 {
 894     listener_poll_type *pt = (listener_poll_type *) pfd->client_data;
 895     conn_state_t *cs = (conn_state_t *) pt->baton;
 896     apr_status_t rc;
 897
 898     if (pt->bypass_push) {
 899         return APR_SUCCESS;
 900     }
 901
 902     pt->bypass_push = 1;
 903
 904     rc = apr_pollset_remove(pollset, pfd);
 905
 906     /*
 907      * Some of the pollset backends, like KQueue or Epoll
 908      * automagically remove the FD if the socket is closed,
 909      * therefore, we can accept _SUCCESS or _NOTFOUND,
 910      * and we still want to keep going
 911      */
 912     if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
 913         cs->state = CONN_STATE_LINGER;
 914     }
 915
 916     rc = ap_queue_push(worker_queue, cs->pfd.desc.s, cs, cs->p);
 917     if (rc != APR_SUCCESS) {
 918         /* trash the connection; we couldn't queue the connected
 919          * socket to a worker
 920          */
 921         apr_bucket_alloc_destroy(cs->bucket_alloc);
 922         apr_socket_close(cs->pfd.desc.s);
 923         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
 924                      ap_server_conf, "push2worker: ap_queue_push failed");
 925         apr_pool_clear(cs->p);
 926         ap_push_pool(worker_queue_info, cs->p);
 927     }
 928
 929     return rc;
 930 }
 931
 932 /* get_worker:
 933  *     reserve a worker thread, block if all are currently busy.
 934  *     this prevents the worker queue from overflowing and lets
 935  *     other processes accept new connections in the mean time.
 936  */
 937 static int get_worker(int *have_idle_worker_p)
 938 {
 939     apr_status_t rc;
 940
 941     if (!*have_idle_worker_p) {
 942         rc = ap_queue_info_wait_for_idler(worker_queue_info);
 943
 944         if (rc == APR_SUCCESS) {
 945             *have_idle_worker_p = 1;
 946             return 1;
 947         }
 948         else {
 949             if (!APR_STATUS_IS_EOF(rc)) {
 950                 ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
 951                              "ap_queue_info_wait_for_idler failed.  "
 952                              "Attempting to shutdown process gracefully");
 953                 signal_threads(ST_GRACEFUL);
 954             }
 955             return 0;
 956         }
 957     }
 958     else {
 959         /* already reserved a worker thread - must have hit a
 960          * transient error on a previous pass
 961          */
 962         return 1;
 963     }
 964 }
 965
 966 /* XXXXXX: Convert to skiplist or other better data structure
 967  * (yes, this is VERY VERY VERY VERY BAD)
 968  */
 969
 970 /* Structures to reuse: timer_event_t records whose callback has already
      * been run by a worker.  event_register_timed_callback() takes a record
      * from here before falling back to malloc().
      */
 971 static APR_RING_HEAD(timer_free_ring_t, timer_event_t) timer_free_ring;
 972 /* Active timers, kept sorted by ascending 'when' so that the first
      * element is always the next timer due.
      */
 973 static APR_RING_HEAD(timer_ring_t, timer_event_t) timer_ring;
 974
     /* Held around every access to timer_free_ring and timer_ring. */
 975 static apr_thread_mutex_t *g_timer_ring_mtx;
 976
 977 static apr_status_t event_register_timed_callback(apr_time_t t,
 978                                                   ap_mpm_callback_fn_t *cbfn,
 979                                                   void *baton)
 980 {
 981     int inserted = 0;
 982     timer_event_t *ep;
 983     timer_event_t *te;
 984     /* oh yeah, and make locking smarter/fine grained. */
 985     apr_thread_mutex_lock(g_timer_ring_mtx);
 986
 987     if (!APR_RING_EMPTY(&timer_free_ring, timer_event_t, link)) {
 988         te = APR_RING_FIRST(&timer_free_ring);
 989         APR_RING_REMOVE(te, link);
 990     }
 991     else {
 992         /* XXXXX: lol, pool allocation without a context from any thread.Yeah. Right. MPMs Suck. */
 993         te = malloc(sizeof(timer_event_t));
 994         APR_RING_ELEM_INIT(te, link);
 995     }
 996
 997     te->cbfunc = cbfn;
 998     te->baton = baton;
 999     /* XXXXX: optimize */
1000     te->when = t + apr_time_now();
1001
1002     /* Okay, insert sorted by when.. */
1003     for (ep = APR_RING_FIRST(&timer_ring);
1004          ep != APR_RING_SENTINEL(&timer_ring,
1005                                  timer_event_t, link);
1006          ep = APR_RING_NEXT(ep, link))
1007     {
1008         if (ep->when > te->when) {
1009             inserted = 1;
1010             APR_RING_INSERT_BEFORE(ep, te, link);
1011             break;
1012         }
1013     }
1014
1015     if (!inserted) {
1016         APR_RING_INSERT_TAIL(&timer_ring, te, timer_event_t, link);
1017     }
1018
1019     apr_thread_mutex_unlock(g_timer_ring_mtx);
1020
1021     return APR_SUCCESS;
1022 }
1023
     /* listener_thread:
      *     Entry point of the listener thread.  Each pass of the main loop
      *       1. picks a poll timeout from the earliest registered timer,
      *       2. polls event_pollset,
      *       3. moves timers that are due onto the worker queue,
      *       4. dispatches each ready descriptor: PT_CSD connections are
      *          handed to a worker via push2worker(), PT_ACCEPT listening
      *          sockets are accepted and the new socket queued, PT_SERF
      *          descriptors are passed to serf (only when HAVE_SERF),
      *       5. marks connections on the keepalive and write-completion
      *          timeout rings whose expiration time has passed as
      *          CONN_STATE_LINGER and pushes them to workers.
      *     'dummy' is a malloc()ed proc_info; it is freed here after the
      *     process slot has been read from it.
      *     After the loop the listeners are closed, the worker queue is
      *     terminated and SIGTERM is sent to this process to wake the main
      *     thread.
      */
1024 static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
1025 {
1026     timer_event_t *ep;
1027     timer_event_t *te;
1028     apr_status_t rc;
1029     proc_info *ti = dummy;
1030     int process_slot = ti->pid;
1031     apr_pool_t *tpool = apr_thread_pool_get(thd);
1032     void *csd = NULL;
1033     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1034     ap_listen_rec *lr;
1035     int have_idle_worker = 0;
1036     conn_state_t *cs;
1037     const apr_pollfd_t *out_pfd;
1038     apr_int32_t num = 0;
1039     apr_time_t time_now = 0;
1040     apr_interval_time_t timeout_interval;
1041     apr_time_t timeout_time;
1042     listener_poll_type *pt;
1043
1044     free(ti);
1045
1046     /* the following times out events that are really close in the future
1047      *   to prevent extra poll calls
1048      *
1049      * current value is .1 second
1050      */
1051 #define TIMEOUT_FUDGE_FACTOR 100000
1052 #define EVENT_FUDGE_FACTOR 10000
1053
1054     rc = init_pollset(tpool);
1055     if (rc != APR_SUCCESS) {
1056         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1057                      "failed to initialize pollset, "
1058                      "attempting to shutdown process gracefully");
1059         signal_threads(ST_GRACEFUL);
1060         return NULL;
1061     }
1062
1063     /* Unblock the signal used to wake this thread up, and set a handler for
1064      * it.
1065      */
1066     unblock_signal(LISTENER_SIGNAL);
1067     apr_signal(LISTENER_SIGNAL, dummy_signal_handler);
1068
1069     while (!listener_may_exit) {
1070
1071         if (requests_this_child <= 0) {
1072             check_infinite_requests();
1073         }
1074
1075
             /* Choose the poll timeout: the time left until the first timer
              * on timer_ring, 1 if that timer is already due, or 100 ms when
              * no timer is registered.
              */
1076         {
1077             apr_time_t now = apr_time_now();
1078             apr_thread_mutex_lock(g_timer_ring_mtx);
1079
1080             if (!APR_RING_EMPTY(&timer_ring, timer_event_t, link)) {
1081                 te = APR_RING_FIRST(&timer_ring);
1082                 if (te->when > now) {
1083                     timeout_interval = te->when - now;
1084                 }
1085                 else {
1086                     timeout_interval = 1;
1087                 }
1088             }
1089             else {
1090                 timeout_interval = apr_time_from_msec(100);
1091             }
1092             apr_thread_mutex_unlock(g_timer_ring_mtx);
1093         }
1094
1095 #if HAVE_SERF
1096         rc = serf_context_prerun(g_serf);
1097         if (rc != APR_SUCCESS) {
1098             /* TODO: what should do here? ugh. */
1099         }
1100
1101 #endif
1102         rc = apr_pollset_poll(event_pollset, timeout_interval, &num,
1103                               &out_pfd);
1104
             /* An interrupted poll is simply retried; a timeout falls
              * through to the timer and timeout handling below; any other
              * failure starts a graceful shutdown.
              */
1105         if (rc != APR_SUCCESS) {
1106             if (APR_STATUS_IS_EINTR(rc)) {
1107                 continue;
1108             }
1109             if (!APR_STATUS_IS_TIMEUP(rc)) {
1110                 ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1111                              "apr_pollset_poll failed.  Attempting to "
1112                              "shutdown process gracefully");
1113                 signal_threads(ST_GRACEFUL);
1114             }
1115         }
1116
1117         if (listener_may_exit)
1118             break;
1119
             /* Take every timer that is due (within EVENT_FUDGE_FACTOR of
              * now) off the head of the sorted ring and queue it for a
              * worker; stop at the first timer that is not yet due.
              */
1120         {
1121             apr_time_t now = apr_time_now();
1122             apr_thread_mutex_lock(g_timer_ring_mtx);
1123             for (ep = APR_RING_FIRST(&timer_ring);
1124                  ep != APR_RING_SENTINEL(&timer_ring,
1125                                          timer_event_t, link);
1126                  ep = APR_RING_FIRST(&timer_ring))
1127             {
1128                 if (ep->when < now + EVENT_FUDGE_FACTOR) {
1129                     APR_RING_REMOVE(ep, link);
1130                     push_timer2worker(ep);
1131                 }
1132                 else {
1133                     break;
1134                 }
1135             }
1136             apr_thread_mutex_unlock(g_timer_ring_mtx);
1137         }
1138
             /* Dispatch the descriptors returned by the poll.  A worker is
              * reserved before each one is looked at; the reservation is
              * marked used (have_idle_worker = 0) only after a successful
              * push.
              */
1139         while (num && get_worker(&have_idle_worker)) {
1140             pt = (listener_poll_type *) out_pfd->client_data;
1141             if (pt->type == PT_CSD) {
1142                 /* one of the sockets is readable */
1143                 cs = (conn_state_t *) pt->baton;
1144                 switch (cs->state) {
1145                 case CONN_STATE_CHECK_REQUEST_LINE_READABLE:
1146                     cs->state = CONN_STATE_READ_REQUEST_LINE;
1147                     break;
1148                 case CONN_STATE_WRITE_COMPLETION:
1149                     break;
1150                 default:
1151                     ap_log_error(APLOG_MARK, APLOG_ERR, rc,
1152                                  ap_server_conf,
1153                                  "event_loop: unexpected state %d",
1154                                  cs->state);
1155                     AP_DEBUG_ASSERT(0);
1156                 }
1157
                     /* The connection is active again: take it off whichever
                      * timeout ring it is linked into.
                      */
1158                 apr_thread_mutex_lock(timeout_mutex);
1159                 APR_RING_REMOVE(cs, timeout_list);
1160                 apr_thread_mutex_unlock(timeout_mutex);
1161                 APR_RING_ELEM_INIT(cs, timeout_list);
1162
1163                 rc = push2worker(out_pfd, event_pollset);
1164                 if (rc != APR_SUCCESS) {
1165                     ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1166                                  ap_server_conf, "push2worker failed");
1167                 }
1168                 else {
1169                     have_idle_worker = 0;
1170                 }
1171             }
1172             else if (pt->type == PT_ACCEPT) {
1173                 /* A Listener Socket is ready for an accept() */
1174
1175                 lr = (ap_listen_rec *) pt->baton;
1176
                     /* Reuse a recycled transaction pool if one is available. */
1177                 ap_pop_pool(&ptrans, worker_queue_info);
1178
1179                 if (ptrans == NULL) {
1180                     /* create a new transaction pool for each accepted socket */
1181                     apr_allocator_t *allocator;
1182
1183                     apr_allocator_create(&allocator);
1184                     apr_allocator_max_free_set(allocator,
1185                                                ap_max_mem_free);
1186                     apr_pool_create_ex(&ptrans, pconf, NULL, allocator);
1187                     apr_allocator_owner_set(allocator, ptrans);
1188                     if (ptrans == NULL) {
1189                         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1190                                      ap_server_conf,
1191                                      "Failed to create transaction pool");
1192                         signal_threads(ST_GRACEFUL);
1193                         return NULL;
1194                     }
1195                 }
1196                 apr_pool_tag(ptrans, "transaction");
1197
1198                 rc = lr->accept_func(&csd, lr, ptrans);
1199
1200                 /* later we trash rv and rely on csd to indicate
1201                  * success/failure
1202                  */
1203                 AP_DEBUG_ASSERT(rc == APR_SUCCESS || !csd);
1204
1205                 if (rc == APR_EGENERAL) {
1206                     /* E[NM]FILE, ENOMEM, etc */
1207                     resource_shortage = 1;
1208                     signal_threads(ST_GRACEFUL);
1209                 }
1210
1211                 if (csd != NULL) {
1212                     rc = ap_queue_push(worker_queue, csd, NULL, ptrans);
1213                     if (rc != APR_SUCCESS) {
1214                         /* trash the connection; we couldn't queue the connected
1215                          * socket to a worker
1216                          */
1217                         apr_socket_close(csd);
1218                         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1219                                      ap_server_conf,
1220                                      "ap_queue_push failed");
1221                         apr_pool_clear(ptrans);
1222                         ap_push_pool(worker_queue_info, ptrans);
1223                     }
1224                     else {
1225                         have_idle_worker = 0;
1226                     }
1227                 }
1228                 else {
                         /* nothing was accepted: recycle the pool */
1229                     apr_pool_clear(ptrans);
1230                     ap_push_pool(worker_queue_info, ptrans);
1231                 }
1232             }               /* if:else on pt->type */
1233 #if HAVE_SERF
1234             else if (pt->type == PT_SERF) {
1235                 /* send socket to serf. */
1236                 /* XXXX: this doesn't require get_worker(&have_idle_worker) */
1237                 serf_event_trigger(g_serf, pt->baton, out_pfd);
1238             }
1239 #endif
1240             out_pfd++;
1241             num--;
1242         }                   /* while for processing poll */
1243
1244         /* XXX possible optimization: stash the current time for use as
1245          * r->request_time for new requests
1246          */
1247         time_now = apr_time_now();
1248
1249         /* handle timed out sockets */
1250         apr_thread_mutex_lock(timeout_mutex);
1251
1252         /* Step 1: keepalive timeouts */
             /* timeout_mutex is released while a worker is reserved and the
              * connection is pushed, and re-taken before the ring is read
              * again.  If no worker can be reserved the connection is put
              * back at the head of the ring and the loop is left with the
              * mutex held.
              */
1253         cs = APR_RING_FIRST(&keepalive_timeout_head);
1254         timeout_time = time_now + TIMEOUT_FUDGE_FACTOR;
1255         while (!APR_RING_EMPTY(&keepalive_timeout_head, conn_state_t, timeout_list)
1256                && cs->expiration_time < timeout_time) {
1257
1258             cs->state = CONN_STATE_LINGER;
1259
1260             APR_RING_REMOVE(cs, timeout_list);
1261             apr_thread_mutex_unlock(timeout_mutex);
1262
1263             if (!get_worker(&have_idle_worker)) {
1264                 apr_thread_mutex_lock(timeout_mutex);
1265                 APR_RING_INSERT_HEAD(&keepalive_timeout_head, cs,
1266                                      conn_state_t, timeout_list);
1267                 break;
1268             }
1269
1270             rc = push2worker(&cs->pfd, event_pollset);
1271
1272             if (rc != APR_SUCCESS) {
1273                 return NULL;
1274                 /* XXX return NULL looks wrong - not an init failure
1275                  * that bypasses all the cleanup outside the main loop
1276                  * break seems more like it
1277                  * need to evaluate seriousness of push2worker failures
1278                  */
1279             }
1280             have_idle_worker = 0;
1281             apr_thread_mutex_lock(timeout_mutex);
1282             cs = APR_RING_FIRST(&keepalive_timeout_head);
1283         }
1284
1285         /* Step 2: write completion timeouts */
             /* Same locking pattern as step 1, applied to timeout_head. */
1286         cs = APR_RING_FIRST(&timeout_head);
1287         while (!APR_RING_EMPTY(&timeout_head, conn_state_t, timeout_list)
1288                && cs->expiration_time < timeout_time) {
1289
1290             cs->state = CONN_STATE_LINGER;
1291             APR_RING_REMOVE(cs, timeout_list);
1292             apr_thread_mutex_unlock(timeout_mutex);
1293
1294             if (!get_worker(&have_idle_worker)) {
1295                 apr_thread_mutex_lock(timeout_mutex);
1296                 APR_RING_INSERT_HEAD(&timeout_head, cs,
1297                                      conn_state_t, timeout_list);
1298                 break;
1299             }
1300
1301             rc = push2worker(&cs->pfd, event_pollset);
1302             if (rc != APR_SUCCESS) {
1303                 return NULL;
1304             }
1305             have_idle_worker = 0;
1306             apr_thread_mutex_lock(timeout_mutex);
1307             cs = APR_RING_FIRST(&timeout_head);
1308         }
1309
1310         apr_thread_mutex_unlock(timeout_mutex);
1311
1312     }     /* listener main loop */
1313
1314     ap_close_listeners();
1315     ap_queue_term(worker_queue);
1316     dying = 1;
1317     ap_scoreboard_image->parent[process_slot].quiescing = 1;
1318
1319     /* wake up the main thread */
1320     kill(ap_my_pid, SIGTERM);
1321
1322     apr_thread_exit(thd, APR_SUCCESS);
1323     return NULL;
1324 }
1325
1326 /* XXX For ungraceful termination/restart, we definitely don't want to
1327  *     wait for active connections to finish but we may want to wait
1328  *     for idle workers to get out of the queue code and release mutexes,
1329  *     since those mutexes are cleaned up pretty soon and some systems
1330  *     may not react favorably (i.e., segfault) if operations are attempted
1331  *     on cleaned-up mutexes.
1332  */
     /* worker_thread:
      *     Entry point of each worker thread.  'dummy' is a malloc()ed
      *     proc_info carrying the process and thread slot; it is freed here
      *     once both have been read.
      *     The thread repeatedly registers itself as idle, pops one item
      *     from worker_queue, and either runs a timer callback or processes
      *     a connection through process_socket().  It leaves the loop when
      *     workers_may_exit is set, when the queue reports EOF, or when it
      *     cannot register itself as idle.
      */
1333 static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
1334 {
1335     proc_info *ti = dummy;
1336     int process_slot = ti->pid;
1337     int thread_slot = ti->tid;
1338     apr_socket_t *csd = NULL;
1339     conn_state_t *cs;
1340     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1341     apr_status_t rv;
1342     int is_idle = 0;
1343     timer_event_t *te = NULL;
1344
1345     free(ti);
1346
1347     ap_scoreboard_image->servers[process_slot][thread_slot].pid = ap_my_pid;
1348     ap_scoreboard_image->servers[process_slot][thread_slot].tid = apr_os_thread_current();
1349     ap_scoreboard_image->servers[process_slot][thread_slot].generation = retained->my_generation;
1350     ap_update_child_status_from_indexes(process_slot, thread_slot,
1351                                         SERVER_STARTING, NULL);
1352
1353     while (!workers_may_exit) {
             /* Register as idle only once per connection handled: is_idle
              * is cleared solely in the connection branch below, so running
              * a timer or hitting a pop error does not register again.
              */
1354         if (!is_idle) {
1355             rv = ap_queue_info_set_idle(worker_queue_info, NULL);
1356             if (rv != APR_SUCCESS) {
1357                 ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
1358                              "ap_queue_info_set_idle failed. Attempting to "
1359                              "shutdown process gracefully.");
1360                 signal_threads(ST_GRACEFUL);
1361                 break;
1362             }
1363             is_idle = 1;
1364         }
1365
1366         ap_update_child_status_from_indexes(process_slot, thread_slot,
1367                                             SERVER_READY, NULL);
             /* Re-entered via goto after an interrupted pop, skipping the
              * idle registration and scoreboard update above.
              */
1368       worker_pop:
1369         if (workers_may_exit) {
1370             break;
1371         }
1372
1373         te = NULL;
1374
             /* Yields either a connection (csd/cs/ptrans) or a timer event
              * (te != NULL).
              */
1375         rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te);
1376
1377         if (rv != APR_SUCCESS) {
1378             /* We get APR_EOF during a graceful shutdown once all the
1379              * connections accepted by this server process have been handled.
1380              */
1381             if (APR_STATUS_IS_EOF(rv)) {
1382                 break;
1383             }
1384             /* We get APR_EINTR whenever ap_queue_pop() has been interrupted
1385              * from an explicit call to ap_queue_interrupt_all(). This allows
1386              * us to unblock threads stuck in ap_queue_pop() when a shutdown
1387              * is pending.
1388              *
1389              * If workers_may_exit is set and this is ungraceful termination/
1390              * restart, we are bound to get an error on some systems (e.g.,
1391              * AIX, which sanity-checks mutex operations) since the queue
1392              * may have already been cleaned up.  Don't log the "error" if
1393              * workers_may_exit is set.
1394              */
1395             else if (APR_STATUS_IS_EINTR(rv)) {
1396                 goto worker_pop;
1397             }
1398             /* We got some other error. */
1399             else if (!workers_may_exit) {
1400                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1401                              "ap_queue_pop failed");
1402             }
1403             continue;
1404         }
1405         if (te != NULL) {
1406
                 /* Timer event: run the callback, then return the record to
                  * the free ring so it can be reused.
                  */
1407             te->cbfunc(te->baton);
1408
1409             {
1410                 apr_thread_mutex_lock(g_timer_ring_mtx);
1411                 APR_RING_INSERT_TAIL(&timer_free_ring, te, timer_event_t, link);
1412                 apr_thread_mutex_unlock(g_timer_ring_mtx);
1413             }
1414         }
1415         else {
                 /* Connection: publish the socket in worker_sockets[] for
                  * the duration of process_socket(); a zero return from it
                  * counts against requests_this_child.
                  */
1416             is_idle = 0;
1417             worker_sockets[thread_slot] = csd;
1418             rv = process_socket(thd, ptrans, csd, cs, process_slot, thread_slot);
1419             if (!rv) {
1420                 requests_this_child--;
1421             }
1422             worker_sockets[thread_slot] = NULL;
1423         }
1424     }
1425
         /* Final scoreboard state: SERVER_DEAD if the process is dying,
          * SERVER_GRACEFUL otherwise.
          */
1426     ap_update_child_status_from_indexes(process_slot, thread_slot,
1427                                         (dying) ? SERVER_DEAD :
1428                                         SERVER_GRACEFUL,
1429                                         (request_rec *) NULL);
1430
1431     apr_thread_exit(thd, APR_SUCCESS);
1432     return NULL;
1433 }
1434
/* Report whether signum is one of the termination signals handled here:
 * returns 1 for SIGTERM and SIGINT, 0 for anything else.
 */
static int check_signal(int signum)
{
    if (signum == SIGTERM || signum == SIGINT) {
        return 1;
    }
    return 0;
}
1444
1445
1446
1447 static void create_listener_thread(thread_starter * ts)
1448 {
1449     int my_child_num = ts->child_num_arg;
1450     apr_threadattr_t *thread_attr = ts->threadattr;
1451     proc_info *my_info;
1452     apr_status_t rv;
1453
1454     my_info = (proc_info *) malloc(sizeof(proc_info));
1455     my_info->pid = my_child_num;
1456     my_info->tid = -1;          /* listener thread doesn't have a thread slot */
1457     my_info->sd = 0;
1458     rv = apr_thread_create(&ts->listener, thread_attr, listener_thread,
1459                            my_info, pchild);
1460     if (rv != APR_SUCCESS) {
1461         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1462                      "apr_thread_create: unable to create listener thread");
1463         /* let the parent decide how bad this really is */
1464         clean_child_exit(APEXIT_CHILDSICK);
1465     }
1466     apr_os_thread_get(&listener_os_thread, ts->listener);
1467 }
1468
1469 /* XXX under some circumstances not understood, children can get stuck
1470  *     in start_threads forever trying to take over slots which will
1471  *     never be cleaned up; for now there is an APLOG_DEBUG message issued
1472  *     every so often when this condition occurs
1473  */
     /* start_threads:
      *     Entry point of the starter thread.  'dummy' is the thread_starter
      *     describing this child.  Creates the worker queue, the queue info,
      *     the timeout mutex, the main pollset and the worker_sockets array,
      *     then creates one worker thread for every scoreboard slot of this
      *     child that is SERVER_GRACEFUL or SERVER_DEAD.  Slots in any other
      *     state are retried once a second until all threads_per_child
      *     workers exist or start_thread_may_exit is set.  The listener
      *     thread is started as soon as at least one worker has been
      *     created.  Any setup failure ends the child through
      *     clean_child_exit().
      */
1474 static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
1475 {
1476     thread_starter *ts = dummy;
1477     apr_thread_t **threads = ts->threads;
1478     apr_threadattr_t *thread_attr = ts->threadattr;
1479     int child_num_arg = ts->child_num_arg;
1480     int my_child_num = child_num_arg;
1481     proc_info *my_info;
1482     apr_status_t rv;
1483     int i;
1484     int threads_created = 0;
1485     int listener_started = 0;
1486     int loops;
1487     int prev_threads_created;
1488
1489     /* We must create the fd queues before we start up the listener
1490      * and worker threads. */
1491     worker_queue = apr_pcalloc(pchild, sizeof(*worker_queue));
1492     rv = ap_queue_init(worker_queue, threads_per_child, pchild);
1493     if (rv != APR_SUCCESS) {
1494         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1495                      "ap_queue_init() failed");
1496         clean_child_exit(APEXIT_CHILDFATAL);
1497     }
1498
1499     rv = ap_queue_info_create(&worker_queue_info, pchild,
1500                               threads_per_child);
1501     if (rv != APR_SUCCESS) {
1502         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1503                      "ap_queue_info_create() failed");
1504         clean_child_exit(APEXIT_CHILDFATAL);
1505     }
1506
1507     /* Create the timeout mutex and main pollset before the listener
1508      * thread starts.
1509      */
1510     rv = apr_thread_mutex_create(&timeout_mutex, APR_THREAD_MUTEX_DEFAULT,
1511                                  pchild);
1512     if (rv != APR_SUCCESS) {
1513         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
1514                      "creation of the timeout mutex failed.");
1515         clean_child_exit(APEXIT_CHILDFATAL);
1516     }
1517
1518     /* Create the main pollset */
1519     rv = apr_pollset_create(&event_pollset,
1520                             threads_per_child,
1521                             pchild, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
1522     if (rv != APR_SUCCESS) {
1523         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
1524                      "apr_pollset_create with Thread Safety failed.");
1525         clean_child_exit(APEXIT_CHILDFATAL);
1526     }
1527
         /* One entry per worker slot; worker_thread() stores the socket it
          * is currently processing here.
          */
1528     worker_sockets = apr_pcalloc(pchild, threads_per_child
1529                                  * sizeof(apr_socket_t *));
1530
1531     loops = prev_threads_created = 0;
1532     while (1) {
1533         /* threads_per_child does not include the listener thread */
1534         for (i = 0; i < threads_per_child; i++) {
1535             int status =
1536                 ap_scoreboard_image->servers[child_num_arg][i].status;
1537
                 /* Only slots marked SERVER_GRACEFUL or SERVER_DEAD are
                  * taken over; any other slot is skipped on this pass.
                  */
1538             if (status != SERVER_GRACEFUL && status != SERVER_DEAD) {
1539                 continue;
1540             }
1541
                 /* Freed by worker_thread() once it has read its slots. */
1542             my_info = (proc_info *) malloc(sizeof(proc_info));
1543             if (my_info == NULL) {
1544                 ap_log_error(APLOG_MARK, APLOG_ALERT, errno, ap_server_conf,
1545                              "malloc: out of memory");
1546                 clean_child_exit(APEXIT_CHILDFATAL);
1547             }
1548             my_info->pid = my_child_num;
1549             my_info->tid = i;
1550             my_info->sd = 0;
1551
1552             /* We are creating threads right now */
1553             ap_update_child_status_from_indexes(my_child_num, i,
1554                                                 SERVER_STARTING, NULL);
1555             /* We let each thread update its own scoreboard entry.  This is
1556              * done because it lets us deal with tid better.
1557              */
1558             rv = apr_thread_create(&threads[i], thread_attr,
1559                                    worker_thread, my_info, pchild);
1560             if (rv != APR_SUCCESS) {
1561                 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1562                              "apr_thread_create: unable to create worker thread");
1563                 /* let the parent decide how bad this really is */
1564                 clean_child_exit(APEXIT_CHILDSICK);
1565             }
1566             threads_created++;
1567         }
1568
1569         /* Start the listener only when there are workers available */
1570         if (!listener_started && threads_created) {
1571             create_listener_thread(ts);
1572             listener_started = 1;
1573         }
1574
1575
1576         if (start_thread_may_exit || threads_created == threads_per_child) {
1577             break;
1578         }
1579         /* wait for previous generation to clean up an entry */
1580         apr_sleep(apr_time_from_sec(1));
1581         ++loops;
             /* With the one second sleep above, every 120 passes is about
              * two minutes; log only if no thread was created since the
              * previous check.
              */
1582         if (loops % 120 == 0) { /* every couple of minutes */
1583             if (prev_threads_created == threads_created) {
1584                 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1585                              "child %" APR_PID_T_FMT " isn't taking over "
1586                              "slots very quickly (%d of %d)",
1587                              ap_my_pid, threads_created,
1588                              threads_per_child);
1589             }
1590             prev_threads_created = threads_created;
1591         }
1592     }
1593
1594     /* What state should this child_main process be listed as in the
1595      * scoreboard...?
1596      *  ap_update_child_status_from_indexes(my_child_num, i, SERVER_STARTING,
1597      *                                      (request_rec *) NULL);
1598      *
1599      *  This state should be listed separately in the scoreboard, in some kind
1600      *  of process_status, not mixed in with the worker threads' status.
1601      *  "life_status" is almost right, but it's in the worker's structure, and
1602      *  the name could be clearer.   gla
1603      */
1604     apr_thread_exit(thd, APR_SUCCESS);
1605     return NULL;
1606 }
1607
1608 static void join_workers(apr_thread_t * listener, apr_thread_t ** threads)
1609 {
1610     int i;
1611     apr_status_t rv, thread_rv;
1612
1613     if (listener) {
1614         int iter;
1615
1616         /* deal with a rare timing window which affects waking up the
1617          * listener thread...  if the signal sent to the listener thread
1618          * is delivered between the time it verifies that the
1619          * listener_may_exit flag is clear and the time it enters a
1620          * blocking syscall, the signal didn't do any good...  work around
1621          * that by sleeping briefly and sending it again
1622          */
1623
1624         iter = 0;
1625         while (iter < 10 &&
1626 #ifdef HAVE_PTHREAD_KILL
1627                pthread_kill(*listener_os_thread, 0)
1628 #else
1629                kill(ap_my_pid, 0)
1630 #endif
1631                == 0) {
1632             /* listener not dead yet */
1633             apr_sleep(apr_time_make(0, 500000));
1634             wakeup_listener();
1635             ++iter;
1636         }
1637         if (iter >= 10) {
1638             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1639                          "the listener thread didn't exit");
1640         }
1641         else {
1642             rv = apr_thread_join(&thread_rv, listener);
1643             if (rv != APR_SUCCESS) {
1644                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1645                              "apr_thread_join: unable to join listener thread");
1646             }
1647         }
1648     }
1649
1650     for (i = 0; i < threads_per_child; i++) {
1651         if (threads[i]) {       /* if we ever created this thread */
1652             rv = apr_thread_join(&thread_rv, threads[i]);
1653             if (rv != APR_SUCCESS) {
1654                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1655                              "apr_thread_join: unable to join worker "
1656                              "thread %d", i);
1657             }
1658         }
1659     }
1660 }
1661
1662 static void join_start_thread(apr_thread_t * start_thread_id)
1663 {
1664     apr_status_t rv, thread_rv;
1665
1666     start_thread_may_exit = 1;  /* tell it to give up in case it is still
1667                                  * trying to take over slots from a
1668                                  * previous generation
1669                                  */
1670     rv = apr_thread_join(&thread_rv, start_thread_id);
1671     if (rv != APR_SUCCESS) {
1672         ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1673                      "apr_thread_join: unable to join the start " "thread");
1674     }
1675 }
1676
1677 static void child_main(int child_num_arg)
1678 {
1679     apr_thread_t **threads;
1680     apr_status_t rv;
1681     thread_starter *ts;
1682     apr_threadattr_t *thread_attr;
1683     apr_thread_t *start_thread_id;
1684
1685     mpm_state = AP_MPMQ_STARTING;       /* for benefit of any hooks that run as this
1686                                          * child initializes
1687                                          */
1688     ap_my_pid = getpid();
1689     ap_fatal_signal_child_setup(ap_server_conf);
1690     apr_pool_create(&pchild, pconf);
1691
1692     /*stuff to do before we switch id's, so we have permissions. */
1693     ap_reopen_scoreboard(pchild, NULL, 0);
1694
1695     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
1696         clean_child_exit(APEXIT_CHILDFATAL);
1697     }
1698
1699     apr_thread_mutex_create(&g_timer_ring_mtx, APR_THREAD_MUTEX_DEFAULT, pchild);
1700     APR_RING_INIT(&timer_free_ring, timer_event_t, link);
1701     APR_RING_INIT(&timer_ring, timer_event_t, link);
1702
1703     ap_run_child_init(pchild, ap_server_conf);
1704
1705     /* done with init critical section */
1706
1707     /* Just use the standard apr_setup_signal_thread to block all signals
1708      * from being received.  The child processes no longer use signals for
1709      * any communication with the parent process.
1710      */
1711     rv = apr_setup_signal_thread();
1712     if (rv != APR_SUCCESS) {
1713         ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
1714                      "Couldn't initialize signal thread");
1715         clean_child_exit(APEXIT_CHILDFATAL);
1716     }
1717
1718     if (ap_max_requests_per_child) {
1719         requests_this_child = ap_max_requests_per_child;
1720     }
1721     else {
1722         /* coding a value of zero means infinity */
1723         requests_this_child = INT_MAX;
1724     }
1725
1726     /* Setup worker threads */
1727
1728     /* clear the storage; we may not create all our threads immediately,
1729      * and we want a 0 entry to indicate a thread which was not created
1730      */
1731     threads = (apr_thread_t **) calloc(1,
1732                                        sizeof(apr_thread_t *) *
1733                                        threads_per_child);
1734     if (threads == NULL) {
1735         ap_log_error(APLOG_MARK, APLOG_ALERT, errno, ap_server_conf,
1736                      "malloc: out of memory");
1737         clean_child_exit(APEXIT_CHILDFATAL);
1738     }
1739
1740     ts = (thread_starter *) apr_palloc(pchild, sizeof(*ts));
1741
1742     apr_threadattr_create(&thread_attr, pchild);
1743     /* 0 means PTHREAD_CREATE_JOINABLE */
1744     apr_threadattr_detach_set(thread_attr, 0);
1745
1746     if (ap_thread_stacksize != 0) {
1747         apr_threadattr_stacksize_set(thread_attr, ap_thread_stacksize);
1748     }
1749
1750     ts->threads = threads;
1751     ts->listener = NULL;
1752     ts->child_num_arg = child_num_arg;
1753     ts->threadattr = thread_attr;
1754
1755     rv = apr_thread_create(&start_thread_id, thread_attr, start_threads,
1756                            ts, pchild);
1757     if (rv != APR_SUCCESS) {
1758         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1759                      "apr_thread_create: unable to create worker thread");
1760         /* let the parent decide how bad this really is */
1761         clean_child_exit(APEXIT_CHILDSICK);
1762     }
1763
1764     mpm_state = AP_MPMQ_RUNNING;
1765
1766     /* If we are only running in one_process mode, we will want to
1767      * still handle signals. */
1768     if (one_process) {
1769         /* Block until we get a terminating signal. */
1770         apr_signal_thread(check_signal);
1771         /* make sure the start thread has finished; signal_threads()
1772          * and join_workers() depend on that
1773          */
1774         /* XXX join_start_thread() won't be awakened if one of our
1775          *     threads encounters a critical error and attempts to
1776          *     shutdown this child
1777          */
1778         join_start_thread(start_thread_id);
1779
1780         /* helps us terminate a little more quickly than the dispatch of the
1781          * signal thread; beats the Pipe of Death and the browsers
1782          */
1783         signal_threads(ST_UNGRACEFUL);
1784
1785         /* A terminating signal was received. Now join each of the
1786          * workers to clean them up.
1787          *   If the worker already exited, then the join frees
1788          *   their resources and returns.
1789          *   If the worker hasn't exited, then this blocks until
1790          *   they have (then cleans up).
1791          */
1792         join_workers(ts->listener, threads);
1793     }
1794     else {                      /* !one_process */
1795         /* remove SIGTERM from the set of blocked signals...  if one of
1796          * the other threads in the process needs to take us down
1797          * (e.g., for MaxConnectionsPerChild) it will send us SIGTERM
1798          */
1799         unblock_signal(SIGTERM);
1800         apr_signal(SIGTERM, dummy_signal_handler);
1801         /* Watch for any messages from the parent over the POD */
1802         while (1) {
1803             rv = ap_event_pod_check(pod);
1804             if (rv == AP_NORESTART) {
1805                 /* see if termination was triggered while we slept */
1806                 switch (terminate_mode) {
1807                 case ST_GRACEFUL:
1808                     rv = AP_GRACEFUL;
1809                     break;
1810                 case ST_UNGRACEFUL:
1811                     rv = AP_RESTART;
1812                     break;
1813                 }
1814             }
1815             if (rv == AP_GRACEFUL || rv == AP_RESTART) {
1816                 /* make sure the start thread has finished;
1817                  * signal_threads() and join_workers depend on that
1818                  */
1819                 join_start_thread(start_thread_id);
1820                 signal_threads(rv ==
1821                                AP_GRACEFUL ? ST_GRACEFUL : ST_UNGRACEFUL);
1822                 break;
1823             }
1824         }
1825
1826         /* A terminating signal was received. Now join each of the
1827          * workers to clean them up.
1828          *   If the worker already exited, then the join frees
1829          *   their resources and returns.
1830          *   If the worker hasn't exited, then this blocks until
1831          *   they have (then cleans up).
1832          */
1833         join_workers(ts->listener, threads);
1834     }
1835
1836     free(threads);
1837
1838     clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
1839 }
1840
1841 static int make_child(server_rec * s, int slot)
1842 {
1843     int pid;
1844
1845     if (slot + 1 > retained->max_daemons_limit) {
1846         retained->max_daemons_limit = slot + 1;
1847     }
1848
1849     if (one_process) {
1850         set_signals();
1851         ap_scoreboard_image->parent[slot].pid = getpid();
1852         child_main(slot);
1853         /* NOTREACHED */
1854     }
1855
1856     if ((pid = fork()) == -1) {
1857         ap_log_error(APLOG_MARK, APLOG_ERR, errno, s,
1858                      "fork: Unable to fork new process");
1859
1860         /* fork didn't succeed.  There's no need to touch the scoreboard;
1861          * if we were trying to replace a failed child process, then
1862          * server_main_loop() marked its workers SERVER_DEAD, and if
1863          * we were trying to replace a child process that exited normally,
1864          * its worker_thread()s left SERVER_DEAD or SERVER_GRACEFUL behind.
1865          */
1866
1867         /* In case system resources are maxxed out, we don't want
1868            Apache running away with the CPU trying to fork over and
1869            over and over again. */
1870         apr_sleep(apr_time_from_sec(10));
1871
1872         return -1;
1873     }
1874
1875     if (!pid) {
1876 #ifdef HAVE_BINDPROCESSOR
1877         /* By default, AIX binds to a single processor.  This bit unbinds
1878          * children which will then bind to another CPU.
1879          */
1880         int status = bindprocessor(BINDPROCESS, (int) getpid(),
1881                                    PROCESSOR_CLASS_ANY);
1882         if (status != OK)
1883             ap_log_error(APLOG_MARK, APLOG_DEBUG, errno,
1884                          ap_server_conf,
1885                          "processor unbind failed");
1886 #endif
1887         RAISE_SIGSTOP(MAKE_CHILD);
1888
1889         apr_signal(SIGTERM, just_die);
1890         child_main(slot);
1891         /* NOTREACHED */
1892     }
1893     /* else */
1894     if (ap_scoreboard_image->parent[slot].pid != 0) {
1895         /* This new child process is squatting on the scoreboard
1896          * entry owned by an exiting child process, which cannot
1897          * exit until all active requests complete.
1898          * Don't forget about this exiting child process, or we
1899          * won't be able to kill it if it doesn't exit by the
1900          * time the server is shut down.
1901          */
1902         ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1903                      "taking over scoreboard slot from %" APR_PID_T_FMT "%s",
1904                      ap_scoreboard_image->parent[slot].pid,
1905                      ap_scoreboard_image->parent[slot].quiescing ?
1906                          " (quiescing)" : "");
1907         ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid);
1908     }
1909     ap_scoreboard_image->parent[slot].quiescing = 0;
1910     ap_scoreboard_image->parent[slot].pid = pid;
1911     return 0;
1912 }
1913
1914 /* start up a bunch of children */
1915 static void startup_children(int number_to_start)
1916 {
1917     int i;
1918
1919     for (i = 0; number_to_start && i < ap_daemons_limit; ++i) {
1920         if (ap_scoreboard_image->parent[i].pid != 0) {
1921             continue;
1922         }
1923         if (make_child(ap_server_conf, i) < 0) {
1924             break;
1925         }
1926         --number_to_start;
1927     }
1928 }
1929
1930 static void perform_idle_server_maintenance(void)
1931 {
1932     int i, j;
1933     int idle_thread_count;
1934     worker_score *ws;
1935     process_score *ps;
1936     int free_length;
1937     int totally_free_length = 0;
1938     int free_slots[MAX_SPAWN_RATE];
1939     int last_non_dead;
1940     int total_non_dead;
1941     int active_thread_count = 0;
1942
1943     /* initialize the free_list */
1944     free_length = 0;
1945
1946     idle_thread_count = 0;
1947     last_non_dead = -1;
1948     total_non_dead = 0;
1949
1950     for (i = 0; i < ap_daemons_limit; ++i) {
1951         /* Initialization to satisfy the compiler. It doesn't know
1952          * that threads_per_child is always > 0 */
1953         int status = SERVER_DEAD;
1954         int any_dying_threads = 0;
1955         int any_dead_threads = 0;
1956         int all_dead_threads = 1;
1957
1958         if (i >= retained->max_daemons_limit
1959             && totally_free_length == retained->idle_spawn_rate)
1960             /* short cut if all active processes have been examined and
1961              * enough empty scoreboard slots have been found
1962              */
1963
1964             break;
1965         ps = &ap_scoreboard_image->parent[i];
1966         for (j = 0; j < threads_per_child; j++) {
1967             ws = &ap_scoreboard_image->servers[i][j];
1968             status = ws->status;
1969
1970             /* XXX any_dying_threads is probably no longer needed    GLA */
1971             any_dying_threads = any_dying_threads ||
1972                 (status == SERVER_GRACEFUL);
1973             any_dead_threads = any_dead_threads || (status == SERVER_DEAD);
1974             all_dead_threads = all_dead_threads &&
1975                 (status == SERVER_DEAD || status == SERVER_GRACEFUL);
1976
1977             /* We consider a starting server as idle because we started it
1978              * at least a cycle ago, and if it still hasn't finished starting
1979              * then we're just going to swamp things worse by forking more.
1980              * So we hopefully won't need to fork more if we count it.
1981              * This depends on the ordering of SERVER_READY and SERVER_STARTING.
1982              */
1983             if (ps->pid != 0) { /* XXX just set all_dead_threads in outer
1984                                    for loop if no pid?  not much else matters */
1985                 if (status <= SERVER_READY &&
1986                         !ps->quiescing && ps->generation == retained->my_generation) {
1987                     ++idle_thread_count;
1988                 }
1989                 if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
1990                     ++active_thread_count;
1991                 }
1992             }
1993         }
1994         if (any_dead_threads
1995             && totally_free_length < retained->idle_spawn_rate
1996             && free_length < MAX_SPAWN_RATE
1997             && (!ps->pid      /* no process in the slot */
1998                   || ps->quiescing)) {  /* or at least one is going away */
1999             if (all_dead_threads) {
2000                 /* great! we prefer these, because the new process can
2001                  * start more threads sooner.  So prioritize this slot
2002                  * by putting it ahead of any slots with active threads.
2003                  *
2004                  * first, make room by moving a slot that's potentially still
2005                  * in use to the end of the array
2006                  */
2007                 free_slots[free_length] = free_slots[totally_free_length];
2008                 free_slots[totally_free_length++] = i;
2009             }
2010             else {
2011                 /* slot is still in use - back of the bus
2012                  */
2013                 free_slots[free_length] = i;
2014             }
2015             ++free_length;
2016         }
2017         /* XXX if (!ps->quiescing)     is probably more reliable  GLA */
2018         if (!any_dying_threads) {
2019             last_non_dead = i;
2020             ++total_non_dead;
2021         }
2022     }
2023
2024     if (retained->sick_child_detected) {
2025         if (active_thread_count > 0) {
2026             /* some child processes appear to be working.  don't kill the
2027              * whole server.
2028              */
2029             retained->sick_child_detected = 0;
2030         }
2031         else {
2032             /* looks like a basket case.  give up.
2033              */
2034             shutdown_pending = 1;
2035             child_fatal = 1;
2036             ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
2037                          ap_server_conf,
2038                          "No active workers found..."
2039                          " Apache is exiting!");
2040             /* the child already logged the failure details */
2041             return;
2042         }
2043     }
2044
2045     retained->max_daemons_limit = last_non_dead + 1;
2046
2047     if (idle_thread_count > max_spare_threads) {
2048         /* Kill off one child */
2049         ap_event_pod_signal(pod, TRUE);
2050         retained->idle_spawn_rate = 1;
2051     }
2052     else if (idle_thread_count < min_spare_threads) {
2053         /* terminate the free list */
2054         if (free_length == 0) { /* scoreboard is full, can't fork */
2055
2056             if (active_thread_count >= ap_daemons_limit * threads_per_child) {
2057                 if (!retained->maxclients_reported) {
2058                     /* only report this condition once */
2059                     ap_log_error(APLOG_MARK, APLOG_ERR, 0,
2060                                  ap_server_conf,
2061                                  "server reached MaxClients setting, consider"
2062                                  " raising the MaxClients setting");
2063                     retained->maxclients_reported = 1;
2064                 }
2065             }
2066             else {
2067                 ap_log_error(APLOG_MARK, APLOG_ERR, 0,
2068                              ap_server_conf,
2069                              "scoreboard is full, not at MaxClients");
2070             }
2071             retained->idle_spawn_rate = 1;
2072         }
2073         else {
2074             if (free_length > retained->idle_spawn_rate) {
2075                 free_length = retained->idle_spawn_rate;
2076             }
2077             if (retained->idle_spawn_rate >= 8) {
2078                 ap_log_error(APLOG_MARK, APLOG_INFO, 0,
2079                              ap_server_conf,
2080                              "server seems busy, (you may need "
2081                              "to increase StartServers, ThreadsPerChild "
2082                              "or Min/MaxSpareThreads), "
2083                              "spawning %d children, there are around %d idle "
2084                              "threads, and %d total children", free_length,
2085                              idle_thread_count, total_non_dead);
2086             }
2087             for (i = 0; i < free_length; ++i) {
2088                 make_child(ap_server_conf, free_slots[i]);
2089             }
2090             /* the next time around we want to spawn twice as many if this
2091              * wasn't good enough, but not if we've just done a graceful
2092              */
2093             if (retained->hold_off_on_exponential_spawning) {
2094                 --retained->hold_off_on_exponential_spawning;
2095             }
2096             else if (retained->idle_spawn_rate < MAX_SPAWN_RATE) {
2097                 retained->idle_spawn_rate *= 2;
2098             }
2099         }
2100     }
2101     else {
2102         retained->idle_spawn_rate = 1;
2103     }
2104 }
2105
2106 static void server_main_loop(int remaining_children_to_start)
2107 {
2108     int child_slot;
2109     apr_exit_why_e exitwhy;
2110     int status, processed_status;
2111     apr_proc_t pid;
2112     int i;
2113
2114     while (!restart_pending && !shutdown_pending) {
2115         ap_wait_or_timeout(&exitwhy, &status, &pid, pconf, ap_server_conf);
2116
2117         if (pid.pid != -1) {
2118             processed_status = ap_process_child_status(&pid, exitwhy, status);
2119             if (processed_status == APEXIT_CHILDFATAL) {
2120                 shutdown_pending = 1;
2121                 child_fatal = 1;
2122                 return;
2123             }
2124             else if (processed_status == APEXIT_CHILDSICK) {
2125                 /* tell perform_idle_server_maintenance to check into this
2126                  * on the next timer pop
2127                  */
2128                 retained->sick_child_detected = 1;
2129             }
2130             /* non-fatal death... note that it's gone in the scoreboard. */
2131             child_slot = ap_find_child_by_pid(&pid);
2132             if (child_slot >= 0) {
2133                 for (i = 0; i < threads_per_child; i++)
2134                     ap_update_child_status_from_indexes(child_slot, i,
2135                                                         SERVER_DEAD,
2136                                                         (request_rec *) NULL);
2137
2138                 ap_scoreboard_image->parent[child_slot].pid = 0;
2139                 ap_scoreboard_image->parent[child_slot].quiescing = 0;
2140                 if (processed_status == APEXIT_CHILDSICK) {
2141                     /* resource shortage, minimize the fork rate */
2142                     retained->idle_spawn_rate = 1;
2143                 }
2144                 else if (remaining_children_to_start
2145                          && child_slot < ap_daemons_limit) {
2146                     /* we're still doing a 1-for-1 replacement of dead
2147                      * children with new children
2148                      */
2149                     make_child(ap_server_conf, child_slot);
2150                     --remaining_children_to_start;
2151                 }
2152             }
2153             else if (ap_unregister_extra_mpm_process(pid.pid) == 1) {
2154                 /* handled */
2155 #if APR_HAS_OTHER_CHILD
2156             }
2157             else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
2158                                                 status) == 0) {
2159                 /* handled */
2160 #endif
2161             }
2162             else if (retained->is_graceful) {
2163                 /* Great, we've probably just lost a slot in the
2164                  * scoreboard.  Somehow we don't know about this child.
2165                  */
2166                 ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
2167                              ap_server_conf,
2168                              "long lost child came home! (pid %ld)",
2169                              (long) pid.pid);
2170             }
2171             /* Don't perform idle maintenance when a child dies,
2172              * only do it when there's a timeout.  Remember only a
2173              * finite number of children can die, and it's pretty
2174              * pathological for a lot to die suddenly.
2175              */
2176             continue;
2177         }
2178         else if (remaining_children_to_start) {
2179             /* we hit a 1 second timeout in which none of the previous
2180              * generation of children needed to be reaped... so assume
2181              * they're all done, and pick up the slack if any is left.
2182              */
2183             startup_children(remaining_children_to_start);
2184             remaining_children_to_start = 0;
2185             /* In any event we really shouldn't do the code below because
2186              * few of the servers we just started are in the IDLE state
2187              * yet, so we'd mistakenly create an extra server.
2188              */
2189             continue;
2190         }
2191
2192         perform_idle_server_maintenance();
2193     }
2194 }
2195
2196 static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
2197 {
2198     int remaining_children_to_start;
2199
2200     ap_log_pid(pconf, ap_pid_fname);
2201
2202     if (!retained->is_graceful) {
2203         if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
2204             mpm_state = AP_MPMQ_STOPPING;
2205             return DONE;
2206         }
2207         /* fix the generation number in the global score; we just got a new,
2208          * cleared scoreboard
2209          */
2210         ap_scoreboard_image->global->running_generation = retained->my_generation;
2211     }
2212
2213     set_signals();
2214     /* Don't thrash... */
2215     if (max_spare_threads < min_spare_threads + threads_per_child)
2216         max_spare_threads = min_spare_threads + threads_per_child;
2217
2218     /* If we're doing a graceful_restart then we're going to see a lot
2219      * of children exiting immediately when we get into the main loop
2220      * below (because we just sent them AP_SIG_GRACEFUL).  This happens pretty
2221      * rapidly... and for each one that exits we may start a new one, until
2222      * there are at least min_spare_threads idle threads, counting across
2223      * all children.  But we may be permitted to start more children than
2224      * that, so we'll just keep track of how many we're
2225      * supposed to start up without the 1 second penalty between each fork.
2226      */
2227     remaining_children_to_start = ap_daemons_to_start;
2228     if (remaining_children_to_start > ap_daemons_limit) {
2229         remaining_children_to_start = ap_daemons_limit;
2230     }
2231     if (!retained->is_graceful) {
2232         startup_children(remaining_children_to_start);
2233         remaining_children_to_start = 0;
2234     }
2235     else {
2236         /* give the system some time to recover before kicking into
2237          * exponential mode */
2238         retained->hold_off_on_exponential_spawning = 10;
2239     }
2240
2241     ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2242                  "%s configured -- resuming normal operations",
2243                  ap_get_server_description());
2244     ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf,
2245                  "Server built: %s", ap_get_server_built());
2246     ap_log_command_line(plog, s);
2247
2248     restart_pending = shutdown_pending = 0;
2249     mpm_state = AP_MPMQ_RUNNING;
2250
2251     server_main_loop(remaining_children_to_start);
2252     mpm_state = AP_MPMQ_STOPPING;
2253
2254     if (shutdown_pending && !retained->is_graceful) {
2255         /* Time to shut down:
2256          * Kill child processes, tell them to call child_exit, etc...
2257          */
2258         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2259         ap_reclaim_child_processes(1);  /* Start with SIGTERM */
2260
2261         if (!child_fatal) {
2262             /* cleanup pid file on normal shutdown */
2263             const char *pidfile = NULL;
2264             pidfile = ap_server_root_relative(pconf, ap_pid_fname);
2265             if (pidfile != NULL && unlink(pidfile) == 0)
2266                 ap_log_error(APLOG_MARK, APLOG_INFO, 0,
2267                              ap_server_conf,
2268                              "removed PID file %s (pid=%ld)",
2269                              pidfile, (long) getpid());
2270
2271             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0,
2272                          ap_server_conf, "caught SIGTERM, shutting down");
2273         }
2274         return DONE;
2275     } else if (shutdown_pending) {
2276         /* Time to gracefully shut down:
2277          * Kill child processes, tell them to call child_exit, etc...
2278          */
2279         int active_children;
2280         int index;
2281         apr_time_t cutoff = 0;
2282
2283         /* Close our listeners, and then ask our children to do same */
2284         ap_close_listeners();
2285         ap_event_pod_killpg(pod, ap_daemons_limit, TRUE);
2286         ap_relieve_child_processes();
2287
2288         if (!child_fatal) {
2289             /* cleanup pid file on normal shutdown */
2290             const char *pidfile = NULL;
2291             pidfile = ap_server_root_relative (pconf, ap_pid_fname);
2292             if ( pidfile != NULL && unlink(pidfile) == 0)
2293                 ap_log_error(APLOG_MARK, APLOG_INFO, 0,
2294                              ap_server_conf,
2295                              "removed PID file %s (pid=%ld)",
2296                              pidfile, (long)getpid());
2297
2298             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2299                          "caught " AP_SIG_GRACEFUL_STOP_STRING
2300                          ", shutting down gracefully");
2301         }
2302
2303         if (ap_graceful_shutdown_timeout) {
2304             cutoff = apr_time_now() +
2305                      apr_time_from_sec(ap_graceful_shutdown_timeout);
2306         }
2307
2308         /* Don't really exit until each child has finished */
2309         shutdown_pending = 0;
2310         do {
2311             /* Pause for a second */
2312             apr_sleep(apr_time_from_sec(1));
2313
2314             /* Relieve any children which have now exited */
2315             ap_relieve_child_processes();
2316
2317             active_children = 0;
2318             for (index = 0; index < ap_daemons_limit; ++index) {
2319                 if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
2320                     active_children = 1;
2321                     /* Having just one child is enough to stay around */
2322                     break;
2323                 }
2324             }
2325         } while (!shutdown_pending && active_children &&
2326                  (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));
2327
2328         /* We might be here because we received SIGTERM, either
2329          * way, try and make sure that all of our processes are
2330          * really dead.
2331          */
2332         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2333         ap_reclaim_child_processes(1);
2334
2335         return DONE;
2336     }
2337
2338     /* we've been told to restart */
2339     apr_signal(SIGHUP, SIG_IGN);
2340
2341     if (one_process) {
2342         /* not worth thinking about */
2343         return DONE;
2344     }
2345
2346     /* advance to the next generation */
2347     /* XXX: we really need to make sure this new generation number isn't in
2348      * use by any of the children.
2349      */
2350     ++retained->my_generation;
2351     ap_scoreboard_image->global->running_generation = retained->my_generation;
2352
2353     if (retained->is_graceful) {
2354         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2355                      AP_SIG_GRACEFUL_STRING
2356                      " received.  Doing graceful restart");
2357         /* wake up the children...time to die.  But we'll have more soon */
2358         ap_event_pod_killpg(pod, ap_daemons_limit, TRUE);
2359
2360
2361         /* This is mostly for debugging... so that we know what is still
2362          * gracefully dealing with existing request.
2363          */
2364
2365     }
2366     else {
2367         /* Kill 'em all.  Since the child acts the same on the parents SIGTERM
2368          * and a SIGHUP, we may as well use the same signal, because some user
2369          * pthreads are stealing signals from us left and right.
2370          */
2371         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2372
2373         ap_reclaim_child_processes(1);  /* Start with SIGTERM */
2374         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2375                      "SIGHUP received.  Attempting to restart");
2376     }
2377
2378     return OK;
2379 }
2380
2381 /* This really should be a post_config hook, but the error log is already
2382  * redirected by that point, so we need to do this in the open_logs phase.
2383  */
2384 static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
2385                            apr_pool_t * ptemp, server_rec * s)
2386 {
2387     int startup = 0;
2388     int level_flags = 0;
2389     apr_status_t rv;
2390
2391     pconf = p;
2392
2393     /* the reverse of pre_config, we want this only the first time around */
2394     if (retained->module_loads == 1) {
2395         startup = 1;
2396         level_flags |= APLOG_STARTUP;
2397     }
2398
2399     if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
2400         ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
2401                      (startup ? NULL : s),
2402                      "no listening sockets available, shutting down");
2403         return DONE;
2404     }
2405
2406     if (!one_process) {
2407         if ((rv = ap_event_pod_open(pconf, &pod))) {
2408             ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
2409                          (startup ? NULL : s),
2410                          "could not open pipe-of-death");
2411             return DONE;
2412         }
2413     }
2414     return OK;
2415 }
2416
2417 static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
2418                             apr_pool_t * ptemp)
2419 {
2420     int no_detach, debug, foreground;
2421     apr_status_t rv;
2422     const char *userdata_key = "mpm_event_module";
2423
2424     mpm_state = AP_MPMQ_STARTING;
2425
2426     debug = ap_exists_config_define("DEBUG");
2427
2428     if (debug) {
2429         foreground = one_process = 1;
2430         no_detach = 0;
2431     }
2432     else {
2433         one_process = ap_exists_config_define("ONE_PROCESS");
2434         no_detach = ap_exists_config_define("NO_DETACH");
2435         foreground = ap_exists_config_define("FOREGROUND");
2436     }
2437
2438     /* sigh, want this only the second time around */
2439     retained = ap_retained_data_get(userdata_key);
2440     if (!retained) {
2441         retained = ap_retained_data_create(userdata_key, sizeof(*retained));
2442         retained->max_daemons_limit = -1;
2443         retained->idle_spawn_rate = 1;
2444     }
2445     ++retained->module_loads;
2446     if (retained->module_loads == 2) {
2447         rv = apr_pollset_create(&event_pollset, 1, plog,
2448                                 APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
2449         if (rv != APR_SUCCESS) {
2450             ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
2451                          "Couldn't create a Thread Safe Pollset. "
2452                          "Is it supported on your platform?"
2453                          "Also check system or user limits!");
2454             return HTTP_INTERNAL_SERVER_ERROR;
2455         }
2456         apr_pollset_destroy(event_pollset);
2457
2458         if (!one_process && !foreground) {
2459             rv = apr_proc_detach(no_detach ? APR_PROC_DETACH_FOREGROUND
2460                                  : APR_PROC_DETACH_DAEMONIZE);
2461             if (rv != APR_SUCCESS) {
2462                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
2463                              "apr_proc_detach failed");
2464                 return HTTP_INTERNAL_SERVER_ERROR;
2465             }
2466         }
2467     }
2468
2469     parent_pid = ap_my_pid = getpid();
2470
2471     ap_listen_pre_config();
2472     ap_daemons_to_start = DEFAULT_START_DAEMON;
2473     min_spare_threads = DEFAULT_MIN_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
2474     max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
2475     server_limit = DEFAULT_SERVER_LIMIT;
2476     thread_limit = DEFAULT_THREAD_LIMIT;
2477     ap_daemons_limit = server_limit;
2478     threads_per_child = DEFAULT_THREADS_PER_CHILD;
2479     max_clients = ap_daemons_limit * threads_per_child;
2480     ap_pid_fname = DEFAULT_PIDLOG;
2481     ap_max_requests_per_child = DEFAULT_MAX_REQUESTS_PER_CHILD;
2482     ap_extended_status = 0;
2483     ap_max_mem_free = APR_ALLOCATOR_MAX_FREE_UNLIMITED;
2484
2485     apr_cpystrn(ap_coredump_dir, ap_server_root, sizeof(ap_coredump_dir));
2486
2487     return OK;
2488 }
2489
2490 static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
2491                               apr_pool_t *ptemp, server_rec *s)
2492 {
2493     int startup = 0;
2494
2495     /* the reverse of pre_config, we want this only the first time around */
2496     if (retained->module_loads == 1) {
2497         startup = 1;
2498     }
2499
2500     if (server_limit > MAX_SERVER_LIMIT) {
2501         if (startup) {
2502             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2503                          "WARNING: ServerLimit of %d exceeds compile-time "
2504                          "limit of", server_limit);
2505             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2506                          " %d servers, decreasing to %d.",
2507                          MAX_SERVER_LIMIT, MAX_SERVER_LIMIT);
2508         } else {
2509             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2510                          "ServerLimit of %d exceeds compile-time limit "
2511                          "of %d, decreasing to match",
2512                          server_limit, MAX_SERVER_LIMIT);
2513         }
2514         server_limit = MAX_SERVER_LIMIT;
2515     }
2516     else if (server_limit < 1) {
2517         if (startup) {
2518             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2519                          "WARNING: ServerLimit of %d not allowed, "
2520                          "increasing to 1.", server_limit);
2521         } else {
2522             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2523                          "ServerLimit of %d not allowed, increasing to 1",
2524                          server_limit);
2525         }
2526         server_limit = 1;
2527     }
2528
2529     /* you cannot change ServerLimit across a restart; ignore
2530      * any such attempts
2531      */
2532     if (!retained->first_server_limit) {
2533         retained->first_server_limit = server_limit;
2534     }
2535     else if (server_limit != retained->first_server_limit) {
2536         /* don't need a startup console version here */
2537         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2538                      "changing ServerLimit to %d from original value of %d "
2539                      "not allowed during restart",
2540                      server_limit, retained->first_server_limit);
2541         server_limit = retained->first_server_limit;
2542     }
2543
2544     if (thread_limit > MAX_THREAD_LIMIT) {
2545         if (startup) {
2546             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2547                          "WARNING: ThreadLimit of %d exceeds compile-time "
2548                          "limit of", thread_limit);
2549             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2550                          " %d threads, decreasing to %d.",
2551                          MAX_THREAD_LIMIT, MAX_THREAD_LIMIT);
2552         } else {
2553             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2554                          "ThreadLimit of %d exceeds compile-time limit "
2555                          "of %d, decreasing to match",
2556                          thread_limit, MAX_THREAD_LIMIT);
2557         }
2558         thread_limit = MAX_THREAD_LIMIT;
2559     }
2560     else if (thread_limit < 1) {
2561         if (startup) {
2562             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2563                          "WARNING: ThreadLimit of %d not allowed, "
2564                          "increasing to 1.", thread_limit);
2565         } else {
2566             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2567                          "ThreadLimit of %d not allowed, increasing to 1",
2568                          thread_limit);
2569         }
2570         thread_limit = 1;
2571     }
2572
2573     /* you cannot change ThreadLimit across a restart; ignore
2574      * any such attempts
2575      */
2576     if (!retained->first_thread_limit) {
2577         retained->first_thread_limit = thread_limit;
2578     }
2579     else if (thread_limit != retained->first_thread_limit) {
2580         /* don't need a startup console version here */
2581         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2582                      "changing ThreadLimit to %d from original value of %d "
2583                      "not allowed during restart",
2584                      thread_limit, retained->first_thread_limit);
2585         thread_limit = retained->first_thread_limit;
2586     }
2587
2588     if (threads_per_child > thread_limit) {
2589         if (startup) {
2590             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2591                          "WARNING: ThreadsPerChild of %d exceeds ThreadLimit "
2592                          "of", threads_per_child);
2593             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2594                          " %d threads, decreasing to %d.",
2595                          thread_limit, thread_limit);
2596             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2597                          " To increase, please see the ThreadLimit "
2598                          "directive.");
2599         } else {
2600             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2601                          "ThreadsPerChild of %d exceeds ThreadLimit "
2602                          "of %d, decreasing to match",
2603                          threads_per_child, thread_limit);
2604         }
2605         threads_per_child = thread_limit;
2606     }
2607     else if (threads_per_child < 1) {
2608         if (startup) {
2609             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2610                          "WARNING: ThreadsPerChild of %d not allowed, "
2611                          "increasing to 1.", threads_per_child);
2612         } else {
2613             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2614                          "ThreadsPerChild of %d not allowed, increasing to 1",
2615                          threads_per_child);
2616         }
2617         threads_per_child = 1;
2618     }
2619
2620     if (max_clients < threads_per_child) {
2621         if (startup) {
2622             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2623                          "WARNING: MaxClients of %d is less than "
2624                          "ThreadsPerChild of", max_clients);
2625             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2626                          " %d, increasing to %d.  MaxClients must be at "
2627                          "least as large",
2628                          threads_per_child, threads_per_child);
2629             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2630                          " as the number of threads in a single server.");
2631         } else {
2632             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2633                          "MaxClients of %d is less than ThreadsPerChild "
2634                          "of %d, increasing to match",
2635                          max_clients, threads_per_child);
2636         }
2637         max_clients = threads_per_child;
2638     }
2639
2640     ap_daemons_limit = max_clients / threads_per_child;
2641
2642     if (max_clients % threads_per_child) {
2643         int tmp_max_clients = ap_daemons_limit * threads_per_child;
2644
2645         if (startup) {
2646             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2647                          "WARNING: MaxClients of %d is not an integer "
2648                          "multiple of", max_clients);
2649             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2650                          " ThreadsPerChild of %d, decreasing to nearest "
2651                          "multiple %d,", threads_per_child,
2652                          tmp_max_clients);
2653             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2654                          " for a maximum of %d servers.",
2655                          ap_daemons_limit);
2656         } else {
2657             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2658                          "MaxClients of %d is not an integer multiple of "
2659                          "ThreadsPerChild of %d, decreasing to nearest "
2660                          "multiple %d", max_clients, threads_per_child,
2661                          tmp_max_clients);
2662         }
2663         max_clients = tmp_max_clients;
2664     }
2665
2666     if (ap_daemons_limit > server_limit) {
2667         if (startup) {
2668             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2669                          "WARNING: MaxClients of %d would require %d "
2670                          "servers and ", max_clients, ap_daemons_limit);
2671             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2672                          " would exceed ServerLimit of %d, decreasing to %d.",
2673                          server_limit, server_limit * threads_per_child);
2674             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2675                          " To increase, please see the ServerLimit "
2676                          "directive.");
2677         } else {
2678             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2679                          "MaxClients of %d would require %d servers and "
2680                          "exceed ServerLimit of %d, decreasing to %d",
2681                          max_clients, ap_daemons_limit, server_limit,
2682                          server_limit * threads_per_child);
2683         }
2684         ap_daemons_limit = server_limit;
2685     }
2686
2687     /* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */
2688     if (ap_daemons_to_start < 0) {
2689         if (startup) {
2690             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2691                          "WARNING: StartServers of %d not allowed, "
2692                          "increasing to 1.", ap_daemons_to_start);
2693         } else {
2694             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2695                          "StartServers of %d not allowed, increasing to 1",
2696                          ap_daemons_to_start);
2697         }
2698         ap_daemons_to_start = 1;
2699     }
2700
2701     if (min_spare_threads < 1) {
2702         if (startup) {
2703             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2704                          "WARNING: MinSpareThreads of %d not allowed, "
2705                          "increasing to 1", min_spare_threads);
2706             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2707                          " to avoid almost certain server failure.");
2708             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2709                          " Please read the documentation.");
2710         } else {
2711             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2712                          "MinSpareThreads of %d not allowed, increasing to 1",
2713                          min_spare_threads);
2714         }
2715         min_spare_threads = 1;
2716     }
2717
2718     /* max_spare_threads < min_spare_threads + threads_per_child
2719      * checked in ap_mpm_run()
2720      */
2721
2722     return OK;
2723 }
2724
2725 static void event_hooks(apr_pool_t * p)
2726 {
2727     /* Our open_logs hook function must run before the core's, or stderr
2728      * will be redirected to a file, and the messages won't print to the
2729      * console.
2730      */
2731     static const char *const aszSucc[] = { "core.c", NULL };
2732     one_process = 0;
2733
2734     ap_hook_open_logs(event_open_logs, NULL, aszSucc, APR_HOOK_REALLY_FIRST);
2735     /* we need to set the MPM state before other pre-config hooks use MPM query
2736      * to retrieve it, so register as REALLY_FIRST
2737      */
2738     ap_hook_pre_config(event_pre_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
2739     ap_hook_check_config(event_check_config, NULL, NULL, APR_HOOK_MIDDLE);
2740     ap_hook_mpm(event_run, NULL, NULL, APR_HOOK_MIDDLE);
2741     ap_hook_mpm_query(event_query, NULL, NULL, APR_HOOK_MIDDLE);
2742     ap_hook_mpm_note_child_killed(event_note_child_killed, NULL, NULL, APR_HOOK_MIDDLE);
2743     ap_hook_mpm_register_timed_callback(event_register_timed_callback, NULL, NULL,
2744                                         APR_HOOK_MIDDLE);
2745     ap_hook_mpm_get_name(event_get_name, NULL, NULL, APR_HOOK_MIDDLE);
2746 }
2747
2748 static const char *set_daemons_to_start(cmd_parms *cmd, void *dummy,
2749                                         const char *arg)
2750 {
2751     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2752     if (err != NULL) {
2753         return err;
2754     }
2755
2756     ap_daemons_to_start = atoi(arg);
2757     return NULL;
2758 }
2759
2760 static const char *set_min_spare_threads(cmd_parms * cmd, void *dummy,
2761                                          const char *arg)
2762 {
2763     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2764     if (err != NULL) {
2765         return err;
2766     }
2767
2768     min_spare_threads = atoi(arg);
2769     return NULL;
2770 }
2771
2772 static const char *set_max_spare_threads(cmd_parms * cmd, void *dummy,
2773                                          const char *arg)
2774 {
2775     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2776     if (err != NULL) {
2777         return err;
2778     }
2779
2780     max_spare_threads = atoi(arg);
2781     return NULL;
2782 }
2783
2784 static const char *set_max_clients(cmd_parms * cmd, void *dummy,
2785                                    const char *arg)
2786 {
2787     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2788     if (err != NULL) {
2789         return err;
2790     }
2791
2792     max_clients = atoi(arg);
2793     return NULL;
2794 }
2795
2796 static const char *set_threads_per_child(cmd_parms * cmd, void *dummy,
2797                                          const char *arg)
2798 {
2799     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2800     if (err != NULL) {
2801         return err;
2802     }
2803
2804     threads_per_child = atoi(arg);
2805     return NULL;
2806 }
2807 static const char *set_server_limit (cmd_parms *cmd, void *dummy, const char *arg)
2808 {
2809     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2810     if (err != NULL) {
2811         return err;
2812     }
2813
2814     server_limit = atoi(arg);
2815     return NULL;
2816 }
2817
2818 static const char *set_thread_limit(cmd_parms * cmd, void *dummy,
2819                                     const char *arg)
2820 {
2821     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2822     if (err != NULL) {
2823         return err;
2824     }
2825
2826     thread_limit = atoi(arg);
2827     return NULL;
2828 }
2829
2830 static const command_rec event_cmds[] = {
2831     LISTEN_COMMANDS,
2832     AP_INIT_TAKE1("StartServers", set_daemons_to_start, NULL, RSRC_CONF,
2833                   "Number of child processes launched at server startup"),
2834     AP_INIT_TAKE1("ServerLimit", set_server_limit, NULL, RSRC_CONF,
2835                   "Maximum number of child processes for this run of Apache"),
2836     AP_INIT_TAKE1("MinSpareThreads", set_min_spare_threads, NULL, RSRC_CONF,
2837                   "Minimum number of idle threads, to handle request spikes"),
2838     AP_INIT_TAKE1("MaxSpareThreads", set_max_spare_threads, NULL, RSRC_CONF,
2839                   "Maximum number of idle threads"),
2840     AP_INIT_TAKE1("MaxClients", set_max_clients, NULL, RSRC_CONF,
2841                   "Maximum number of threads alive at the same time"),
2842     AP_INIT_TAKE1("ThreadsPerChild", set_threads_per_child, NULL, RSRC_CONF,
2843                   "Number of threads each child creates"),
2844     AP_INIT_TAKE1("ThreadLimit", set_thread_limit, NULL, RSRC_CONF,
2845                   "Maximum number of worker threads per child process for this "
2846                   "run of Apache - Upper limit for ThreadsPerChild"),
2847     AP_GRACEFUL_SHUTDOWN_TIMEOUT_COMMAND,
2848     {NULL}
2849 };
2850
2851 AP_DECLARE_MODULE(mpm_event) = {
2852     MPM20_MODULE_STUFF,
2853     NULL,                       /* hook to run before apache parses args */
2854     NULL,                       /* create per-directory config structure */
2855     NULL,                       /* merge per-directory config structures */
2856     NULL,                       /* create per-server config structure */
2857     NULL,                       /* merge per-server config structures */
2858     event_cmds,                 /* command apr_table_t */
2859     event_hooks                 /* register_hooks */
2860 };