granicus.if.org Git - apache/blob - server/mpm/event/event.c

   1 /* Licensed to the Apache Software Foundation (ASF) under one or more
   2  * contributor license agreements.  See the NOTICE file distributed with
   3  * this work for additional information regarding copyright ownership.
   4  * The ASF licenses this file to You under the Apache License, Version 2.0
   5  * (the "License"); you may not use this file except in compliance with
   6  * the License.  You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * This MPM tries to fix the 'keep alive problem' in HTTP.
  19  *
  20  * After a client completes the first request, the client can keep the
  21  * connection open to send more requests with the same socket.  This can save
  22  * significant overhead in creating TCP connections.  However, the major
  23  * disadvantage is that Apache traditionally keeps an entire child
  24  * process/thread waiting for data from the client.  To solve this problem,
  25  * this MPM has a dedicated thread for handling both the listening sockets,
  26  * and all sockets that are in a Keep Alive status.
  27  *
  28  * The MPM assumes the underlying apr_pollset implementation is somewhat
  29  * threadsafe.  This currently is only compatible with KQueue and EPoll.  This
  30  * enables the MPM to avoid extra high level locking or having to wake up the
  31  * listener thread when a keep-alive socket needs to be sent to it.
  32  *
  33  * This MPM does not perform well on older platforms that do not have very good
  34  * threading, like Linux with a 2.4 kernel, but this does not matter, since we
  35  * require EPoll or KQueue.
  36  *
  37  * For FreeBSD, use 5.3.  It is possible to run this MPM on FreeBSD 5.2.1, if
  38  * you use libkse (see `man libmap.conf`).
  39  *
  40  * For NetBSD, use at least 2.0.
  41  *
  42  * For Linux, you should use a 2.6 kernel, and make sure your glibc has epoll
  43  * support compiled in.
  44  *
  45  */
  46
  47 #include "apr.h"
  48 #include "apr_portable.h"
  49 #include "apr_strings.h"
  50 #include "apr_file_io.h"
  51 #include "apr_thread_proc.h"
  52 #include "apr_signal.h"
  53 #include "apr_thread_mutex.h"
  54 #include "apr_proc_mutex.h"
  55 #include "apr_poll.h"
  56 #include "apr_ring.h"
  57 #include "apr_queue.h"
  58 #define APR_WANT_STRFUNC
  59 #include "apr_want.h"
  60
  61 #if APR_HAVE_UNISTD_H
  62 #include <unistd.h>
  63 #endif
  64 #if APR_HAVE_SYS_SOCKET_H
  65 #include <sys/socket.h>
  66 #endif
  67 #if APR_HAVE_SYS_WAIT_H
  68 #include <sys/wait.h>
  69 #endif
  70 #ifdef HAVE_SYS_PROCESSOR_H
  71 #include <sys/processor.h>      /* for bindprocessor() */
  72 #endif
  73
  74 #if !APR_HAS_THREADS
  75 #error The Event MPM requires APR threads, but they are unavailable.
  76 #endif
  77
  78 #include "ap_config.h"
  79 #include "httpd.h"
  80 #include "http_main.h"
  81 #include "http_log.h"
  82 #include "http_config.h"        /* for read_config */
  83 #include "http_core.h"          /* for get_remote_host */
  84 #include "http_connection.h"
  85 #include "ap_mpm.h"
  86 #include "pod.h"
  87 #include "mpm_common.h"
  88 #include "ap_listen.h"
  89 #include "scoreboard.h"
  90 #include "fdqueue.h"
  91 #include "mpm_default.h"
  92 #include "http_vhost.h"
  93 #include "unixd.h"
  94
  95 #include <signal.h>
  96 #include <limits.h>             /* for INT_MAX */
  97
  98 #include "mod_serf.h"
  99
 100 #if AP_HAS_SERF
 101 #include "serf.h"
 102 #endif
 103
 104 /* Limit on the total --- clients will be locked out if more servers than
 105  * this are needed.  It is intended solely to keep the server from crashing
 106  * when things get out of hand.
 107  *
 108  * We keep a hard maximum number of servers, for two reasons --- first off,
 109  * in case something goes seriously wrong, we want to stop the fork bomb
 110  * short of actually crashing the machine we're running on by filling some
 111  * kernel table.  Secondly, it keeps the size of the scoreboard file small
 112  * enough that we can read the whole thing without worrying too much about
 113  * the overhead.
 114  */
 115 #ifndef DEFAULT_SERVER_LIMIT
 116 #define DEFAULT_SERVER_LIMIT 16
 117 #endif
 118
 119 /* Admin can't tune ServerLimit beyond MAX_SERVER_LIMIT.  We want
 120  * some sort of compile-time limit to help catch typos.
 121  */
 122 #ifndef MAX_SERVER_LIMIT
 123 #define MAX_SERVER_LIMIT 20000
 124 #endif
 125
 126 /* Limit on the threads per process.  Clients will be locked out if more than
 127  * this are needed.
 128  *
 129  * We keep this for one reason: it keeps the size of the scoreboard file small
 130  * enough that we can read the whole thing without worrying too much about
 131  * the overhead.
 132  */
 133 #ifndef DEFAULT_THREAD_LIMIT
 134 #define DEFAULT_THREAD_LIMIT 64
 135 #endif
 136
 137 /* Admin can't tune ThreadLimit beyond MAX_THREAD_LIMIT.  We want
 138  * some sort of compile-time limit to help catch typos.
 139  */
 140 #ifndef MAX_THREAD_LIMIT
 141 #define MAX_THREAD_LIMIT 100000
 142 #endif
 143
 144 #define MPM_CHILD_PID(i) (ap_scoreboard_image->parent[i].pid)
 145
 146 /*
 147  * Actual definitions of config globals
 148  */
 149
 150 static int threads_per_child = 0;   /* Worker threads per child */
 151 static int ap_daemons_to_start = 0;
 152 static int min_spare_threads = 0;
 153 static int max_spare_threads = 0;
 154 static int ap_daemons_limit = 0;
 155 static int max_clients = 0;
 156 static int server_limit = 0;
 157 static int thread_limit = 0;
 158 static int dying = 0;
 159 static int workers_may_exit = 0;
 160 static int start_thread_may_exit = 0;
 161 static int listener_may_exit = 0;
 162 static int requests_this_child;
 163 static int num_listensocks = 0;
 164 static int resource_shortage = 0;
 165 static fd_queue_t *worker_queue;
 166 static fd_queue_info_t *worker_queue_info;
 167 static int mpm_state = AP_MPMQ_STARTING;
 168 static int sick_child_detected;
 169 static ap_generation_t volatile my_generation = 0;
 170
 171 static apr_thread_mutex_t *timeout_mutex;
 172 APR_RING_HEAD(timeout_head_t, conn_state_t);
 173 static struct timeout_head_t timeout_head, keepalive_timeout_head;
 174
 175 static apr_pollset_t *event_pollset;
 176
 177 #if AP_HAS_SERF
 178 typedef struct {
 179     apr_pollset_t *pollset;
 180     apr_pool_t *pool;
 181 } s_baton_t;
 182
 183 static serf_context_t *g_serf;
 184 #endif
 185
 186 /* The structure used to pass unique initialization info to each thread */
 187 typedef struct
 188 {
 189     int pid;
 190     int tid;
 191     int sd;
 192 } proc_info;
 193
 194 /* Structure used to pass information to the thread responsible for
 195  * creating the rest of the threads.
 196  */
 197 typedef struct
 198 {
 199     apr_thread_t **threads;
 200     apr_thread_t *listener;
 201     int child_num_arg;
 202     apr_threadattr_t *threadattr;
 203 } thread_starter;
 204
/* Discriminator stored in listener_poll_type.type; it tells the code that
 * receives a pollset event what kind of object the entry's baton points at.
 */
typedef enum
{
    PT_CSD,     /* client connection socket; baton is its conn_state_t */
    PT_ACCEPT   /* listening socket; baton is its ap_listen_rec */
#if AP_HAS_SERF
    , PT_SERF   /* socket registered through the serf add/remove callbacks */
#endif
} poll_type_e;
 213
/* Per-descriptor record hung off apr_pollfd_t.client_data for every entry
 * placed in the event pollset.
 */
typedef struct
{
    /* which kind of entry this is; selects how baton is interpreted */
    poll_type_e type;
    /* non-zero makes push2worker() return immediately without queueing;
     * process_socket() sets it for a new connection and clears it right
     * before adding the connection to the pollset
     */
    int bypass_push;
    /* conn_state_t for PT_CSD, ap_listen_rec for PT_ACCEPT,
     * the serf-supplied baton for PT_SERF
     */
    void *baton;
} listener_poll_type;
 220
 221 /* data retained by event across load/unload of the module
 222  * allocated on first call to pre-config hook; located on
 223  * subsequent calls to pre-config hook
 224  */
 225 typedef struct event_retained_data {
 226     int first_server_limit;
 227     int first_thread_limit;
 228     int module_loads;
 229 } event_retained_data;
 230 static event_retained_data *retained;
 231
 232 #define ID_FROM_CHILD_THREAD(c, t)    ((c * thread_limit) + t)
 233
 234 /*
 235  * The max child slot ever assigned, preserved across restarts.  Necessary
 236  * to deal with MaxClients changes across AP_SIG_GRACEFUL restarts.  We
 237  * use this value to optimize routines that have to scan the entire
 238  * scoreboard.
 239  */
 240 static int max_daemons_limit = -1;
 241
 242 static ap_event_pod_t *pod;
 243
 244 /* The event MPM respects a couple of runtime flags that can aid
 245  * in debugging. Setting the -DNO_DETACH flag will prevent the root process
 246  * from detaching from its controlling terminal. Additionally, setting
 247  * the -DONE_PROCESS flag (which implies -DNO_DETACH) will get you the
 248  * child_main loop running in the process which originally started up.
 249  * This gives you a pretty nice debugging environment.  (You'll get a SIGHUP
 250  * early in standalone_main; just continue through.  This is the server
 251  * trying to kill off any child processes which it might have lying
 252  * around --- Apache doesn't keep track of their pids, it just sends
 253  * SIGHUP to the process group, ignoring it in the root process.
 254  * Continue through and you'll be fine.).
 255  */
 256
 257 static int one_process = 0;
 258
 259 #ifdef DEBUG_SIGSTOP
 260 int raise_sigstop_flags;
 261 #endif
 262
 263 static apr_pool_t *pconf;       /* Pool for config stuff */
 264 static apr_pool_t *pchild;      /* Pool for httpd child stuff */
 265
 266 static pid_t ap_my_pid;         /* Linux getpid() doesn't work except in main
 267                                    thread. Use this instead */
 268 static pid_t parent_pid;
 269 static apr_os_thread_t *listener_os_thread;
 270
 271 /* The LISTENER_SIGNAL signal will be sent from the main thread to the
 272  * listener thread to wake it up for graceful termination (what a child
 273  * process from an old generation does when the admin does "apachectl
 274  * graceful").  This signal will be blocked in all threads of a child
 275  * process except for the listener thread.
 276  */
 277 #define LISTENER_SIGNAL     SIGHUP
 278
 279 /* An array of socket descriptors in use by each thread used to
 280  * perform a non-graceful (forced) shutdown of the server.
 281  */
 282 static apr_socket_t **worker_sockets;
 283
 284 static void close_worker_sockets(void)
 285 {
 286     int i;
 287     for (i = 0; i < threads_per_child; i++) {
 288         if (worker_sockets[i]) {
 289             apr_socket_close(worker_sockets[i]);
 290             worker_sockets[i] = NULL;
 291         }
 292     }
 293 }
 294
 295 static void wakeup_listener(void)
 296 {
 297     listener_may_exit = 1;
 298     if (!listener_os_thread) {
 299         /* XXX there is an obscure path that this doesn't handle perfectly:
 300          *     right after listener thread is created but before
 301          *     listener_os_thread is set, the first worker thread hits an
 302          *     error and starts graceful termination
 303          */
 304         return;
 305     }
 306
 307     /* unblock the listener if it's waiting for a worker */
 308     ap_queue_info_term(worker_queue_info);
 309
 310     /*
 311      * we should just be able to "kill(ap_my_pid, LISTENER_SIGNAL)" on all
 312      * platforms and wake up the listener thread since it is the only thread
 313      * with SIGHUP unblocked, but that doesn't work on Linux
 314      */
 315 #ifdef HAVE_PTHREAD_KILL
 316     pthread_kill(*listener_os_thread, LISTENER_SIGNAL);
 317 #else
 318     kill(ap_my_pid, LISTENER_SIGNAL);
 319 #endif
 320 }
 321
 322 #define ST_INIT              0
 323 #define ST_GRACEFUL          1
 324 #define ST_UNGRACEFUL        2
 325
 326 static int terminate_mode = ST_INIT;
 327
 328 static void signal_threads(int mode)
 329 {
 330     if (terminate_mode == mode) {
 331         return;
 332     }
 333     terminate_mode = mode;
 334     mpm_state = AP_MPMQ_STOPPING;
 335
 336     /* in case we weren't called from the listener thread, wake up the
 337      * listener thread
 338      */
 339     wakeup_listener();
 340
 341     /* for ungraceful termination, let the workers exit now;
 342      * for graceful termination, the listener thread will notify the
 343      * workers to exit once it has stopped accepting new connections
 344      */
 345     if (mode == ST_UNGRACEFUL) {
 346         workers_may_exit = 1;
 347         ap_queue_interrupt_all(worker_queue);
 348         close_worker_sockets(); /* forcefully kill all current connections */
 349     }
 350 }
 351
 352 static apr_status_t event_query(int query_code, int *result)
 353 {
 354     switch (query_code) {
 355     case AP_MPMQ_MAX_DAEMON_USED:
 356         *result = max_daemons_limit;
 357         return APR_SUCCESS;
 358     case AP_MPMQ_IS_THREADED:
 359         *result = AP_MPMQ_STATIC;
 360         return APR_SUCCESS;
 361     case AP_MPMQ_IS_FORKED:
 362         *result = AP_MPMQ_DYNAMIC;
 363         return APR_SUCCESS;
 364     case AP_MPMQ_IS_ASYNC:
 365         *result = 1;
 366         return APR_SUCCESS;
 367     case AP_MPMQ_HAS_SERF:
 368         *result = 1;
 369         return APR_SUCCESS;
 370     case AP_MPMQ_HARD_LIMIT_DAEMONS:
 371         *result = server_limit;
 372         return APR_SUCCESS;
 373     case AP_MPMQ_HARD_LIMIT_THREADS:
 374         *result = thread_limit;
 375         return APR_SUCCESS;
 376     case AP_MPMQ_MAX_THREADS:
 377         *result = threads_per_child;
 378         return APR_SUCCESS;
 379     case AP_MPMQ_MIN_SPARE_DAEMONS:
 380         *result = 0;
 381         return APR_SUCCESS;
 382     case AP_MPMQ_MIN_SPARE_THREADS:
 383         *result = min_spare_threads;
 384         return APR_SUCCESS;
 385     case AP_MPMQ_MAX_SPARE_DAEMONS:
 386         *result = 0;
 387         return APR_SUCCESS;
 388     case AP_MPMQ_MAX_SPARE_THREADS:
 389         *result = max_spare_threads;
 390         return APR_SUCCESS;
 391     case AP_MPMQ_MAX_REQUESTS_DAEMON:
 392         *result = ap_max_requests_per_child;
 393         return APR_SUCCESS;
 394     case AP_MPMQ_MAX_DAEMONS:
 395         *result = ap_daemons_limit;
 396         return APR_SUCCESS;
 397     case AP_MPMQ_MPM_STATE:
 398         *result = mpm_state;
 399         return APR_SUCCESS;
 400     case AP_MPMQ_GENERATION:
 401         *result = my_generation;
 402         return APR_SUCCESS;
 403     }
 404     return APR_ENOTIMPL;
 405 }
 406
 407 static apr_status_t event_note_child_killed(int childnum)
 408 {
 409     ap_scoreboard_image->parent[childnum].pid = 0;
 410     return APR_SUCCESS;
 411 }
 412
 413 static const char *event_get_name(void)
 414 {
 415     return "event";
 416 }
 417
 418 /* a clean exit from a child with proper cleanup */
 419 static void clean_child_exit(int code) __attribute__ ((noreturn));
 420 static void clean_child_exit(int code)
 421 {
 422     mpm_state = AP_MPMQ_STOPPING;
 423     if (pchild) {
 424         apr_pool_destroy(pchild);
 425     }
 426     exit(code);
 427 }
 428
 429 static void just_die(int sig)
 430 {
 431     clean_child_exit(0);
 432 }
 433
 434 /*****************************************************************
 435  * Connection structures and accounting...
 436  */
 437
 438 /* volatile just in case */
 439 static int volatile shutdown_pending;
 440 static int volatile restart_pending;
 441 static int volatile is_graceful;
 442 static volatile int child_fatal;
 443
 444 /*
 445  * ap_start_shutdown() and ap_start_restart(), below, are a first stab at
 446  * functions to initiate shutdown or restart without relying on signals.
 447  * Previously this was initiated in sig_term() and restart() signal handlers,
 448  * but we want to be able to start a shutdown/restart from other sources --
 449  * e.g. on Win32, from the service manager. Now the service manager can
 450  * call ap_start_shutdown() or ap_start_restart() as appropriate.  Note that
 451  * these functions can also be called by the child processes, since global
 452  * variables are no longer used to pass on the required action to the parent.
 453  *
 454  * These should only be called from the parent process itself, since the
 455  * parent process will use the shutdown_pending and restart_pending variables
 456  * to determine whether to shutdown or restart. The child process should
 457  * call signal_parent() directly to tell the parent to die -- this will
 458  * cause neither of those variables to be set, which the parent will
 459  * assume means something serious is wrong (which it will be, for the
 460  * child to force an exit) and so do an exit anyway.
 461  */
 462
 463 static void ap_start_shutdown(int graceful)
 464 {
 465     mpm_state = AP_MPMQ_STOPPING;
 466     if (shutdown_pending == 1) {
 467         /* Um, is this _probably_ not an error, if the user has
 468          * tried to do a shutdown twice quickly, so we won't
 469          * worry about reporting it.
 470          */
 471         return;
 472     }
 473     shutdown_pending = 1;
 474     is_graceful = graceful;
 475 }
 476
 477 /* do a graceful restart if graceful == 1 */
 478 static void ap_start_restart(int graceful)
 479 {
 480     mpm_state = AP_MPMQ_STOPPING;
 481     if (restart_pending == 1) {
 482         /* Probably not an error - don't bother reporting it */
 483         return;
 484     }
 485     restart_pending = 1;
 486     is_graceful = graceful;
 487 }
 488
 489 static void sig_term(int sig)
 490 {
 491     ap_start_shutdown(sig == AP_SIG_GRACEFUL_STOP);
 492 }
 493
 494 static void restart(int sig)
 495 {
 496     ap_start_restart(sig == AP_SIG_GRACEFUL);
 497 }
 498
 499 static void set_signals(void)
 500 {
 501 #ifndef NO_USE_SIGACTION
 502     struct sigaction sa;
 503 #endif
 504
 505     if (!one_process) {
 506         ap_fatal_signal_setup(ap_server_conf, pconf);
 507     }
 508
 509 #ifndef NO_USE_SIGACTION
 510     sigemptyset(&sa.sa_mask);
 511     sa.sa_flags = 0;
 512
 513     sa.sa_handler = sig_term;
 514     if (sigaction(SIGTERM, &sa, NULL) < 0)
 515         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 516                      "sigaction(SIGTERM)");
 517 #ifdef AP_SIG_GRACEFUL_STOP
 518     if (sigaction(AP_SIG_GRACEFUL_STOP, &sa, NULL) < 0)
 519         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 520                      "sigaction(" AP_SIG_GRACEFUL_STOP_STRING ")");
 521 #endif
 522 #ifdef SIGINT
 523     if (sigaction(SIGINT, &sa, NULL) < 0)
 524         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 525                      "sigaction(SIGINT)");
 526 #endif
 527 #ifdef SIGXCPU
 528     sa.sa_handler = SIG_DFL;
 529     if (sigaction(SIGXCPU, &sa, NULL) < 0)
 530         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 531                      "sigaction(SIGXCPU)");
 532 #endif
 533 #ifdef SIGXFSZ
 534     sa.sa_handler = SIG_DFL;
 535     if (sigaction(SIGXFSZ, &sa, NULL) < 0)
 536         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 537                      "sigaction(SIGXFSZ)");
 538 #endif
 539 #ifdef SIGPIPE
 540     sa.sa_handler = SIG_IGN;
 541     if (sigaction(SIGPIPE, &sa, NULL) < 0)
 542         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 543                      "sigaction(SIGPIPE)");
 544 #endif
 545
 546     /* we want to ignore HUPs and AP_SIG_GRACEFUL while we're busy
 547      * processing one */
 548     sigaddset(&sa.sa_mask, SIGHUP);
 549     sigaddset(&sa.sa_mask, AP_SIG_GRACEFUL);
 550     sa.sa_handler = restart;
 551     if (sigaction(SIGHUP, &sa, NULL) < 0)
 552         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 553                      "sigaction(SIGHUP)");
 554     if (sigaction(AP_SIG_GRACEFUL, &sa, NULL) < 0)
 555         ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf,
 556                      "sigaction(" AP_SIG_GRACEFUL_STRING ")");
 557 #else
 558     if (!one_process) {
 559 #ifdef SIGXCPU
 560         apr_signal(SIGXCPU, SIG_DFL);
 561 #endif /* SIGXCPU */
 562 #ifdef SIGXFSZ
 563         apr_signal(SIGXFSZ, SIG_DFL);
 564 #endif /* SIGXFSZ */
 565     }
 566
 567     apr_signal(SIGTERM, sig_term);
 568 #ifdef SIGHUP
 569     apr_signal(SIGHUP, restart);
 570 #endif /* SIGHUP */
 571 #ifdef AP_SIG_GRACEFUL
 572     apr_signal(AP_SIG_GRACEFUL, restart);
 573 #endif /* AP_SIG_GRACEFUL */
 574 #ifdef AP_SIG_GRACEFUL_STOP
 575      apr_signal(AP_SIG_GRACEFUL_STOP, sig_term);
 576 #endif /* AP_SIG_GRACEFUL_STOP */
 577 #ifdef SIGPIPE
 578     apr_signal(SIGPIPE, SIG_IGN);
 579 #endif /* SIGPIPE */
 580
 581 #endif
 582 }
 583
 584 /*****************************************************************
 585  * Child process main loop.
 586  */
 587
 588 static int process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * sock,
 589                           conn_state_t * cs, int my_child_num,
 590                           int my_thread_num)
 591 {
 592     conn_rec *c;
 593     listener_poll_type *pt;
 594     long conn_id = ID_FROM_CHILD_THREAD(my_child_num, my_thread_num);
 595     int rc;
 596     ap_sb_handle_t *sbh;
 597
 598     ap_create_sb_handle(&sbh, p, my_child_num, my_thread_num);
 599
 600     if (cs == NULL) {           /* This is a new connection */
 601
 602         cs = apr_pcalloc(p, sizeof(conn_state_t));
 603
 604         pt = apr_pcalloc(p, sizeof(*pt));
 605
 606         cs->bucket_alloc = apr_bucket_alloc_create(p);
 607         c = ap_run_create_connection(p, ap_server_conf, sock,
 608                                      conn_id, sbh, cs->bucket_alloc);
 609         c->current_thread = thd;
 610         cs->c = c;
 611         c->cs = cs;
 612         cs->p = p;
 613         cs->pfd.desc_type = APR_POLL_SOCKET;
 614         cs->pfd.reqevents = APR_POLLIN;
 615         cs->pfd.desc.s = sock;
 616         pt->type = PT_CSD;
 617         pt->bypass_push = 1;
 618         pt->baton = cs;
 619         cs->pfd.client_data = pt;
 620         APR_RING_ELEM_INIT(cs, timeout_list);
 621
 622         ap_update_vhost_given_ip(c);
 623
 624         rc = ap_run_pre_connection(c, sock);
 625         if (rc != OK && rc != DONE) {
 626             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
 627                          "process_socket: connection aborted");
 628             c->aborted = 1;
 629         }
 630
 631         /**
 632          * XXX If the platform does not have a usable way of bundling
 633          * accept() with a socket readability check, like Win32,
 634          * and there are measurable delays before the
 635          * socket is readable due to the first data packet arriving,
 636          * it might be better to create the cs on the listener thread
 637          * with the state set to CONN_STATE_CHECK_REQUEST_LINE_READABLE
 638          *
 639          * FreeBSD users will want to enable the HTTP accept filter
 640          * module in their kernel for the highest performance
 641          * When the accept filter is active, sockets are kept in the
 642          * kernel until a HTTP request is received.
 643          */
 644         cs->state = CONN_STATE_READ_REQUEST_LINE;
 645
 646     }
 647     else {
 648         c = cs->c;
 649         c->sbh = sbh;
 650         pt = cs->pfd.client_data;
 651         c->current_thread = thd;
 652     }
 653
 654     if (c->clogging_input_filters && !c->aborted) {
 655         /* Since we have an input filter which 'cloggs' the input stream,
 656          * like mod_ssl, lets just do the normal read from input filters,
 657          * like the Worker MPM does.
 658          */
 659         ap_run_process_connection(c);
 660         if (cs->state != CONN_STATE_SUSPENDED) {
 661             cs->state = CONN_STATE_LINGER;
 662         }
 663     }
 664
 665 read_request:
 666     if (cs->state == CONN_STATE_READ_REQUEST_LINE) {
 667         if (!c->aborted) {
 668             ap_run_process_connection(c);
 669
 670             /* state will be updated upon return
 671              * fall thru to either wait for readability/timeout or
 672              * do lingering close
 673              */
 674         }
 675         else {
 676             cs->state = CONN_STATE_LINGER;
 677         }
 678     }
 679
 680     if (cs->state == CONN_STATE_WRITE_COMPLETION) {
 681         ap_filter_t *output_filter = c->output_filters;
 682         apr_status_t rv;
 683         while (output_filter->next != NULL) {
 684             output_filter = output_filter->next;
 685         }
 686         rv = output_filter->frec->filter_func.out_func(output_filter, NULL);
 687         if (rv != APR_SUCCESS) {
 688             ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf,
 689                      "network write failure in core output filter");
 690             cs->state = CONN_STATE_LINGER;
 691         }
 692         else if (c->data_in_output_filters) {
 693             /* Still in WRITE_COMPLETION_STATE:
 694              * Set a write timeout for this connection, and let the
 695              * event thread poll for writeability.
 696              */
 697             cs->expiration_time = ap_server_conf->timeout + apr_time_now();
 698             apr_thread_mutex_lock(timeout_mutex);
 699             APR_RING_INSERT_TAIL(&timeout_head, cs, conn_state_t, timeout_list);
 700             apr_thread_mutex_unlock(timeout_mutex);
 701             pt->bypass_push = 0;
 702             cs->pfd.reqevents = APR_POLLOUT | APR_POLLHUP | APR_POLLERR;
 703             rc = apr_pollset_add(event_pollset, &cs->pfd);
 704             return 1;
 705         }
 706         else if (c->keepalive != AP_CONN_KEEPALIVE || c->aborted ||
 707             listener_may_exit) {
 708             c->cs->state = CONN_STATE_LINGER;
 709         }
 710         else if (c->data_in_input_filters) {
 711             cs->state = CONN_STATE_READ_REQUEST_LINE;
 712             goto read_request;
 713         }
 714         else {
 715             cs->state = CONN_STATE_CHECK_REQUEST_LINE_READABLE;
 716         }
 717     }
 718
 719     if (cs->state == CONN_STATE_LINGER) {
 720         ap_lingering_close(c);
 721         apr_pool_clear(p);
 722         ap_push_pool(worker_queue_info, p);
 723         return 0;
 724     }
 725     else if (cs->state == CONN_STATE_CHECK_REQUEST_LINE_READABLE) {
 726         apr_status_t rc;
 727         listener_poll_type *pt = (listener_poll_type *) cs->pfd.client_data;
 728
 729         /* It greatly simplifies the logic to use a single timeout value here
 730          * because the new element can just be added to the end of the list and
 731          * it will stay sorted in expiration time sequence.  If brand new
 732          * sockets are sent to the event thread for a readability check, this
 733          * will be a slight behavior change - they use the non-keepalive
 734          * timeout today.  With a normal client, the socket will be readable in
 735          * a few milliseconds anyway.
 736          */
 737         cs->expiration_time = ap_server_conf->keep_alive_timeout +
 738                               apr_time_now();
 739         apr_thread_mutex_lock(timeout_mutex);
 740         APR_RING_INSERT_TAIL(&keepalive_timeout_head, cs, conn_state_t, timeout_list);
 741         apr_thread_mutex_unlock(timeout_mutex);
 742
 743         pt->bypass_push = 0;
 744         /* Add work to pollset. */
 745         cs->pfd.reqevents = APR_POLLIN;
 746         rc = apr_pollset_add(event_pollset, &cs->pfd);
 747
 748         if (rc != APR_SUCCESS) {
 749             ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
 750                          "process_socket: apr_pollset_add failure");
 751             AP_DEBUG_ASSERT(rc == APR_SUCCESS);
 752         }
 753     }
 754     return 1;
 755 }
 756
 757 /* requests_this_child has gone to zero or below.  See if the admin coded
 758    "MaxRequestsPerChild 0", and keep going in that case.  Doing it this way
 759    simplifies the hot path in worker_thread */
 760 static void check_infinite_requests(void)
 761 {
 762     if (ap_max_requests_per_child) {
 763         signal_threads(ST_GRACEFUL);
 764     }
 765     else {
 766         /* wow! if you're executing this code, you may have set a record.
 767          * either this child process has served over 2 billion requests, or
 768          * you're running a threaded 2.0 on a 16 bit machine.
 769          *
 770          * I'll buy pizza and beers at Apachecon for the first person to do
 771          * the former without cheating (dorking with INT_MAX, or running with
 772          * uncommitted performance patches, for example).
 773          *
 774          * for the latter case, you probably deserve a beer too.   Greg Ames
 775          */
 776
 777         requests_this_child = INT_MAX;  /* keep going */
 778     }
 779 }
 780
 781 static void unblock_signal(int sig)
 782 {
 783     sigset_t sig_mask;
 784
 785     sigemptyset(&sig_mask);
 786     sigaddset(&sig_mask, sig);
 787 #if defined(SIGPROCMASK_SETS_THREAD_MASK)
 788     sigprocmask(SIG_UNBLOCK, &sig_mask, NULL);
 789 #else
 790     pthread_sigmask(SIG_UNBLOCK, &sig_mask, NULL);
 791 #endif
 792 }
 793
 794 static void dummy_signal_handler(int sig)
 795 {
 796     /* XXX If specifying SIG_IGN is guaranteed to unblock a syscall,
 797      *     then we don't need this goofy function.
 798      */
 799 }
 800
 801
 802 #if AP_HAS_SERF
 803 static apr_status_t s_socket_add(void *user_baton,
 804                                  apr_pollfd_t *pfd,
 805                                  void *serf_baton)
 806 {
 807     s_baton_t *s = (s_baton_t*)user_baton;
 808     /* XXXXX: recycle listener_poll_types */
 809     listener_poll_type *pt = malloc(sizeof(*pt));
 810     pt->type = PT_SERF;
 811     pt->baton = serf_baton;
 812     pfd->client_data = pt;
 813     return apr_pollset_add(s->pollset, pfd);
 814 }
 815
 816 static apr_status_t s_socket_remove(void *user_baton,
 817                                     apr_pollfd_t *pfd,
 818                                     void *serf_baton)
 819 {
 820     s_baton_t *s = (s_baton_t*)user_baton;
 821     listener_poll_type *pt = pfd->client_data;
 822     free(pt);
 823     return apr_pollset_remove(s->pollset, pfd);
 824 }
 825 #endif
 826
 827 static apr_status_t init_pollset(apr_pool_t *p)
 828 {
 829 #if AP_HAS_SERF
 830     s_baton_t *baton = NULL;
 831 #endif
 832     apr_status_t rv;
 833     ap_listen_rec *lr;
 834     listener_poll_type *pt;
 835
 836     rv = apr_thread_mutex_create(&timeout_mutex, APR_THREAD_MUTEX_DEFAULT, p);
 837     if (rv != APR_SUCCESS) {
 838         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
 839                      "creation of the timeout mutex failed.");
 840         return rv;
 841     }
 842
 843     APR_RING_INIT(&timeout_head, conn_state_t, timeout_list);
 844     APR_RING_INIT(&keepalive_timeout_head, conn_state_t, timeout_list);
 845
 846     /* Create the main pollset */
 847     rv = apr_pollset_create(&event_pollset,
 848                             threads_per_child,
 849                             p, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
 850     if (rv != APR_SUCCESS) {
 851         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf,
 852                      "apr_pollset_create with Thread Safety failed.");
 853         return rv;
 854     }
 855
 856     for (lr = ap_listeners; lr != NULL; lr = lr->next) {
 857         apr_pollfd_t *pfd = apr_palloc(p, sizeof(*pfd));
 858         pt = apr_pcalloc(p, sizeof(*pt));
 859         pfd->desc_type = APR_POLL_SOCKET;
 860         pfd->desc.s = lr->sd;
 861         pfd->reqevents = APR_POLLIN;
 862
 863         pt->type = PT_ACCEPT;
 864         pt->baton = lr;
 865
 866         pfd->client_data = pt;
 867
 868         apr_socket_opt_set(pfd->desc.s, APR_SO_NONBLOCK, 1);
 869         apr_pollset_add(event_pollset, pfd);
 870
 871         lr->accept_func = ap_unixd_accept;
 872     }
 873
 874 #if AP_HAS_SERF
 875     baton = apr_pcalloc(p, sizeof(*baton));
 876     baton->pollset = event_pollset;
 877     /* TODO: subpools, threads, reuse, etc.  -- currently use malloc() inside :( */
 878     baton->pool = p;
 879
 880     g_serf = serf_context_create_ex(baton,
 881                                     s_socket_add,
 882                                     s_socket_remove, p);
 883
 884     ap_register_provider(p, "mpm_serf",
 885                          "instance", "0", g_serf);
 886
 887 #endif
 888
 889     return APR_SUCCESS;
 890 }
 891
 892 static apr_status_t push_timer2worker(timer_event_t* te)
 893 {
 894     return ap_queue_push_timer(worker_queue, te);
 895 }
 896
 897 static apr_status_t push2worker(const apr_pollfd_t * pfd,
 898                                 apr_pollset_t * pollset)
 899 {
 900     listener_poll_type *pt = (listener_poll_type *) pfd->client_data;
 901     conn_state_t *cs = (conn_state_t *) pt->baton;
 902     apr_status_t rc;
 903
 904     if (pt->bypass_push) {
 905         return APR_SUCCESS;
 906     }
 907
 908     pt->bypass_push = 1;
 909
 910     rc = apr_pollset_remove(pollset, pfd);
 911
 912     /*
 913      * Some of the pollset backends, like KQueue or Epoll
 914      * automagically remove the FD if the socket is closed,
 915      * therefore, we can accept _SUCCESS or _NOTFOUND,
 916      * and we still want to keep going
 917      */
 918     if (rc != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rc)) {
 919         cs->state = CONN_STATE_LINGER;
 920     }
 921
 922     rc = ap_queue_push(worker_queue, cs->pfd.desc.s, cs, cs->p);
 923     if (rc != APR_SUCCESS) {
 924         /* trash the connection; we couldn't queue the connected
 925          * socket to a worker
 926          */
 927         apr_bucket_alloc_destroy(cs->bucket_alloc);
 928         apr_socket_close(cs->pfd.desc.s);
 929         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
 930                      ap_server_conf, "push2worker: ap_queue_push failed");
 931         apr_pool_clear(cs->p);
 932         ap_push_pool(worker_queue_info, cs->p);
 933     }
 934
 935     return rc;
 936 }
 937
 938 /* get_worker:
 939  *     reserve a worker thread, block if all are currently busy.
 940  *     this prevents the worker queue from overflowing and lets
 941  *     other processes accept new connections in the mean time.
 942  */
 943 static int get_worker(int *have_idle_worker_p)
 944 {
 945     apr_status_t rc;
 946
 947     if (!*have_idle_worker_p) {
 948         rc = ap_queue_info_wait_for_idler(worker_queue_info);
 949
 950         if (rc == APR_SUCCESS) {
 951             *have_idle_worker_p = 1;
 952             return 1;
 953         }
 954         else {
 955             if (!APR_STATUS_IS_EOF(rc)) {
 956                 ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
 957                              "ap_queue_info_wait_for_idler failed.  "
 958                              "Attempting to shutdown process gracefully");
 959                 signal_threads(ST_GRACEFUL);
 960             }
 961             return 0;
 962         }
 963     }
 964     else {
 965         /* already reserved a worker thread - must have hit a
 966          * transient error on a previous pass
 967          */
 968         return 1;
 969     }
 970 }
 971
 972 /* XXXXXX: Convert to skiplist or other better data structure
 973  * (yes, this is VERY VERY VERY VERY BAD)
 974  */
 975
 976 /* Structures to reuse */
 977 static APR_RING_HEAD(timer_free_ring_t, timer_event_t) timer_free_ring;
 978 /* Active timers */
 979 static APR_RING_HEAD(timer_ring_t, timer_event_t) timer_ring;
 980
 981 static apr_thread_mutex_t *g_timer_ring_mtx;
 982
 983 static apr_status_t event_register_timed_callback(apr_time_t t,
 984                                                   ap_mpm_callback_fn_t *cbfn,
 985                                                   void *baton)
 986 {
 987     int inserted = 0;
 988     timer_event_t *ep;
 989     timer_event_t *te;
 990     /* oh yeah, and make locking smarter/fine grained. */
 991     apr_thread_mutex_lock(g_timer_ring_mtx);
 992
 993     if (!APR_RING_EMPTY(&timer_free_ring, timer_event_t, link)) {
 994         te = APR_RING_FIRST(&timer_free_ring);
 995         APR_RING_REMOVE(te, link);
 996     }
 997     else {
 998         /* XXXXX: lol, pool allocation without a context from any thread.Yeah. Right. MPMs Suck. */
 999         te = malloc(sizeof(timer_event_t));
1000         APR_RING_ELEM_INIT(te, link);
1001     }
1002
1003     te->cbfunc = cbfn;
1004     te->baton = baton;
1005     /* XXXXX: optimize */
1006     te->when = t + apr_time_now();
1007
1008     /* Okay, insert sorted by when.. */
1009     for (ep = APR_RING_FIRST(&timer_ring);
1010          ep != APR_RING_SENTINEL(&timer_ring,
1011                                  timer_event_t, link);
1012          ep = APR_RING_NEXT(ep, link))
1013     {
1014         if (ep->when > te->when) {
1015             inserted = 1;
1016             APR_RING_INSERT_BEFORE(ep, te, link);
1017             break;
1018         }
1019     }
1020
1021     if (!inserted) {
1022         APR_RING_INSERT_TAIL(&timer_ring, te, timer_event_t, link);
1023     }
1024
1025     apr_thread_mutex_unlock(g_timer_ring_mtx);
1026
1027     return APR_SUCCESS;
1028 }
1029
/* Fallback for APR versions that do not provide apr_time_from_msec():
 * converts milliseconds to apr_time_t.  The argument is parenthesised so
 * that expressions such as apr_time_from_msec(a + b) convert the whole sum,
 * and it is widened to apr_time_t before the multiplication.
 */
#ifndef apr_time_from_msec
#define apr_time_from_msec(x) ((apr_time_t)(x) * 1000)
#endif
1033
1034 static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
1035 {
1036     timer_event_t *ep;
1037     timer_event_t *te;
1038     apr_status_t rc;
1039     proc_info *ti = dummy;
1040     int process_slot = ti->pid;
1041     apr_pool_t *tpool = apr_thread_pool_get(thd);
1042     void *csd = NULL;
1043     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1044     ap_listen_rec *lr;
1045     int have_idle_worker = 0;
1046     conn_state_t *cs;
1047     const apr_pollfd_t *out_pfd;
1048     apr_int32_t num = 0;
1049     apr_time_t time_now = 0;
1050     apr_interval_time_t timeout_interval;
1051     apr_time_t timeout_time;
1052     listener_poll_type *pt;
1053
1054     free(ti);
1055
1056     /* the following times out events that are really close in the future
1057      *   to prevent extra poll calls
1058      *
1059      * current value is .1 second
1060      */
1061 #define TIMEOUT_FUDGE_FACTOR 100000
1062 #define EVENT_FUDGE_FACTOR 10000
1063
1064     rc = init_pollset(tpool);
1065     if (rc != APR_SUCCESS) {
1066         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1067                      "failed to initialize pollset, "
1068                      "attempting to shutdown process gracefully");
1069         signal_threads(ST_GRACEFUL);
1070         return NULL;
1071     }
1072
1073     /* Unblock the signal used to wake this thread up, and set a handler for
1074      * it.
1075      */
1076     unblock_signal(LISTENER_SIGNAL);
1077     apr_signal(LISTENER_SIGNAL, dummy_signal_handler);
1078
1079     while (!listener_may_exit) {
1080
1081         if (requests_this_child <= 0) {
1082             check_infinite_requests();
1083         }
1084
1085
1086         {
1087             apr_time_t now = apr_time_now();
1088             apr_thread_mutex_lock(g_timer_ring_mtx);
1089
1090             if (!APR_RING_EMPTY(&timer_ring, timer_event_t, link)) {
1091                 te = APR_RING_FIRST(&timer_ring);
1092                 if (te->when > now) {
1093                     timeout_interval = te->when - now;
1094                 }
1095                 else {
1096                     timeout_interval = 1;
1097                 }
1098             }
1099             else {
1100                 timeout_interval = apr_time_from_msec(100);
1101             }
1102             apr_thread_mutex_unlock(g_timer_ring_mtx);
1103         }
1104
1105 #if AP_HAS_SERF
1106         rc = serf_context_prerun(g_serf);
1107         if (rc != APR_SUCCESS) {
1108             /* TOOD: what should do here? ugh. */
1109         }
1110
1111 #endif
1112         rc = apr_pollset_poll(event_pollset, timeout_interval, &num,
1113                               &out_pfd);
1114
1115         if (rc != APR_SUCCESS) {
1116             if (APR_STATUS_IS_EINTR(rc)) {
1117                 continue;
1118             }
1119             if (!APR_STATUS_IS_TIMEUP(rc)) {
1120                 ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
1121                              "apr_pollset_poll failed.  Attempting to "
1122                              "shutdown process gracefully");
1123                 signal_threads(ST_GRACEFUL);
1124             }
1125         }
1126
1127         if (listener_may_exit)
1128             break;
1129
1130         {
1131             apr_time_t now = apr_time_now();
1132             apr_thread_mutex_lock(g_timer_ring_mtx);
1133             for (ep = APR_RING_FIRST(&timer_ring);
1134                  ep != APR_RING_SENTINEL(&timer_ring,
1135                                          timer_event_t, link);
1136                  ep = APR_RING_FIRST(&timer_ring))
1137             {
1138                 if (ep->when < now + EVENT_FUDGE_FACTOR) {
1139                     APR_RING_REMOVE(ep, link);
1140                     push_timer2worker(ep);
1141                 }
1142                 else {
1143                     break;
1144                 }
1145             }
1146             apr_thread_mutex_unlock(g_timer_ring_mtx);
1147         }
1148
1149         while (num && get_worker(&have_idle_worker)) {
1150             pt = (listener_poll_type *) out_pfd->client_data;
1151             if (pt->type == PT_CSD) {
1152                 /* one of the sockets is readable */
1153                 cs = (conn_state_t *) pt->baton;
1154                 switch (cs->state) {
1155                 case CONN_STATE_CHECK_REQUEST_LINE_READABLE:
1156                     cs->state = CONN_STATE_READ_REQUEST_LINE;
1157                     break;
1158                 case CONN_STATE_WRITE_COMPLETION:
1159                     break;
1160                 default:
1161                     ap_log_error(APLOG_MARK, APLOG_ERR, rc,
1162                                  ap_server_conf,
1163                                  "event_loop: unexpected state %d",
1164                                  cs->state);
1165                     AP_DEBUG_ASSERT(0);
1166                 }
1167
1168                 apr_thread_mutex_lock(timeout_mutex);
1169                 APR_RING_REMOVE(cs, timeout_list);
1170                 apr_thread_mutex_unlock(timeout_mutex);
1171                 APR_RING_ELEM_INIT(cs, timeout_list);
1172
1173                 rc = push2worker(out_pfd, event_pollset);
1174                 if (rc != APR_SUCCESS) {
1175                     ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1176                                  ap_server_conf, "push2worker failed");
1177                 }
1178                 else {
1179                     have_idle_worker = 0;
1180                 }
1181             }
1182             else if (pt->type == PT_ACCEPT) {
1183                 /* A Listener Socket is ready for an accept() */
1184
1185                 lr = (ap_listen_rec *) pt->baton;
1186
1187                 ap_pop_pool(&ptrans, worker_queue_info);
1188
1189                 if (ptrans == NULL) {
1190                     /* create a new transaction pool for each accepted socket */
1191                     apr_allocator_t *allocator;
1192
1193                     apr_allocator_create(&allocator);
1194                     apr_allocator_max_free_set(allocator,
1195                                                ap_max_mem_free);
1196                     apr_pool_create_ex(&ptrans, pconf, NULL, allocator);
1197                     apr_allocator_owner_set(allocator, ptrans);
1198                     if (ptrans == NULL) {
1199                         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1200                                      ap_server_conf,
1201                                      "Failed to create transaction pool");
1202                         signal_threads(ST_GRACEFUL);
1203                         return NULL;
1204                     }
1205                 }
1206                 apr_pool_tag(ptrans, "transaction");
1207
1208                 rc = lr->accept_func(&csd, lr, ptrans);
1209
1210                 /* later we trash rv and rely on csd to indicate
1211                  * success/failure
1212                  */
1213                 AP_DEBUG_ASSERT(rc == APR_SUCCESS || !csd);
1214
1215                 if (rc == APR_EGENERAL) {
1216                     /* E[NM]FILE, ENOMEM, etc */
1217                     resource_shortage = 1;
1218                     signal_threads(ST_GRACEFUL);
1219                 }
1220
1221                 if (csd != NULL) {
1222                     rc = ap_queue_push(worker_queue, csd, NULL, ptrans);
1223                     if (rc != APR_SUCCESS) {
1224                         /* trash the connection; we couldn't queue the connected
1225                          * socket to a worker
1226                          */
1227                         apr_socket_close(csd);
1228                         ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
1229                                      ap_server_conf,
1230                                      "ap_queue_push failed");
1231                         apr_pool_clear(ptrans);
1232                         ap_push_pool(worker_queue_info, ptrans);
1233                     }
1234                     else {
1235                         have_idle_worker = 0;
1236                     }
1237                 }
1238                 else {
1239                     apr_pool_clear(ptrans);
1240                     ap_push_pool(worker_queue_info, ptrans);
1241                 }
1242             }               /* if:else on pt->type */
1243 #if AP_HAS_SERF
1244             else if (pt->type == PT_SERF) {
1245                 /* send socket to serf. */
1246                 /* XXXX: this doesn't require get_worker(&have_idle_worker) */
1247                 serf_event_trigger(g_serf, pt->baton, out_pfd);
1248             }
1249 #endif
1250             out_pfd++;
1251             num--;
1252         }                   /* while for processing poll */
1253
1254         /* XXX possible optimization: stash the current time for use as
1255          * r->request_time for new requests
1256          */
1257         time_now = apr_time_now();
1258
1259         /* handle timed out sockets */
1260         apr_thread_mutex_lock(timeout_mutex);
1261
1262         /* Step 1: keepalive timeouts */
1263         cs = APR_RING_FIRST(&keepalive_timeout_head);
1264         timeout_time = time_now + TIMEOUT_FUDGE_FACTOR;
1265         while (!APR_RING_EMPTY(&keepalive_timeout_head, conn_state_t, timeout_list)
1266                && cs->expiration_time < timeout_time) {
1267
1268             cs->state = CONN_STATE_LINGER;
1269
1270             APR_RING_REMOVE(cs, timeout_list);
1271             apr_thread_mutex_unlock(timeout_mutex);
1272
1273             if (!get_worker(&have_idle_worker)) {
1274                 apr_thread_mutex_lock(timeout_mutex);
1275                 APR_RING_INSERT_HEAD(&keepalive_timeout_head, cs,
1276                                      conn_state_t, timeout_list);
1277                 break;
1278             }
1279
1280             rc = push2worker(&cs->pfd, event_pollset);
1281
1282             if (rc != APR_SUCCESS) {
1283                 return NULL;
1284                 /* XXX return NULL looks wrong - not an init failure
1285                  * that bypasses all the cleanup outside the main loop
1286                  * break seems more like it
1287                  * need to evaluate seriousness of push2worker failures
1288                  */
1289             }
1290             have_idle_worker = 0;
1291             apr_thread_mutex_lock(timeout_mutex);
1292             cs = APR_RING_FIRST(&keepalive_timeout_head);
1293         }
1294
1295         /* Step 2: write completion timeouts */
1296         cs = APR_RING_FIRST(&timeout_head);
1297         while (!APR_RING_EMPTY(&timeout_head, conn_state_t, timeout_list)
1298                && cs->expiration_time < timeout_time) {
1299
1300             cs->state = CONN_STATE_LINGER;
1301             APR_RING_REMOVE(cs, timeout_list);
1302             apr_thread_mutex_unlock(timeout_mutex);
1303
1304             if (!get_worker(&have_idle_worker)) {
1305                 apr_thread_mutex_lock(timeout_mutex);
1306                 APR_RING_INSERT_HEAD(&timeout_head, cs,
1307                                      conn_state_t, timeout_list);
1308                 break;
1309             }
1310
1311             rc = push2worker(&cs->pfd, event_pollset);
1312             if (rc != APR_SUCCESS) {
1313                 return NULL;
1314             }
1315             have_idle_worker = 0;
1316             apr_thread_mutex_lock(timeout_mutex);
1317             cs = APR_RING_FIRST(&timeout_head);
1318         }
1319
1320         apr_thread_mutex_unlock(timeout_mutex);
1321
1322     }     /* listener main loop */
1323
1324     ap_close_listeners();
1325     ap_queue_term(worker_queue);
1326     dying = 1;
1327     ap_scoreboard_image->parent[process_slot].quiescing = 1;
1328
1329     /* wake up the main thread */
1330     kill(ap_my_pid, SIGTERM);
1331
1332     apr_thread_exit(thd, APR_SUCCESS);
1333     return NULL;
1334 }
1335
1336 /* XXX For ungraceful termination/restart, we definitely don't want to
1337  *     wait for active connections to finish but we may want to wait
1338  *     for idle workers to get out of the queue code and release mutexes,
1339  *     since those mutexes are cleaned up pretty soon and some systems
1340  *     may not react favorably (i.e., segfault) if operations are attempted
1341  *     on cleaned-up mutexes.
1342  */
1343 static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
1344 {
1345     proc_info *ti = dummy;
1346     int process_slot = ti->pid;
1347     int thread_slot = ti->tid;
1348     apr_socket_t *csd = NULL;
1349     conn_state_t *cs;
1350     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
1351     apr_status_t rv;
1352     int is_idle = 0;
1353     timer_event_t *te = NULL;
1354
1355     free(ti);
1356
1357     ap_scoreboard_image->servers[process_slot][thread_slot].pid = ap_my_pid;
1358     ap_scoreboard_image->servers[process_slot][thread_slot].tid = apr_os_thread_current();
1359     ap_scoreboard_image->servers[process_slot][thread_slot].generation = my_generation;
1360     ap_update_child_status_from_indexes(process_slot, thread_slot,
1361                                         SERVER_STARTING, NULL);
1362
1363     while (!workers_may_exit) {
1364         if (!is_idle) {
1365             rv = ap_queue_info_set_idle(worker_queue_info, NULL);
1366             if (rv != APR_SUCCESS) {
1367                 ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
1368                              "ap_queue_info_set_idle failed. Attempting to "
1369                              "shutdown process gracefully.");
1370                 signal_threads(ST_GRACEFUL);
1371                 break;
1372             }
1373             is_idle = 1;
1374         }
1375
1376         ap_update_child_status_from_indexes(process_slot, thread_slot,
1377                                             SERVER_READY, NULL);
1378       worker_pop:
1379         if (workers_may_exit) {
1380             break;
1381         }
1382
1383         te = NULL;
1384
1385         rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te);
1386
1387         if (rv != APR_SUCCESS) {
1388             /* We get APR_EOF during a graceful shutdown once all the
1389              * connections accepted by this server process have been handled.
1390              */
1391             if (APR_STATUS_IS_EOF(rv)) {
1392                 break;
1393             }
1394             /* We get APR_EINTR whenever ap_queue_pop() has been interrupted
1395              * from an explicit call to ap_queue_interrupt_all(). This allows
1396              * us to unblock threads stuck in ap_queue_pop() when a shutdown
1397              * is pending.
1398              *
1399              * If workers_may_exit is set and this is ungraceful termination/
1400              * restart, we are bound to get an error on some systems (e.g.,
1401              * AIX, which sanity-checks mutex operations) since the queue
1402              * may have already been cleaned up.  Don't log the "error" if
1403              * workers_may_exit is set.
1404              */
1405             else if (APR_STATUS_IS_EINTR(rv)) {
1406                 goto worker_pop;
1407             }
1408             /* We got some other error. */
1409             else if (!workers_may_exit) {
1410                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1411                              "ap_queue_pop failed");
1412             }
1413             continue;
1414         }
1415         if (te != NULL) {
1416
1417             te->cbfunc(te->baton);
1418
1419             {
1420                 apr_thread_mutex_lock(g_timer_ring_mtx);
1421                 APR_RING_INSERT_TAIL(&timer_free_ring, te, timer_event_t, link);
1422                 apr_thread_mutex_unlock(g_timer_ring_mtx);
1423             }
1424         }
1425         else {
1426             is_idle = 0;
1427             worker_sockets[thread_slot] = csd;
1428             rv = process_socket(thd, ptrans, csd, cs, process_slot, thread_slot);
1429             if (!rv) {
1430                 requests_this_child--;
1431             }
1432             worker_sockets[thread_slot] = NULL;
1433         }
1434     }
1435
1436     ap_update_child_status_from_indexes(process_slot, thread_slot,
1437                                         (dying) ? SERVER_DEAD :
1438                                         SERVER_GRACEFUL,
1439                                         (request_rec *) NULL);
1440
1441     apr_thread_exit(thd, APR_SUCCESS);
1442     return NULL;
1443 }
1444
/* check_signal:
 *     returns 1 when signum is SIGTERM or SIGINT, 0 for anything else.
 */
static int check_signal(int signum)
{
    if (signum == SIGTERM || signum == SIGINT) {
        return 1;
    }
    return 0;
}
1454
1455
1456
1457 static void create_listener_thread(thread_starter * ts)
1458 {
1459     int my_child_num = ts->child_num_arg;
1460     apr_threadattr_t *thread_attr = ts->threadattr;
1461     proc_info *my_info;
1462     apr_status_t rv;
1463
1464     my_info = (proc_info *) malloc(sizeof(proc_info));
1465     my_info->pid = my_child_num;
1466     my_info->tid = -1;          /* listener thread doesn't have a thread slot */
1467     my_info->sd = 0;
1468     rv = apr_thread_create(&ts->listener, thread_attr, listener_thread,
1469                            my_info, pchild);
1470     if (rv != APR_SUCCESS) {
1471         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1472                      "apr_thread_create: unable to create listener thread");
1473         /* let the parent decide how bad this really is */
1474         clean_child_exit(APEXIT_CHILDSICK);
1475     }
1476     apr_os_thread_get(&listener_os_thread, ts->listener);
1477 }
1478
1479 /* XXX under some circumstances not understood, children can get stuck
1480  *     in start_threads forever trying to take over slots which will
1481  *     never be cleaned up; for now there is an APLOG_DEBUG message issued
1482  *     every so often when this condition occurs
1483  */
1484 static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
1485 {
1486     thread_starter *ts = dummy;
1487     apr_thread_t **threads = ts->threads;
1488     apr_threadattr_t *thread_attr = ts->threadattr;
1489     int child_num_arg = ts->child_num_arg;
1490     int my_child_num = child_num_arg;
1491     proc_info *my_info;
1492     apr_status_t rv;
1493     int i;
1494     int threads_created = 0;
1495     int listener_started = 0;
1496     int loops;
1497     int prev_threads_created;
1498
1499     /* We must create the fd queues before we start up the listener
1500      * and worker threads. */
1501     worker_queue = apr_pcalloc(pchild, sizeof(*worker_queue));
1502     rv = ap_queue_init(worker_queue, threads_per_child, pchild);
1503     if (rv != APR_SUCCESS) {
1504         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1505                      "ap_queue_init() failed");
1506         clean_child_exit(APEXIT_CHILDFATAL);
1507     }
1508
1509     rv = ap_queue_info_create(&worker_queue_info, pchild,
1510                               threads_per_child);
1511     if (rv != APR_SUCCESS) {
1512         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1513                      "ap_queue_info_create() failed");
1514         clean_child_exit(APEXIT_CHILDFATAL);
1515     }
1516
1517     worker_sockets = apr_pcalloc(pchild, threads_per_child
1518                                  * sizeof(apr_socket_t *));
1519
1520     loops = prev_threads_created = 0;
1521     while (1) {
1522         /* threads_per_child does not include the listener thread */
1523         for (i = 0; i < threads_per_child; i++) {
1524             int status =
1525                 ap_scoreboard_image->servers[child_num_arg][i].status;
1526
1527             if (status != SERVER_GRACEFUL && status != SERVER_DEAD) {
1528                 continue;
1529             }
1530
1531             my_info = (proc_info *) malloc(sizeof(proc_info));
1532             if (my_info == NULL) {
1533                 ap_log_error(APLOG_MARK, APLOG_ALERT, errno, ap_server_conf,
1534                              "malloc: out of memory");
1535                 clean_child_exit(APEXIT_CHILDFATAL);
1536             }
1537             my_info->pid = my_child_num;
1538             my_info->tid = i;
1539             my_info->sd = 0;
1540
1541             /* We are creating threads right now */
1542             ap_update_child_status_from_indexes(my_child_num, i,
1543                                                 SERVER_STARTING, NULL);
1544             /* We let each thread update its own scoreboard entry.  This is
1545              * done because it lets us deal with tid better.
1546              */
1547             rv = apr_thread_create(&threads[i], thread_attr,
1548                                    worker_thread, my_info, pchild);
1549             if (rv != APR_SUCCESS) {
1550                 ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1551                              "apr_thread_create: unable to create worker thread");
1552                 /* let the parent decide how bad this really is */
1553                 clean_child_exit(APEXIT_CHILDSICK);
1554             }
1555             threads_created++;
1556         }
1557
1558         /* Start the listener only when there are workers available */
1559         if (!listener_started && threads_created) {
1560             create_listener_thread(ts);
1561             listener_started = 1;
1562         }
1563
1564
1565         if (start_thread_may_exit || threads_created == threads_per_child) {
1566             break;
1567         }
1568         /* wait for previous generation to clean up an entry */
1569         apr_sleep(apr_time_from_sec(1));
1570         ++loops;
1571         if (loops % 120 == 0) { /* every couple of minutes */
1572             if (prev_threads_created == threads_created) {
1573                 ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1574                              "child %" APR_PID_T_FMT " isn't taking over "
1575                              "slots very quickly (%d of %d)",
1576                              ap_my_pid, threads_created,
1577                              threads_per_child);
1578             }
1579             prev_threads_created = threads_created;
1580         }
1581     }
1582
1583     /* What state should this child_main process be listed as in the
1584      * scoreboard...?
1585      *  ap_update_child_status_from_indexes(my_child_num, i, SERVER_STARTING,
1586      *                                      (request_rec *) NULL);
1587      *
1588      *  This state should be listed separately in the scoreboard, in some kind
1589      *  of process_status, not mixed in with the worker threads' status.
1590      *  "life_status" is almost right, but it's in the worker's structure, and
1591      *  the name could be clearer.   gla
1592      */
1593     apr_thread_exit(thd, APR_SUCCESS);
1594     return NULL;
1595 }
1596
1597 static void join_workers(apr_thread_t * listener, apr_thread_t ** threads)
1598 {
1599     int i;
1600     apr_status_t rv, thread_rv;
1601
1602     if (listener) {
1603         int iter;
1604
1605         /* deal with a rare timing window which affects waking up the
1606          * listener thread...  if the signal sent to the listener thread
1607          * is delivered between the time it verifies that the
1608          * listener_may_exit flag is clear and the time it enters a
1609          * blocking syscall, the signal didn't do any good...  work around
1610          * that by sleeping briefly and sending it again
1611          */
1612
1613         iter = 0;
1614         while (iter < 10 &&
1615 #ifdef HAVE_PTHREAD_KILL
1616                pthread_kill(*listener_os_thread, 0)
1617 #else
1618                kill(ap_my_pid, 0)
1619 #endif
1620                == 0) {
1621             /* listener not dead yet */
1622             apr_sleep(apr_time_make(0, 500000));
1623             wakeup_listener();
1624             ++iter;
1625         }
1626         if (iter >= 10) {
1627             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1628                          "the listener thread didn't exit");
1629         }
1630         else {
1631             rv = apr_thread_join(&thread_rv, listener);
1632             if (rv != APR_SUCCESS) {
1633                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1634                              "apr_thread_join: unable to join listener thread");
1635             }
1636         }
1637     }
1638
1639     for (i = 0; i < threads_per_child; i++) {
1640         if (threads[i]) {       /* if we ever created this thread */
1641             rv = apr_thread_join(&thread_rv, threads[i]);
1642             if (rv != APR_SUCCESS) {
1643                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1644                              "apr_thread_join: unable to join worker "
1645                              "thread %d", i);
1646             }
1647         }
1648     }
1649 }
1650
1651 static void join_start_thread(apr_thread_t * start_thread_id)
1652 {
1653     apr_status_t rv, thread_rv;
1654
1655     start_thread_may_exit = 1;  /* tell it to give up in case it is still
1656                                  * trying to take over slots from a
1657                                  * previous generation
1658                                  */
1659     rv = apr_thread_join(&thread_rv, start_thread_id);
1660     if (rv != APR_SUCCESS) {
1661         ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
1662                      "apr_thread_join: unable to join the start " "thread");
1663     }
1664 }
1665
1666 static void child_main(int child_num_arg)
1667 {
1668     apr_thread_t **threads;
1669     apr_status_t rv;
1670     thread_starter *ts;
1671     apr_threadattr_t *thread_attr;
1672     apr_thread_t *start_thread_id;
1673
1674     mpm_state = AP_MPMQ_STARTING;       /* for benefit of any hooks that run as this
1675                                          * child initializes
1676                                          */
1677     ap_my_pid = getpid();
1678     ap_fatal_signal_child_setup(ap_server_conf);
1679     apr_pool_create(&pchild, pconf);
1680
1681     /*stuff to do before we switch id's, so we have permissions. */
1682     ap_reopen_scoreboard(pchild, NULL, 0);
1683
1684     if (ap_run_drop_privileges(pchild, ap_server_conf)) {
1685         clean_child_exit(APEXIT_CHILDFATAL);
1686     }
1687
1688     apr_thread_mutex_create(&g_timer_ring_mtx, APR_THREAD_MUTEX_DEFAULT, pchild);
1689     APR_RING_INIT(&timer_free_ring, timer_event_t, link);
1690     APR_RING_INIT(&timer_ring, timer_event_t, link);
1691
1692     ap_run_child_init(pchild, ap_server_conf);
1693
1694     /* done with init critical section */
1695
1696     /* Just use the standard apr_setup_signal_thread to block all signals
1697      * from being received.  The child processes no longer use signals for
1698      * any communication with the parent process.
1699      */
1700     rv = apr_setup_signal_thread();
1701     if (rv != APR_SUCCESS) {
1702         ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf,
1703                      "Couldn't initialize signal thread");
1704         clean_child_exit(APEXIT_CHILDFATAL);
1705     }
1706
1707     if (ap_max_requests_per_child) {
1708         requests_this_child = ap_max_requests_per_child;
1709     }
1710     else {
1711         /* coding a value of zero means infinity */
1712         requests_this_child = INT_MAX;
1713     }
1714
1715     /* Setup worker threads */
1716
1717     /* clear the storage; we may not create all our threads immediately,
1718      * and we want a 0 entry to indicate a thread which was not created
1719      */
1720     threads = (apr_thread_t **) calloc(1,
1721                                        sizeof(apr_thread_t *) *
1722                                        threads_per_child);
1723     if (threads == NULL) {
1724         ap_log_error(APLOG_MARK, APLOG_ALERT, errno, ap_server_conf,
1725                      "malloc: out of memory");
1726         clean_child_exit(APEXIT_CHILDFATAL);
1727     }
1728
1729     ts = (thread_starter *) apr_palloc(pchild, sizeof(*ts));
1730
1731     apr_threadattr_create(&thread_attr, pchild);
1732     /* 0 means PTHREAD_CREATE_JOINABLE */
1733     apr_threadattr_detach_set(thread_attr, 0);
1734
1735     if (ap_thread_stacksize != 0) {
1736         apr_threadattr_stacksize_set(thread_attr, ap_thread_stacksize);
1737     }
1738
1739     ts->threads = threads;
1740     ts->listener = NULL;
1741     ts->child_num_arg = child_num_arg;
1742     ts->threadattr = thread_attr;
1743
1744     rv = apr_thread_create(&start_thread_id, thread_attr, start_threads,
1745                            ts, pchild);
1746     if (rv != APR_SUCCESS) {
1747         ap_log_error(APLOG_MARK, APLOG_ALERT, rv, ap_server_conf,
1748                      "apr_thread_create: unable to create worker thread");
1749         /* let the parent decide how bad this really is */
1750         clean_child_exit(APEXIT_CHILDSICK);
1751     }
1752
1753     mpm_state = AP_MPMQ_RUNNING;
1754
1755     /* If we are only running in one_process mode, we will want to
1756      * still handle signals. */
1757     if (one_process) {
1758         /* Block until we get a terminating signal. */
1759         apr_signal_thread(check_signal);
1760         /* make sure the start thread has finished; signal_threads()
1761          * and join_workers() depend on that
1762          */
1763         /* XXX join_start_thread() won't be awakened if one of our
1764          *     threads encounters a critical error and attempts to
1765          *     shutdown this child
1766          */
1767         join_start_thread(start_thread_id);
1768
1769         /* helps us terminate a little more quickly than the dispatch of the
1770          * signal thread; beats the Pipe of Death and the browsers
1771          */
1772         signal_threads(ST_UNGRACEFUL);
1773
1774         /* A terminating signal was received. Now join each of the
1775          * workers to clean them up.
1776          *   If the worker already exited, then the join frees
1777          *   their resources and returns.
1778          *   If the worker hasn't exited, then this blocks until
1779          *   they have (then cleans up).
1780          */
1781         join_workers(ts->listener, threads);
1782     }
1783     else {                      /* !one_process */
1784         /* remove SIGTERM from the set of blocked signals...  if one of
1785          * the other threads in the process needs to take us down
1786          * (e.g., for MaxRequestsPerChild) it will send us SIGTERM
1787          */
1788         unblock_signal(SIGTERM);
1789         apr_signal(SIGTERM, dummy_signal_handler);
1790         /* Watch for any messages from the parent over the POD */
1791         while (1) {
1792             rv = ap_event_pod_check(pod);
1793             if (rv == AP_NORESTART) {
1794                 /* see if termination was triggered while we slept */
1795                 switch (terminate_mode) {
1796                 case ST_GRACEFUL:
1797                     rv = AP_GRACEFUL;
1798                     break;
1799                 case ST_UNGRACEFUL:
1800                     rv = AP_RESTART;
1801                     break;
1802                 }
1803             }
1804             if (rv == AP_GRACEFUL || rv == AP_RESTART) {
1805                 /* make sure the start thread has finished;
1806                  * signal_threads() and join_workers depend on that
1807                  */
1808                 join_start_thread(start_thread_id);
1809                 signal_threads(rv ==
1810                                AP_GRACEFUL ? ST_GRACEFUL : ST_UNGRACEFUL);
1811                 break;
1812             }
1813         }
1814
1815         /* A terminating signal was received. Now join each of the
1816          * workers to clean them up.
1817          *   If the worker already exited, then the join frees
1818          *   their resources and returns.
1819          *   If the worker hasn't exited, then this blocks until
1820          *   they have (then cleans up).
1821          */
1822         join_workers(ts->listener, threads);
1823     }
1824
1825     free(threads);
1826
1827     clean_child_exit(resource_shortage ? APEXIT_CHILDSICK : 0);
1828 }
1829
1830 static int make_child(server_rec * s, int slot)
1831 {
1832     int pid;
1833
1834     if (slot + 1 > max_daemons_limit) {
1835         max_daemons_limit = slot + 1;
1836     }
1837
1838     if (one_process) {
1839         set_signals();
1840         ap_scoreboard_image->parent[slot].pid = getpid();
1841         child_main(slot);
1842     }
1843
1844     if ((pid = fork()) == -1) {
1845         ap_log_error(APLOG_MARK, APLOG_ERR, errno, s,
1846                      "fork: Unable to fork new process");
1847
1848         /* fork didn't succeed.  There's no need to touch the scoreboard;
1849          * if we were trying to replace a failed child process, then
1850          * server_main_loop() marked its workers SERVER_DEAD, and if
1851          * we were trying to replace a child process that exited normally,
1852          * its worker_thread()s left SERVER_DEAD or SERVER_GRACEFUL behind.
1853          */
1854
1855         /* In case system resources are maxxed out, we don't want
1856            Apache running away with the CPU trying to fork over and
1857            over and over again. */
1858         apr_sleep(apr_time_from_sec(10));
1859
1860         return -1;
1861     }
1862
1863     if (!pid) {
1864 #ifdef HAVE_BINDPROCESSOR
1865         /* By default, AIX binds to a single processor.  This bit unbinds
1866          * children which will then bind to another CPU.
1867          */
1868         int status = bindprocessor(BINDPROCESS, (int) getpid(),
1869                                    PROCESSOR_CLASS_ANY);
1870         if (status != OK)
1871             ap_log_error(APLOG_MARK, APLOG_WARNING, errno,
1872                          ap_server_conf,
1873                          "processor unbind failed %d", status);
1874 #endif
1875         RAISE_SIGSTOP(MAKE_CHILD);
1876
1877         apr_signal(SIGTERM, just_die);
1878         child_main(slot);
1879
1880         clean_child_exit(0);
1881     }
1882     /* else */
1883     if (ap_scoreboard_image->parent[slot].pid != 0) {
1884         /* This new child process is squatting on the scoreboard
1885          * entry owned by an exiting child process, which cannot
1886          * exit until all active requests complete.
1887          * Don't forget about this exiting child process, or we
1888          * won't be able to kill it if it doesn't exit by the
1889          * time the server is shut down.
1890          */
1891         ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf,
1892                      "taking over scoreboard slot from %" APR_PID_T_FMT "%s",
1893                      ap_scoreboard_image->parent[slot].pid,
1894                      ap_scoreboard_image->parent[slot].quiescing ?
1895                          " (quiescing)" : "");
1896         ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid);
1897     }
1898     ap_scoreboard_image->parent[slot].quiescing = 0;
1899     ap_scoreboard_image->parent[slot].pid = pid;
1900     return 0;
1901 }
1902
1903 /* start up a bunch of children */
1904 static void startup_children(int number_to_start)
1905 {
1906     int i;
1907
1908     for (i = 0; number_to_start && i < ap_daemons_limit; ++i) {
1909         if (ap_scoreboard_image->parent[i].pid != 0) {
1910             continue;
1911         }
1912         if (make_child(ap_server_conf, i) < 0) {
1913             break;
1914         }
1915         --number_to_start;
1916     }
1917 }
1918
1919
1920 /*
1921  * idle_spawn_rate is the number of children that will be spawned on the
1922  * next maintenance cycle if there aren't enough idle servers.  It is
1923  * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by
1924  * without the need to spawn.
1925  */
1926 static int idle_spawn_rate = 1;
1927 #ifndef MAX_SPAWN_RATE
1928 #define MAX_SPAWN_RATE        (32)
1929 #endif
1930 static int hold_off_on_exponential_spawning;
1931
1932 static void perform_idle_server_maintenance(void)
1933 {
1934     int i, j;
1935     int idle_thread_count;
1936     worker_score *ws;
1937     process_score *ps;
1938     int free_length;
1939     int totally_free_length = 0;
1940     int free_slots[MAX_SPAWN_RATE];
1941     int last_non_dead;
1942     int total_non_dead;
1943     int active_thread_count = 0;
1944
1945     /* initialize the free_list */
1946     free_length = 0;
1947
1948     idle_thread_count = 0;
1949     last_non_dead = -1;
1950     total_non_dead = 0;
1951
1952     for (i = 0; i < ap_daemons_limit; ++i) {
1953         /* Initialization to satisfy the compiler. It doesn't know
1954          * that threads_per_child is always > 0 */
1955         int status = SERVER_DEAD;
1956         int any_dying_threads = 0;
1957         int any_dead_threads = 0;
1958         int all_dead_threads = 1;
1959
1960         if (i >= max_daemons_limit
1961             && totally_free_length == idle_spawn_rate)
1962             /* short cut if all active processes have been examined and
1963              * enough empty scoreboard slots have been found
1964              */
1965
1966             break;
1967         ps = &ap_scoreboard_image->parent[i];
1968         for (j = 0; j < threads_per_child; j++) {
1969             ws = &ap_scoreboard_image->servers[i][j];
1970             status = ws->status;
1971
1972             /* XXX any_dying_threads is probably no longer needed    GLA */
1973             any_dying_threads = any_dying_threads ||
1974                 (status == SERVER_GRACEFUL);
1975             any_dead_threads = any_dead_threads || (status == SERVER_DEAD);
1976             all_dead_threads = all_dead_threads &&
1977                 (status == SERVER_DEAD || status == SERVER_GRACEFUL);
1978
1979             /* We consider a starting server as idle because we started it
1980              * at least a cycle ago, and if it still hasn't finished starting
1981              * then we're just going to swamp things worse by forking more.
1982              * So we hopefully won't need to fork more if we count it.
1983              * This depends on the ordering of SERVER_READY and SERVER_STARTING.
1984              */
1985             if (ps->pid != 0) { /* XXX just set all_dead_threads in outer
1986                                    for loop if no pid?  not much else matters */
1987                 if (status <= SERVER_READY &&
1988                         !ps->quiescing && ps->generation == my_generation) {
1989                     ++idle_thread_count;
1990                 }
1991                 if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
1992                     ++active_thread_count;
1993                 }
1994             }
1995         }
1996         if (any_dead_threads
1997             && totally_free_length < idle_spawn_rate
1998             && free_length < MAX_SPAWN_RATE
1999             && (!ps->pid      /* no process in the slot */
2000                   || ps->quiescing)) {  /* or at least one is going away */
2001             if (all_dead_threads) {
2002                 /* great! we prefer these, because the new process can
2003                  * start more threads sooner.  So prioritize this slot
2004                  * by putting it ahead of any slots with active threads.
2005                  *
2006                  * first, make room by moving a slot that's potentially still
2007                  * in use to the end of the array
2008                  */
2009                 free_slots[free_length] = free_slots[totally_free_length];
2010                 free_slots[totally_free_length++] = i;
2011             }
2012             else {
2013                 /* slot is still in use - back of the bus
2014                  */
2015                 free_slots[free_length] = i;
2016             }
2017             ++free_length;
2018         }
2019         /* XXX if (!ps->quiescing)     is probably more reliable  GLA */
2020         if (!any_dying_threads) {
2021             last_non_dead = i;
2022             ++total_non_dead;
2023         }
2024     }
2025
2026     if (sick_child_detected) {
2027         if (active_thread_count > 0) {
2028             /* some child processes appear to be working.  don't kill the
2029              * whole server.
2030              */
2031             sick_child_detected = 0;
2032         }
2033         else {
2034             /* looks like a basket case.  give up.
2035              */
2036             shutdown_pending = 1;
2037             child_fatal = 1;
2038             ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
2039                          ap_server_conf,
2040                          "No active workers found..."
2041                          " Apache is exiting!");
2042             /* the child already logged the failure details */
2043             return;
2044         }
2045     }
2046
2047     max_daemons_limit = last_non_dead + 1;
2048
2049     if (idle_thread_count > max_spare_threads) {
2050         /* Kill off one child */
2051         ap_event_pod_signal(pod, TRUE);
2052         idle_spawn_rate = 1;
2053     }
2054     else if (idle_thread_count < min_spare_threads) {
2055         /* terminate the free list */
2056         if (free_length == 0) { /* scoreboard is full, can't fork */
2057
2058             if (active_thread_count >= ap_daemons_limit * threads_per_child) {
2059                 static int reported = 0;
2060                 if (!reported) {
2061                     /* only report this condition once */
2062                     ap_log_error(APLOG_MARK, APLOG_ERR, 0,
2063                                  ap_server_conf,
2064                                  "server reached MaxClients setting, consider"
2065                                  " raising the MaxClients setting");
2066                     reported = 1;
2067                 }
2068             }
2069             else {
2070                 ap_log_error(APLOG_MARK, APLOG_ERR, 0,
2071                              ap_server_conf,
2072                              "scoreboard is full, not at MaxClients");
2073             }
2074             idle_spawn_rate = 1;
2075         }
2076         else {
2077             if (free_length > idle_spawn_rate) {
2078                 free_length = idle_spawn_rate;
2079             }
2080             if (idle_spawn_rate >= 8) {
2081                 ap_log_error(APLOG_MARK, APLOG_INFO, 0,
2082                              ap_server_conf,
2083                              "server seems busy, (you may need "
2084                              "to increase StartServers, ThreadsPerChild "
2085                              "or Min/MaxSpareThreads), "
2086                              "spawning %d children, there are around %d idle "
2087                              "threads, and %d total children", free_length,
2088                              idle_thread_count, total_non_dead);
2089             }
2090             for (i = 0; i < free_length; ++i) {
2091                 make_child(ap_server_conf, free_slots[i]);
2092             }
2093             /* the next time around we want to spawn twice as many if this
2094              * wasn't good enough, but not if we've just done a graceful
2095              */
2096             if (hold_off_on_exponential_spawning) {
2097                 --hold_off_on_exponential_spawning;
2098             }
2099             else if (idle_spawn_rate < MAX_SPAWN_RATE) {
2100                 idle_spawn_rate *= 2;
2101             }
2102         }
2103     }
2104     else {
2105         idle_spawn_rate = 1;
2106     }
2107 }
2108
2109 static void server_main_loop(int remaining_children_to_start)
2110 {
2111     int child_slot;
2112     apr_exit_why_e exitwhy;
2113     int status, processed_status;
2114     apr_proc_t pid;
2115     int i;
2116
2117     while (!restart_pending && !shutdown_pending) {
2118         ap_wait_or_timeout(&exitwhy, &status, &pid, pconf);
2119
2120         if (pid.pid != -1) {
2121             processed_status = ap_process_child_status(&pid, exitwhy, status);
2122             if (processed_status == APEXIT_CHILDFATAL) {
2123                 shutdown_pending = 1;
2124                 child_fatal = 1;
2125                 return;
2126             }
2127             else if (processed_status == APEXIT_CHILDSICK) {
2128                 /* tell perform_idle_server_maintenance to check into this
2129                  * on the next timer pop
2130                  */
2131                 sick_child_detected = 1;
2132             }
2133             /* non-fatal death... note that it's gone in the scoreboard. */
2134             child_slot = ap_find_child_by_pid(&pid);
2135             if (child_slot >= 0) {
2136                 for (i = 0; i < threads_per_child; i++)
2137                     ap_update_child_status_from_indexes(child_slot, i,
2138                                                         SERVER_DEAD,
2139                                                         (request_rec *) NULL);
2140
2141                 ap_scoreboard_image->parent[child_slot].pid = 0;
2142                 ap_scoreboard_image->parent[child_slot].quiescing = 0;
2143                 if (processed_status == APEXIT_CHILDSICK) {
2144                     /* resource shortage, minimize the fork rate */
2145                     idle_spawn_rate = 1;
2146                 }
2147                 else if (remaining_children_to_start
2148                          && child_slot < ap_daemons_limit) {
2149                     /* we're still doing a 1-for-1 replacement of dead
2150                      * children with new children
2151                      */
2152                     make_child(ap_server_conf, child_slot);
2153                     --remaining_children_to_start;
2154                 }
2155             }
2156             else if (ap_unregister_extra_mpm_process(pid.pid) == 1) {
2157                 /* handled */
2158 #if APR_HAS_OTHER_CHILD
2159             }
2160             else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
2161                                                 status) == 0) {
2162                 /* handled */
2163 #endif
2164             }
2165             else if (is_graceful) {
2166                 /* Great, we've probably just lost a slot in the
2167                  * scoreboard.  Somehow we don't know about this child.
2168                  */
2169                 ap_log_error(APLOG_MARK, APLOG_WARNING, 0,
2170                              ap_server_conf,
2171                              "long lost child came home! (pid %ld)",
2172                              (long) pid.pid);
2173             }
2174             /* Don't perform idle maintenance when a child dies,
2175              * only do it when there's a timeout.  Remember only a
2176              * finite number of children can die, and it's pretty
2177              * pathological for a lot to die suddenly.
2178              */
2179             continue;
2180         }
2181         else if (remaining_children_to_start) {
2182             /* we hit a 1 second timeout in which none of the previous
2183              * generation of children needed to be reaped... so assume
2184              * they're all done, and pick up the slack if any is left.
2185              */
2186             startup_children(remaining_children_to_start);
2187             remaining_children_to_start = 0;
2188             /* In any event we really shouldn't do the code below because
2189              * few of the servers we just started are in the IDLE state
2190              * yet, so we'd mistakenly create an extra server.
2191              */
2192             continue;
2193         }
2194
2195         perform_idle_server_maintenance();
2196     }
2197 }
2198
2199 static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
2200 {
2201     int remaining_children_to_start;
2202
2203     ap_log_pid(pconf, ap_pid_fname);
2204
2205     if (!is_graceful) {
2206         if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
2207             mpm_state = AP_MPMQ_STOPPING;
2208             return 1;
2209         }
2210         /* fix the generation number in the global score; we just got a new,
2211          * cleared scoreboard
2212          */
2213         ap_scoreboard_image->global->running_generation = my_generation;
2214     }
2215
2216     set_signals();
2217     /* Don't thrash... */
2218     if (max_spare_threads < min_spare_threads + threads_per_child)
2219         max_spare_threads = min_spare_threads + threads_per_child;
2220
2221     /* If we're doing a graceful_restart then we're going to see a lot
2222      * of children exiting immediately when we get into the main loop
2223      * below (because we just sent them AP_SIG_GRACEFUL).  This happens pretty
2224      * rapidly... and for each one that exits we may start a new one, until
2225      * there are at least min_spare_threads idle threads, counting across
2226      * all children.  But we may be permitted to start more children than
2227      * that, so we'll just keep track of how many we're
2228      * supposed to start up without the 1 second penalty between each fork.
2229      */
2230     remaining_children_to_start = ap_daemons_to_start;
2231     if (remaining_children_to_start > ap_daemons_limit) {
2232         remaining_children_to_start = ap_daemons_limit;
2233     }
2234     if (!is_graceful) {
2235         startup_children(remaining_children_to_start);
2236         remaining_children_to_start = 0;
2237     }
2238     else {
2239         /* give the system some time to recover before kicking into
2240          * exponential mode */
2241         hold_off_on_exponential_spawning = 10;
2242     }
2243
2244     ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2245                  "%s configured -- resuming normal operations",
2246                  ap_get_server_description());
2247     ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf,
2248                  "Server built: %s", ap_get_server_built());
2249
2250     restart_pending = shutdown_pending = 0;
2251     mpm_state = AP_MPMQ_RUNNING;
2252
2253     server_main_loop(remaining_children_to_start);
2254     mpm_state = AP_MPMQ_STOPPING;
2255
2256     if (shutdown_pending && !is_graceful) {
2257         /* Time to shut down:
2258          * Kill child processes, tell them to call child_exit, etc...
2259          */
2260         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2261         ap_reclaim_child_processes(1);  /* Start with SIGTERM */
2262
2263         if (!child_fatal) {
2264             /* cleanup pid file on normal shutdown */
2265             const char *pidfile = NULL;
2266             pidfile = ap_server_root_relative(pconf, ap_pid_fname);
2267             if (pidfile != NULL && unlink(pidfile) == 0)
2268                 ap_log_error(APLOG_MARK, APLOG_INFO, 0,
2269                              ap_server_conf,
2270                              "removed PID file %s (pid=%ld)",
2271                              pidfile, (long) getpid());
2272
2273             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0,
2274                          ap_server_conf, "caught SIGTERM, shutting down");
2275         }
2276         return 1;
2277     } else if (shutdown_pending) {
2278         /* Time to gracefully shut down:
2279          * Kill child processes, tell them to call child_exit, etc...
2280          */
2281         int active_children;
2282         int index;
2283         apr_time_t cutoff = 0;
2284
2285         /* Close our listeners, and then ask our children to do same */
2286         ap_close_listeners();
2287         ap_event_pod_killpg(pod, ap_daemons_limit, TRUE);
2288         ap_relieve_child_processes();
2289
2290         if (!child_fatal) {
2291             /* cleanup pid file on normal shutdown */
2292             const char *pidfile = NULL;
2293             pidfile = ap_server_root_relative (pconf, ap_pid_fname);
2294             if ( pidfile != NULL && unlink(pidfile) == 0)
2295                 ap_log_error(APLOG_MARK, APLOG_INFO, 0,
2296                              ap_server_conf,
2297                              "removed PID file %s (pid=%ld)",
2298                              pidfile, (long)getpid());
2299
2300             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2301                          "caught " AP_SIG_GRACEFUL_STOP_STRING
2302                          ", shutting down gracefully");
2303         }
2304
2305         if (ap_graceful_shutdown_timeout) {
2306             cutoff = apr_time_now() +
2307                      apr_time_from_sec(ap_graceful_shutdown_timeout);
2308         }
2309
2310         /* Don't really exit until each child has finished */
2311         shutdown_pending = 0;
2312         do {
2313             /* Pause for a second */
2314             apr_sleep(apr_time_from_sec(1));
2315
2316             /* Relieve any children which have now exited */
2317             ap_relieve_child_processes();
2318
2319             active_children = 0;
2320             for (index = 0; index < ap_daemons_limit; ++index) {
2321                 if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
2322                     active_children = 1;
2323                     /* Having just one child is enough to stay around */
2324                     break;
2325                 }
2326             }
2327         } while (!shutdown_pending && active_children &&
2328                  (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));
2329
2330         /* We might be here because we received SIGTERM, either
2331          * way, try and make sure that all of our processes are
2332          * really dead.
2333          */
2334         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2335         ap_reclaim_child_processes(1);
2336
2337         return 1;
2338     }
2339
2340     /* we've been told to restart */
2341     apr_signal(SIGHUP, SIG_IGN);
2342
2343     if (one_process) {
2344         /* not worth thinking about */
2345         return 1;
2346     }
2347
2348     /* advance to the next generation */
2349     /* XXX: we really need to make sure this new generation number isn't in
2350      * use by any of the children.
2351      */
2352     ++my_generation;
2353     ap_scoreboard_image->global->running_generation = my_generation;
2354
2355     if (is_graceful) {
2356         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2357                      AP_SIG_GRACEFUL_STRING
2358                      " received.  Doing graceful restart");
2359         /* wake up the children...time to die.  But we'll have more soon */
2360         ap_event_pod_killpg(pod, ap_daemons_limit, TRUE);
2361
2362
2363         /* This is mostly for debugging... so that we know what is still
2364          * gracefully dealing with existing request.
2365          */
2366
2367     }
2368     else {
2369         /* Kill 'em all.  Since the child acts the same on the parents SIGTERM
2370          * and a SIGHUP, we may as well use the same signal, because some user
2371          * pthreads are stealing signals from us left and right.
2372          */
2373         ap_event_pod_killpg(pod, ap_daemons_limit, FALSE);
2374
2375         ap_reclaim_child_processes(1);  /* Start with SIGTERM */
2376         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf,
2377                      "SIGHUP received.  Attempting to restart");
2378     }
2379
2380     return 0;
2381 }
2382
2383 /* This really should be a post_config hook, but the error log is already
2384  * redirected by that point, so we need to do this in the open_logs phase.
2385  */
2386 static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
2387                            apr_pool_t * ptemp, server_rec * s)
2388 {
2389     int startup = 0;
2390     int level_flags = 0;
2391     apr_status_t rv;
2392
2393     pconf = p;
2394
2395     /* the reverse of pre_config, we want this only the first time around */
2396     if (retained->module_loads == 1) {
2397         startup = 1;
2398         level_flags |= APLOG_STARTUP;
2399     }
2400
2401     if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
2402         ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
2403                      (startup ? NULL : s),
2404                      "no listening sockets available, shutting down");
2405         return DONE;
2406     }
2407
2408     if (!one_process) {
2409         if ((rv = ap_event_pod_open(pconf, &pod))) {
2410             ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
2411                          (startup ? NULL : s),
2412                          "could not open pipe-of-death");
2413             return DONE;
2414         }
2415     }
2416     return OK;
2417 }
2418
2419 static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
2420                             apr_pool_t * ptemp)
2421 {
2422     int no_detach, debug, foreground;
2423     apr_status_t rv;
2424     const char *userdata_key = "mpm_event_module";
2425
2426     mpm_state = AP_MPMQ_STARTING;
2427
2428     debug = ap_exists_config_define("DEBUG");
2429
2430     if (debug) {
2431         foreground = one_process = 1;
2432         no_detach = 0;
2433     }
2434     else {
2435         one_process = ap_exists_config_define("ONE_PROCESS");
2436         no_detach = ap_exists_config_define("NO_DETACH");
2437         foreground = ap_exists_config_define("FOREGROUND");
2438     }
2439
2440     /* sigh, want this only the second time around */
2441     retained = ap_get_retained_data(userdata_key);
2442     if (!retained) {
2443         retained = ap_set_retained_data(userdata_key, sizeof(*retained));
2444     }
2445     ++retained->module_loads;
2446     if (retained->module_loads == 2) {
2447         is_graceful = 0;
2448         rv = apr_pollset_create(&event_pollset, 1, plog,
2449                                 APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
2450         if (rv != APR_SUCCESS) {
2451             ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
2452                          "Couldn't create a Thread Safe Pollset. "
2453                          "Is it supported on your platform?");
2454             return HTTP_INTERNAL_SERVER_ERROR;
2455         }
2456         apr_pollset_destroy(event_pollset);
2457
2458         if (!one_process && !foreground) {
2459             rv = apr_proc_detach(no_detach ? APR_PROC_DETACH_FOREGROUND
2460                                  : APR_PROC_DETACH_DAEMONIZE);
2461             if (rv != APR_SUCCESS) {
2462                 ap_log_error(APLOG_MARK, APLOG_CRIT, rv, NULL,
2463                              "apr_proc_detach failed");
2464                 return HTTP_INTERNAL_SERVER_ERROR;
2465             }
2466         }
2467         parent_pid = ap_my_pid = getpid();
2468     }
2469
2470     ap_listen_pre_config();
2471     ap_daemons_to_start = DEFAULT_START_DAEMON;
2472     min_spare_threads = DEFAULT_MIN_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
2473     max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
2474     server_limit = DEFAULT_SERVER_LIMIT;
2475     thread_limit = DEFAULT_THREAD_LIMIT;
2476     ap_daemons_limit = server_limit;
2477     threads_per_child = DEFAULT_THREADS_PER_CHILD;
2478     max_clients = ap_daemons_limit * threads_per_child;
2479     ap_pid_fname = DEFAULT_PIDLOG;
2480     ap_lock_fname = DEFAULT_LOCKFILE;
2481     ap_max_requests_per_child = DEFAULT_MAX_REQUESTS_PER_CHILD;
2482     ap_extended_status = 0;
2483     ap_max_mem_free = APR_ALLOCATOR_MAX_FREE_UNLIMITED;
2484
2485     apr_cpystrn(ap_coredump_dir, ap_server_root, sizeof(ap_coredump_dir));
2486
2487     return OK;
2488 }
2489
2490 static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
2491                               apr_pool_t *ptemp, server_rec *s)
2492 {
2493     int startup = 0;
2494
2495     /* the reverse of pre_config, we want this only the first time around */
2496     if (retained->module_loads == 1) {
2497         startup = 1;
2498     }
2499
2500     if (server_limit > MAX_SERVER_LIMIT) {
2501         if (startup) {
2502             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2503                          "WARNING: ServerLimit of %d exceeds compile-time "
2504                          "limit of", server_limit);
2505             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2506                          " %d servers, decreasing to %d.",
2507                          MAX_SERVER_LIMIT, MAX_SERVER_LIMIT);
2508         } else {
2509             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2510                          "ServerLimit of %d exceeds compile-time limit "
2511                          "of %d, decreasing to match",
2512                          server_limit, MAX_SERVER_LIMIT);
2513         }
2514         server_limit = MAX_SERVER_LIMIT;
2515     }
2516     else if (server_limit < 1) {
2517         if (startup) {
2518             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2519                          "WARNING: ServerLimit of %d not allowed, "
2520                          "increasing to 1.", server_limit);
2521         } else {
2522             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2523                          "ServerLimit of %d not allowed, increasing to 1",
2524                          server_limit);
2525         }
2526         server_limit = 1;
2527     }
2528
2529     /* you cannot change ServerLimit across a restart; ignore
2530      * any such attempts
2531      */
2532     if (!retained->first_server_limit) {
2533         retained->first_server_limit = server_limit;
2534     }
2535     else if (server_limit != retained->first_server_limit) {
2536         /* don't need a startup console version here */
2537         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2538                      "changing ServerLimit to %d from original value of %d "
2539                      "not allowed during restart",
2540                      server_limit, retained->first_server_limit);
2541         server_limit = retained->first_server_limit;
2542     }
2543
2544     if (thread_limit > MAX_THREAD_LIMIT) {
2545         if (startup) {
2546             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2547                          "WARNING: ThreadLimit of %d exceeds compile-time "
2548                          "limit of", thread_limit);
2549             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2550                          " %d threads, decreasing to %d.",
2551                          MAX_THREAD_LIMIT, MAX_THREAD_LIMIT);
2552         } else {
2553             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2554                          "ThreadLimit of %d exceeds compile-time limit "
2555                          "of %d, decreasing to match",
2556                          thread_limit, MAX_THREAD_LIMIT);
2557         }
2558         thread_limit = MAX_THREAD_LIMIT;
2559     }
2560     else if (thread_limit < 1) {
2561         if (startup) {
2562             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2563                          "WARNING: ThreadLimit of %d not allowed, "
2564                          "increasing to 1.", thread_limit);
2565         } else {
2566             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2567                          "ThreadLimit of %d not allowed, increasing to 1",
2568                          thread_limit);
2569         }
2570         thread_limit = 1;
2571     }
2572
2573     /* you cannot change ThreadLimit across a restart; ignore
2574      * any such attempts
2575      */
2576     if (!retained->first_thread_limit) {
2577         retained->first_thread_limit = thread_limit;
2578     }
2579     else if (thread_limit != retained->first_thread_limit) {
2580         /* don't need a startup console version here */
2581         ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2582                      "changing ThreadLimit to %d from original value of %d "
2583                      "not allowed during restart",
2584                      thread_limit, retained->first_thread_limit);
2585         thread_limit = retained->first_thread_limit;
2586     }
2587
2588     if (threads_per_child > thread_limit) {
2589         if (startup) {
2590             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2591                          "WARNING: ThreadsPerChild of %d exceeds ThreadLimit "
2592                          "of", threads_per_child);
2593             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2594                          " %d threads, decreasing to %d.",
2595                          thread_limit, thread_limit);
2596             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2597                          " To increase, please see the ThreadLimit "
2598                          "directive.");
2599         } else {
2600             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2601                          "ThreadsPerChild of %d exceeds ThreadLimit "
2602                          "of %d, decreasing to match",
2603                          threads_per_child, thread_limit);
2604         }
2605         threads_per_child = thread_limit;
2606     }
2607     else if (threads_per_child < 1) {
2608         if (startup) {
2609             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2610                          "WARNING: ThreadsPerChild of %d not allowed, "
2611                          "increasing to 1.", threads_per_child);
2612         } else {
2613             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2614                          "ThreadsPerChild of %d not allowed, increasing to 1",
2615                          threads_per_child);
2616         }
2617         threads_per_child = 1;
2618     }
2619
2620     if (max_clients < threads_per_child) {
2621         if (startup) {
2622             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2623                          "WARNING: MaxClients of %d is less than "
2624                          "ThreadsPerChild of", max_clients);
2625             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2626                          " %d, increasing to %d.  MaxClients must be at "
2627                          "least as large",
2628                          threads_per_child, threads_per_child);
2629             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2630                          " as the number of threads in a single server.");
2631         } else {
2632             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2633                          "MaxClients of %d is less than ThreadsPerChild "
2634                          "of %d, increasing to match",
2635                          max_clients, threads_per_child);
2636         }
2637         max_clients = threads_per_child;
2638     }
2639
2640     ap_daemons_limit = max_clients / threads_per_child;
2641
2642     if (max_clients % threads_per_child) {
2643         int tmp_max_clients = ap_daemons_limit * threads_per_child;
2644
2645         if (startup) {
2646             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2647                          "WARNING: MaxClients of %d is not an integer "
2648                          "multiple of", max_clients);
2649             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2650                          " ThreadsPerChild of %d, decreasing to nearest "
2651                          "multiple %d,", threads_per_child,
2652                          tmp_max_clients);
2653             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2654                          " for a maximum of %d servers.",
2655                          ap_daemons_limit);
2656         } else {
2657             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2658                          "MaxClients of %d is not an integer multiple of "
2659                          "ThreadsPerChild of %d, decreasing to nearest "
2660                          "multiple %d", max_clients, threads_per_child,
2661                          tmp_max_clients);
2662         }
2663         max_clients = tmp_max_clients;
2664     }
2665
2666     if (ap_daemons_limit > server_limit) {
2667         if (startup) {
2668             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2669                          "WARNING: MaxClients of %d would require %d "
2670                          "servers and ", max_clients, ap_daemons_limit);
2671             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2672                          " would exceed ServerLimit of %d, decreasing to %d.",
2673                          server_limit, server_limit * threads_per_child);
2674             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2675                          " To increase, please see the ServerLimit "
2676                          "directive.");
2677         } else {
2678             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2679                          "MaxClients of %d would require %d servers and "
2680                          "exceed ServerLimit of %d, decreasing to %d",
2681                          max_clients, ap_daemons_limit, server_limit,
2682                          server_limit * threads_per_child);
2683         }
2684         ap_daemons_limit = server_limit;
2685     }
2686
2687     /* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */
2688     if (ap_daemons_to_start < 0) {
2689         if (startup) {
2690             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2691                          "WARNING: StartServers of %d not allowed, "
2692                          "increasing to 1.", ap_daemons_to_start);
2693         } else {
2694             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2695                          "StartServers of %d not allowed, increasing to 1",
2696                          ap_daemons_to_start);
2697         }
2698         ap_daemons_to_start = 1;
2699     }
2700
2701     if (min_spare_threads < 1) {
2702         if (startup) {
2703             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2704                          "WARNING: MinSpareThreads of %d not allowed, "
2705                          "increasing to 1", min_spare_threads);
2706             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2707                          " to avoid almost certain server failure.");
2708             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL,
2709                          " Please read the documentation.");
2710         } else {
2711             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s,
2712                          "MinSpareThreads of %d not allowed, increasing to 1",
2713                          min_spare_threads);
2714         }
2715         min_spare_threads = 1;
2716     }
2717
2718     /* max_spare_threads < min_spare_threads + threads_per_child
2719      * checked in ap_mpm_run()
2720      */
2721
2722     return OK;
2723 }
2724
2725 static void event_hooks(apr_pool_t * p)
2726 {
2727     /* Our open_logs hook function must run before the core's, or stderr
2728      * will be redirected to a file, and the messages won't print to the
2729      * console.
2730      */
2731     static const char *const aszSucc[] = { "core.c", NULL };
2732     one_process = 0;
2733
2734     ap_hook_open_logs(event_open_logs, NULL, aszSucc, APR_HOOK_REALLY_FIRST);
2735     /* we need to set the MPM state before other pre-config hooks use MPM query
2736      * to retrieve it, so register as REALLY_FIRST
2737      */
2738     ap_hook_pre_config(event_pre_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
2739     ap_hook_check_config(event_check_config, NULL, NULL, APR_HOOK_MIDDLE);
2740     ap_hook_mpm(event_run, NULL, NULL, APR_HOOK_MIDDLE);
2741     ap_hook_mpm_query(event_query, NULL, NULL, APR_HOOK_MIDDLE);
2742     ap_hook_mpm_note_child_killed(event_note_child_killed, NULL, NULL, APR_HOOK_MIDDLE);
2743     ap_hook_mpm_register_timed_callback(event_register_timed_callback, NULL, NULL,
2744                                         APR_HOOK_MIDDLE);
2745     ap_hook_mpm_get_name(event_get_name, NULL, NULL, APR_HOOK_MIDDLE);
2746 }
2747
2748 static const char *set_daemons_to_start(cmd_parms *cmd, void *dummy,
2749                                         const char *arg)
2750 {
2751     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2752     if (err != NULL) {
2753         return err;
2754     }
2755
2756     ap_daemons_to_start = atoi(arg);
2757     return NULL;
2758 }
2759
2760 static const char *set_min_spare_threads(cmd_parms * cmd, void *dummy,
2761                                          const char *arg)
2762 {
2763     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2764     if (err != NULL) {
2765         return err;
2766     }
2767
2768     min_spare_threads = atoi(arg);
2769     return NULL;
2770 }
2771
2772 static const char *set_max_spare_threads(cmd_parms * cmd, void *dummy,
2773                                          const char *arg)
2774 {
2775     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2776     if (err != NULL) {
2777         return err;
2778     }
2779
2780     max_spare_threads = atoi(arg);
2781     return NULL;
2782 }
2783
2784 static const char *set_max_clients(cmd_parms * cmd, void *dummy,
2785                                    const char *arg)
2786 {
2787     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2788     if (err != NULL) {
2789         return err;
2790     }
2791
2792     max_clients = atoi(arg);
2793     return NULL;
2794 }
2795
2796 static const char *set_threads_per_child(cmd_parms * cmd, void *dummy,
2797                                          const char *arg)
2798 {
2799     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2800     if (err != NULL) {
2801         return err;
2802     }
2803
2804     threads_per_child = atoi(arg);
2805     return NULL;
2806 }
2807 static const char *set_server_limit (cmd_parms *cmd, void *dummy, const char *arg)
2808 {
2809     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2810     if (err != NULL) {
2811         return err;
2812     }
2813
2814     server_limit = atoi(arg);
2815     return NULL;
2816 }
2817
2818 static const char *set_thread_limit(cmd_parms * cmd, void *dummy,
2819                                     const char *arg)
2820 {
2821     const char *err = ap_check_cmd_context(cmd, GLOBAL_ONLY);
2822     if (err != NULL) {
2823         return err;
2824     }
2825
2826     thread_limit = atoi(arg);
2827     return NULL;
2828 }
2829
2830 static const command_rec event_cmds[] = {
2831     LISTEN_COMMANDS,
2832     AP_INIT_TAKE1("StartServers", set_daemons_to_start, NULL, RSRC_CONF,
2833                   "Number of child processes launched at server startup"),
2834     AP_INIT_TAKE1("ServerLimit", set_server_limit, NULL, RSRC_CONF,
2835                   "Maximum number of child processes for this run of Apache"),
2836     AP_INIT_TAKE1("MinSpareThreads", set_min_spare_threads, NULL, RSRC_CONF,
2837                   "Minimum number of idle threads, to handle request spikes"),
2838     AP_INIT_TAKE1("MaxSpareThreads", set_max_spare_threads, NULL, RSRC_CONF,
2839                   "Maximum number of idle threads"),
2840     AP_INIT_TAKE1("MaxClients", set_max_clients, NULL, RSRC_CONF,
2841                   "Maximum number of threads alive at the same time"),
2842     AP_INIT_TAKE1("ThreadsPerChild", set_threads_per_child, NULL, RSRC_CONF,
2843                   "Number of threads each child creates"),
2844     AP_INIT_TAKE1("ThreadLimit", set_thread_limit, NULL, RSRC_CONF,
2845                   "Maximum number of worker threads per child process for this "
2846                   "run of Apache - Upper limit for ThreadsPerChild"),
2847     AP_GRACEFUL_SHUTDOWN_TIMEOUT_COMMAND,
2848     {NULL}
2849 };
2850
2851 module AP_MODULE_DECLARE_DATA mpm_event_module = {
2852     MPM20_MODULE_STUFF,
2853     NULL,                       /* hook to run before apache parses args */
2854     NULL,                       /* create per-directory config structure */
2855     NULL,                       /* merge per-directory config structures */
2856     NULL,                       /* create per-server config structure */
2857     NULL,                       /* merge per-server config structures */
2858     event_cmds,                 /* command apr_table_t */
2859     event_hooks                 /* register_hooks */
2860 };