Merge r1809273, r1814719 from trunk:
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c
index 89ee47671d954b5bd51d0c7588b3df9bb7f416cd..bd3ad3c9989d10ee18f7640ea914a27c46b87030 100644
 #endif
 #define WORKER_FACTOR_SCALE   16  /* scale factor to allow fractional values */
 static unsigned int worker_factor = DEFAULT_WORKER_FACTOR * WORKER_FACTOR_SCALE;
-
-static int threads_per_child = 0;   /* Worker threads per child */
-static int ap_daemons_to_start = 0;
-static int min_spare_threads = 0;
-static int max_spare_threads = 0;
-static int ap_daemons_limit = 0;
-static int max_workers = 0;
-static int server_limit = 0;
-static int thread_limit = 0;
+    /* AsyncRequestWorkerFactor * 16 */
+
+static int threads_per_child = 0;           /* ThreadsPerChild */
+static int ap_daemons_to_start = 0;         /* StartServers */
+static int min_spare_threads = 0;           /* MinSpareThreads */
+static int max_spare_threads = 0;           /* MaxSpareThreads */
+static int active_daemons_limit = 0;        /* MaxRequestWorkers / ThreadsPerChild */
+static int active_daemons = 0;              /* workers that are still active,
+                                               i.e. not shutting down gracefully */
+static int max_workers = 0;                 /* MaxRequestWorkers */
+static int server_limit = 0;                /* ServerLimit */
+static int thread_limit = 0;                /* ThreadLimit */
 static int had_healthy_child = 0;
 static int dying = 0;
 static int workers_may_exit = 0;
 static int start_thread_may_exit = 0;
 static int listener_may_exit = 0;
+static int listener_is_wakeable = 0;        /* Pollset supports APR_POLLSET_WAKEABLE */
 static int num_listensocks = 0;
 static apr_int32_t conns_this_child;        /* MaxConnectionsPerChild, only access
                                                in listener thread */
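
`listener_is_wakeable` is set at pollset-creation time when `APR_POLLSET_WAKEABLE` is supported (the creation itself appears at the end of this diff). A minimal sketch of that APR pattern, assuming a pool `p` and an arbitrary size:

```c
#include <apr_poll.h>

/* Sketch: try to create a pollset that apr_pollset_wakeup() can
 * interrupt; fall back to a plain pollset if the flag is unsupported.
 */
apr_pollset_t *ps;
apr_status_t rv = apr_pollset_create(&ps, 64, p, APR_POLLSET_WAKEABLE);
int wakeable = (rv == APR_SUCCESS);
if (!wakeable)
    rv = apr_pollset_create(&ps, 64, p, 0);
```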
@@ -181,10 +185,11 @@ static apr_uint32_t connection_count = 0;   /* Number of open connections */
 static apr_uint32_t lingering_count = 0;    /* Number of connections in lingering close */
 static apr_uint32_t suspended_count = 0;    /* Number of suspended connections */
 static apr_uint32_t clogged_count = 0;      /* Number of threads processing ssl conns */
+static apr_uint32_t threads_shutdown = 0;   /* Number of threads that have shut
+                                               down early during graceful termination */
 static int resource_shortage = 0;
 static fd_queue_t *worker_queue;
 static fd_queue_info_t *worker_queue_info;
-static int mpm_state = AP_MPMQ_STARTING;
 
 static apr_thread_mutex_t *timeout_mutex;
 
@@ -194,6 +199,25 @@ module AP_MODULE_DECLARE_DATA mpm_event_module;
 struct event_srv_cfg_s;
 typedef struct event_srv_cfg_s event_srv_cfg;
 
+static apr_pollfd_t *listener_pollfd;
+
+/*
+ * The pollset for sockets that are in any of the timeout queues. Currently
+ * we use the timeout_mutex to make sure that connections are added/removed
+ * atomically to/from both event_pollset and a timeout queue. Otherwise
+ * some confusion can happen under high load if timeout queues and pollset
+ * get out of sync.
+ * XXX: It should be possible to make the lock unnecessary in many or even all
+ * XXX: cases.
+ */
+static apr_pollset_t *event_pollset;
+
+/*
+ * The chain of connections to be shutdown by a worker thread (deferred),
+ * linked list updated atomically.
+ */
+static event_conn_state_t *volatile defer_linger_chain;
+
 struct event_conn_state_t {
     /** APR_RING of expiration timeouts */
     APR_RING_ENTRY(event_conn_state_t) timeout_list;
@@ -218,14 +242,18 @@ struct event_conn_state_t {
     apr_pollfd_t pfd;
     /** public parts of the connection state */
     conn_state_t pub;
+    /** chaining in defer_linger_chain */
+    struct event_conn_state_t *chain;
 };
+
 APR_RING_HEAD(timeout_head_t, event_conn_state_t);
 
 struct timeout_queue {
     struct timeout_head_t head;
-    int count, *total;
     apr_interval_time_t timeout;
-    struct timeout_queue *next;
+    apr_uint32_t count;         /* for this queue */
+    apr_uint32_t *total;        /* for all chained/related queues */
+    struct timeout_queue *next; /* chaining */
 };
 /*
  * Several timeout queues that use different timeouts, so that we always can
@@ -239,58 +267,71 @@ static struct timeout_queue *write_completion_q,
                             *keepalive_q,
                             *linger_q,
                             *short_linger_q;
+static volatile apr_time_t  queues_next_expiry;
 
-static apr_pollfd_t *listener_pollfd;
+/* Prevent extra poll/wakeup calls for timeouts close in the future (queues
+ * have the granularity of a second anyway).
+ * XXX: Wouldn't 0.5s (instead of 0.1s) be "enough"?
+ */
+#define TIMEOUT_FUDGE_FACTOR apr_time_from_msec(100)
 
 /*
  * Macros for accessing struct timeout_queue.
  * For TO_QUEUE_APPEND and TO_QUEUE_REMOVE, timeout_mutex must be held.
  */
-#define TO_QUEUE_APPEND(q, el)                                                \
-    do {                                                                      \
-        APR_RING_INSERT_TAIL(&(q)->head, el, event_conn_state_t,              \
-                             timeout_list);                                   \
-        ++*(q)->total;                                                        \
-        ++(q)->count;                                                         \
-    } while (0)
-
-#define TO_QUEUE_REMOVE(q, el)                                                \
-    do {                                                                      \
-        APR_RING_REMOVE(el, timeout_list);                                    \
-        --*(q)->total;                                                        \
-        --(q)->count;                                                         \
-    } while (0)
-
-#define TO_QUEUE_INIT(q, p, t, v)                                             \
-    do {                                                                      \
-        struct timeout_queue *b = (v);                                        \
-        (q) = apr_palloc((p), sizeof *(q));                                   \
-        APR_RING_INIT(&(q)->head, event_conn_state_t, timeout_list);          \
-        (q)->total = (b) ? (b)->total : apr_pcalloc((p), sizeof *(q)->total); \
-        (q)->count = 0;                                                       \
-        (q)->timeout = (t);                                                   \
-        (q)->next = NULL;                                                     \
-    } while (0)
-
-#define TO_QUEUE_ELEM_INIT(el) APR_RING_ELEM_INIT(el, timeout_list)
+static void TO_QUEUE_APPEND(struct timeout_queue *q, event_conn_state_t *el)
+{
+    apr_time_t q_expiry;
+    apr_time_t next_expiry;
 
-/*
- * The pollset for sockets that are in any of the timeout queues. Currently
- * we use the timeout_mutex to make sure that connections are added/removed
- * atomically to/from both event_pollset and a timeout queue. Otherwise
- * some confusion can happen under high load if timeout queues and pollset
- * get out of sync.
- * XXX: It should be possible to make the lock unnecessary in many or even all
- * XXX: cases.
- */
-static apr_pollset_t *event_pollset;
+    APR_RING_INSERT_TAIL(&q->head, el, event_conn_state_t, timeout_list);
+    apr_atomic_inc32(q->total);
+    ++q->count;
+
+    /* Cheaply update the overall queues' next expiry according to the
+     * first entry of this queue (oldest), if necessary.
+     */
+    el = APR_RING_FIRST(&q->head);
+    q_expiry = el->queue_timestamp + q->timeout;
+    next_expiry = queues_next_expiry;
+    if (!next_expiry || next_expiry > q_expiry + TIMEOUT_FUDGE_FACTOR) {
+        queues_next_expiry = q_expiry;
+        /* Unblock the poll()ing listener for it to update its timeout. */
+        if (listener_is_wakeable) {
+            apr_pollset_wakeup(event_pollset);
+        }
+    }
+}
+
+static void TO_QUEUE_REMOVE(struct timeout_queue *q, event_conn_state_t *el)
+{
+    APR_RING_REMOVE(el, timeout_list);
+    APR_RING_ELEM_INIT(el, timeout_list);
+    apr_atomic_dec32(q->total);
+    --q->count;
+}
+
+static struct timeout_queue *TO_QUEUE_MAKE(apr_pool_t *p, apr_time_t t,
+                                           struct timeout_queue *ref)
+{
+    struct timeout_queue *q;
+
+    q = apr_pcalloc(p, sizeof *q);
+    APR_RING_INIT(&q->head, event_conn_state_t, timeout_list);
+    q->total = (ref) ? ref->total : apr_pcalloc(p, sizeof *q->total);
+    q->timeout = t;
+
+    return q;
+}
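
For illustration only (the call sites are outside the hunks shown), a second queue shares its `total` counter with an existing one by passing it as `ref`, e.g. for the two lingering-close queues:

```c
/* Sketch: linger_q and short_linger_q share one atomic total, so the
 * listener can test "any lingering connection?" with a single read.
 * Pool and timeout values assumed from the surrounding code.
 */
linger_q = TO_QUEUE_MAKE(pconf, apr_time_from_sec(MAX_SECS_TO_LINGER),
                         NULL);
short_linger_q = TO_QUEUE_MAKE(pconf, apr_time_from_sec(SECONDS_TO_LINGER),
                               linger_q); /* ref: shares linger_q->total */
```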
+
+#define TO_QUEUE_ELEM_INIT(el) \
+    APR_RING_ELEM_INIT((el), timeout_list)
 
 /* The structure used to pass unique initialization info to each thread */
 typedef struct
 {
-    int pid;
-    int tid;
-    int sd;
+    int pslot;  /* process slot */
+    int tslot;  /* worker slot of the thread */
 } proc_info;
 
 /* Structure used to pass information to the thread responsible for
@@ -321,12 +362,11 @@ typedef struct
  * subsequent calls to pre-config hook
  */
 typedef struct event_retained_data {
+    ap_unixd_mpm_retained_data *mpm;
+
     int first_server_limit;
     int first_thread_limit;
-    int module_loads;
     int sick_child_detected;
-    ap_generation_t my_generation;
-    int volatile is_graceful; /* set from signal handler */
     int maxclients_reported;
     /*
      * The max child slot ever assigned, preserved across restarts.  Necessary
@@ -335,6 +375,14 @@ typedef struct event_retained_data {
      * scoreboard.
      */
     int max_daemons_limit;
+
+    /*
+     * All running workers, active and shutting down, including those that
+     * may be left from before a graceful restart.
+     * Not kept up-to-date when shutdown is pending.
+     */
+    int total_daemons;
+
     /*
      * idle_spawn_rate is the number of children that will be spawned on the
      * next maintenance cycle if there aren't enough idle servers.  It is
@@ -346,12 +394,6 @@ typedef struct event_retained_data {
 #define MAX_SPAWN_RATE        (32)
 #endif
     int hold_off_on_exponential_spawning;
-    /*
-     * Current number of listeners buckets and maximum reached accross
-     * restarts (to size retained data according to dynamic num_buckets,
-     * eg. idle_spawn_rate).
-     */
-    int num_buckets, max_buckets;
 } event_retained_data;
 static event_retained_data *retained;
  
@@ -421,6 +463,9 @@ static void disable_listensocks(int process_slot)
 static void enable_listensocks(int process_slot)
 {
     int i;
+    if (listener_may_exit) {
+        return;
+    }
     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00457)
                  "Accepting new connections again: "
                  "%u active conns (%u lingering/%u clogged/%u suspended), "
@@ -439,14 +484,55 @@ static void enable_listensocks(int process_slot)
     ap_scoreboard_image->parent[process_slot].not_accepting = 0;
 }
 
+static void abort_socket_nonblocking(apr_socket_t *csd)
+{
+    apr_status_t rv;
+    apr_socket_timeout_set(csd, 0);
+#if defined(SOL_SOCKET) && defined(SO_LINGER)
+    /* This socket is over now, and we don't want to block nor linger
+     * anymore, so reset it. A normal close could still linger in the
+     * system, while RST is fast, nonblocking, and what the peer will
+     * get if it sends us further data anyway.
+     */
+    {
+        apr_os_sock_t osd = -1;
+        struct linger opt;
+        opt.l_onoff = 1;
+        opt.l_linger = 0; /* zero timeout is RST */
+        apr_os_sock_get(&osd, csd);
+        setsockopt(osd, SOL_SOCKET, SO_LINGER, (void *)&opt, sizeof opt);
+    }
+#endif
+    rv = apr_socket_close(csd);
+    if (rv != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(00468)
+                     "error closing socket");
+        AP_DEBUG_ASSERT(0);
+    }
+}
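
The zero-timeout `SO_LINGER` trick above is the standard BSD-sockets way to turn `close()` into an immediate RST; a self-contained restatement in plain POSIX (no APR), for reference:

```c
#include <sys/socket.h>
#include <unistd.h>

/* Close fd with an RST instead of a graceful FIN: SO_LINGER with a
 * zero timeout discards any untransmitted data and never blocks.
 */
static void close_with_rst(int fd)
{
    struct linger opt = { .l_onoff = 1, .l_linger = 0 };
    setsockopt(fd, SOL_SOCKET, SO_LINGER, &opt, sizeof opt);
    close(fd);
}
```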
+
 static void close_worker_sockets(void)
 {
     int i;
     for (i = 0; i < threads_per_child; i++) {
-        if (worker_sockets[i]) {
-            apr_socket_close(worker_sockets[i]);
+        apr_socket_t *csd = worker_sockets[i];
+        if (csd) {
             worker_sockets[i] = NULL;
+            abort_socket_nonblocking(csd);
+        }
+    }
+    for (;;) {
+        event_conn_state_t *cs = defer_linger_chain;
+        if (!cs) {
+            break;
         }
+        if (apr_atomic_casptr((void *)&defer_linger_chain, cs->chain,
+                              cs) != cs) {
+            /* Race lost, try again */
+            continue;
+        }
+        cs->chain = NULL;
+        abort_socket_nonblocking(cs->pfd.desc.s);
     }
 }
 
@@ -462,6 +548,11 @@ static void wakeup_listener(void)
         return;
     }
 
+    /* Unblock the listener if it's poll()ing */
+    if (listener_is_wakeable) {
+        apr_pollset_wakeup(event_pollset);
+    }
+
     /* unblock the listener if it's waiting for a worker */
     ap_queue_info_term(worker_queue_info);
 
@@ -489,7 +580,7 @@ static void signal_threads(int mode)
         return;
     }
     terminate_mode = mode;
-    mpm_state = AP_MPMQ_STOPPING;
+    retained->mpm->mpm_state = AP_MPMQ_STOPPING;
 
     /* in case we weren't called from the listener thread, wake up the
      * listener thread
@@ -548,13 +639,13 @@ static int event_query(int query_code, int *result, apr_status_t *rv)
         *result = ap_max_requests_per_child;
         break;
     case AP_MPMQ_MAX_DAEMONS:
-        *result = ap_daemons_limit;
+        *result = active_daemons_limit;
         break;
     case AP_MPMQ_MPM_STATE:
-        *result = mpm_state;
+        *result = retained->mpm->mpm_state;
         break;
     case AP_MPMQ_GENERATION:
-        *result = retained->my_generation;
+        *result = retained->mpm->my_generation;
         break;
     default:
         *rv = APR_ENOTIMPL;
@@ -582,28 +673,7 @@ static void event_note_child_started(int slot, pid_t pid)
     ap_scoreboard_image->parent[slot].pid = pid;
     ap_run_child_status(ap_server_conf,
                         ap_scoreboard_image->parent[slot].pid,
-                        retained->my_generation, slot, MPM_CHILD_STARTED);
-}
-
-static void event_note_child_lost_slot(int slot, pid_t newpid)
-{
-    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00458)
-                 "pid %" APR_PID_T_FMT " taking over scoreboard slot from "
-                 "%" APR_PID_T_FMT "%s",
-                 newpid,
-                 ap_scoreboard_image->parent[slot].pid,
-                 ap_scoreboard_image->parent[slot].quiescing ?
-                 " (quiescing)" : "");
-    ap_run_child_status(ap_server_conf,
-                        ap_scoreboard_image->parent[slot].pid,
-                        ap_scoreboard_image->parent[slot].generation,
-                        slot, MPM_CHILD_LOST_SLOT);
-    /* Don't forget about this exiting child process, or we
-     * won't be able to kill it if it doesn't exit by the
-     * time the server is shut down.
-     */
-    ap_register_extra_mpm_process(ap_scoreboard_image->parent[slot].pid,
-                                  ap_scoreboard_image->parent[slot].generation);
+                        retained->mpm->my_generation, slot, MPM_CHILD_STARTED);
 }
 
 static const char *event_get_name(void)
@@ -615,7 +685,7 @@ static const char *event_get_name(void)
 static void clean_child_exit(int code) __attribute__ ((noreturn));
 static void clean_child_exit(int code)
 {
-    mpm_state = AP_MPMQ_STOPPING;
+    retained->mpm->mpm_state = AP_MPMQ_STOPPING;
     if (pchild) {
         apr_pool_destroy(pchild);
     }
@@ -638,10 +708,6 @@ static void just_die(int sig)
 
 static int child_fatal;
 
-/* volatile because they're updated from a signal handler */
-static int volatile shutdown_pending;
-static int volatile restart_pending;
-
 static apr_status_t decrement_connection_count(void *cs_)
 {
     event_conn_state_t *cs = cs_;
@@ -656,151 +722,12 @@ static apr_status_t decrement_connection_count(void *cs_)
         default:
             break;
     }
-    apr_atomic_dec32(&connection_count);
-    return APR_SUCCESS;
-}
-
-/*
- * ap_start_shutdown() and ap_start_restart(), below, are a first stab at
- * functions to initiate shutdown or restart without relying on signals.
- * Previously this was initiated in sig_term() and restart() signal handlers,
- * but we want to be able to start a shutdown/restart from other sources --
- * e.g. on Win32, from the service manager. Now the service manager can
- * call ap_start_shutdown() or ap_start_restart() as appropriate.  Note that
- * these functions can also be called by the child processes, since global
- * variables are no longer used to pass on the required action to the parent.
- *
- * These should only be called from the parent process itself, since the
- * parent process will use the shutdown_pending and restart_pending variables
- * to determine whether to shutdown or restart. The child process should
- * call signal_parent() directly to tell the parent to die -- this will
- * cause neither of those variable to be set, which the parent will
- * assume means something serious is wrong (which it will be, for the
- * child to force an exit) and so do an exit anyway.
- */
-
-static void ap_start_shutdown(int graceful)
-{
-    mpm_state = AP_MPMQ_STOPPING;
-    if (shutdown_pending == 1) {
-        /* Um, is this _probably_ not an error, if the user has
-         * tried to do a shutdown twice quickly, so we won't
-         * worry about reporting it.
-         */
-        return;
-    }
-    shutdown_pending = 1;
-    retained->is_graceful = graceful;
-}
-
-/* do a graceful restart if graceful == 1 */
-static void ap_start_restart(int graceful)
-{
-    mpm_state = AP_MPMQ_STOPPING;
-    if (restart_pending == 1) {
-        /* Probably not an error - don't bother reporting it */
-        return;
-    }
-    restart_pending = 1;
-    retained->is_graceful = graceful;
-}
-
-static void sig_term(int sig)
-{
-    ap_start_shutdown(sig == AP_SIG_GRACEFUL_STOP);
-}
-
-static void restart(int sig)
-{
-    ap_start_restart(sig == AP_SIG_GRACEFUL);
-}
-
-static void set_signals(void)
-{
-#ifndef NO_USE_SIGACTION
-    struct sigaction sa;
-#endif
-
-    if (!one_process) {
-        ap_fatal_signal_setup(ap_server_conf, pconf);
+    /* Unblock the listener if it's waiting for connection_count = 0 */
+    if (!apr_atomic_dec32(&connection_count)
+             && listener_is_wakeable && listener_may_exit) {
+        apr_pollset_wakeup(event_pollset);
     }
-
-#ifndef NO_USE_SIGACTION
-    sigemptyset(&sa.sa_mask);
-    sa.sa_flags = 0;
-
-    sa.sa_handler = sig_term;
-    if (sigaction(SIGTERM, &sa, NULL) < 0)
-        ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00459)
-                     "sigaction(SIGTERM)");
-#ifdef AP_SIG_GRACEFUL_STOP
-    if (sigaction(AP_SIG_GRACEFUL_STOP, &sa, NULL) < 0)
-        ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00460)
-                     "sigaction(" AP_SIG_GRACEFUL_STOP_STRING ")");
-#endif
-#ifdef SIGINT
-    if (sigaction(SIGINT, &sa, NULL) < 0)
-        ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00461)
-                     "sigaction(SIGINT)");
-#endif
-#ifdef SIGXCPU
-    sa.sa_handler = SIG_DFL;
-    if (sigaction(SIGXCPU, &sa, NULL) < 0)
-        ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00462)
-                     "sigaction(SIGXCPU)");
-#endif
-#ifdef SIGXFSZ
-    /* For systems following the LFS standard, ignoring SIGXFSZ allows
-     * a write() beyond the 2GB limit to fail gracefully with E2BIG
-     * rather than terminate the process. */
-    sa.sa_handler = SIG_IGN;
-    if (sigaction(SIGXFSZ, &sa, NULL) < 0)
-        ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00463)
-                     "sigaction(SIGXFSZ)");
-#endif
-#ifdef SIGPIPE
-    sa.sa_handler = SIG_IGN;
-    if (sigaction(SIGPIPE, &sa, NULL) < 0)
-        ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00464)
-                     "sigaction(SIGPIPE)");
-#endif
-
-    /* we want to ignore HUPs and AP_SIG_GRACEFUL while we're busy
-     * processing one */
-    sigaddset(&sa.sa_mask, SIGHUP);
-    sigaddset(&sa.sa_mask, AP_SIG_GRACEFUL);
-    sa.sa_handler = restart;
-    if (sigaction(SIGHUP, &sa, NULL) < 0)
-        ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00465)
-                     "sigaction(SIGHUP)");
-    if (sigaction(AP_SIG_GRACEFUL, &sa, NULL) < 0)
-        ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00466)
-                     "sigaction(" AP_SIG_GRACEFUL_STRING ")");
-#else
-    if (!one_process) {
-#ifdef SIGXCPU
-        apr_signal(SIGXCPU, SIG_DFL);
-#endif /* SIGXCPU */
-#ifdef SIGXFSZ
-        apr_signal(SIGXFSZ, SIG_IGN);
-#endif /* SIGXFSZ */
-    }
-
-    apr_signal(SIGTERM, sig_term);
-#ifdef SIGHUP
-    apr_signal(SIGHUP, restart);
-#endif /* SIGHUP */
-#ifdef AP_SIG_GRACEFUL
-    apr_signal(AP_SIG_GRACEFUL, restart);
-#endif /* AP_SIG_GRACEFUL */
-#ifdef AP_SIG_GRACEFUL_STOP
-     apr_signal(AP_SIG_GRACEFUL_STOP, sig_term);
-#endif /* AP_SIG_GRACEFUL_STOP */
-#ifdef SIGPIPE
-    apr_signal(SIGPIPE, SIG_IGN);
-#endif /* SIGPIPE */
-
-#endif
+    return APR_SUCCESS;
 }
 
 static void notify_suspend(event_conn_state_t *cs)
@@ -817,11 +744,27 @@ static void notify_resume(event_conn_state_t *cs, ap_sb_handle_t *sbh)
     ap_run_resume_connection(cs->c, cs->r);
 }
 
-static int start_lingering_close_common(event_conn_state_t *cs, int in_worker)
+/*
+ * Close our side of the connection, flushing data to the client first.
+ * Pre-condition: cs is not in any timeout queue and not in the pollset,
+ *                timeout_mutex is not locked
+ * return: 0 if connection is fully closed,
+ *         1 if connection is lingering
+ * May only be called by worker thread.
+ */
+static int start_lingering_close_blocking(event_conn_state_t *cs)
 {
     apr_status_t rv;
     struct timeout_queue *q;
     apr_socket_t *csd = cs->pfd.desc.s;
+
+    if (ap_start_lingering_close(cs->c)) {
+        notify_suspend(cs);
+        apr_socket_close(csd);
+        ap_push_pool(worker_queue_info, cs->p);
+        return 0;
+    }
+
 #ifdef AP_DEBUG
     {
         rv = apr_socket_timeout_set(csd, 0);
@@ -830,6 +773,7 @@ static int start_lingering_close_common(event_conn_state_t *cs, int in_worker)
 #else
     apr_socket_timeout_set(csd, 0);
 #endif
+
     cs->queue_timestamp = apr_time_now();
     /*
      * If some module requested a shortened waiting period, only wait for
@@ -845,75 +789,48 @@ static int start_lingering_close_common(event_conn_state_t *cs, int in_worker)
         cs->pub.state = CONN_STATE_LINGER_NORMAL;
     }
     apr_atomic_inc32(&lingering_count);
-    if (in_worker) { 
-        notify_suspend(cs);
-    }
-    else {
-        cs->c->sbh = NULL;
-    }
-    apr_thread_mutex_lock(timeout_mutex);
-    TO_QUEUE_APPEND(q, cs);
+    notify_suspend(cs);
+
     cs->pfd.reqevents = (
             cs->pub.sense == CONN_SENSE_WANT_WRITE ? APR_POLLOUT :
                     APR_POLLIN) | APR_POLLHUP | APR_POLLERR;
     cs->pub.sense = CONN_SENSE_DEFAULT;
+    apr_thread_mutex_lock(timeout_mutex);
+    TO_QUEUE_APPEND(q, cs);
     rv = apr_pollset_add(event_pollset, &cs->pfd);
-    apr_thread_mutex_unlock(timeout_mutex);
     if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
-        ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03092)
-                     "start_lingering_close: apr_pollset_add failure");
-        apr_thread_mutex_lock(timeout_mutex);
         TO_QUEUE_REMOVE(q, cs);
         apr_thread_mutex_unlock(timeout_mutex);
+        ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03092)
+                     "start_lingering_close: apr_pollset_add failure");
         apr_socket_close(cs->pfd.desc.s);
         ap_push_pool(worker_queue_info, cs->p);
         return 0;
     }
+    apr_thread_mutex_unlock(timeout_mutex);
     return 1;
 }
 
 /*
- * Close our side of the connection, flushing data to the client first.
+ * Defer flush and close of the connection by adding it to defer_linger_chain,
+ * for a worker to grab it and do the job (should that be blocking).
  * Pre-condition: cs is not in any timeout queue and not in the pollset,
  *                timeout_mutex is not locked
- * return: 0 if connection is fully closed,
- *         1 if connection is lingering
- * May only be called by worker thread.
- */
-static int start_lingering_close_blocking(event_conn_state_t *cs)
-{
-    if (ap_start_lingering_close(cs->c)) {
-        notify_suspend(cs);
-        ap_push_pool(worker_queue_info, cs->p);
-        return 0;
-    }
-    return start_lingering_close_common(cs, 1);
-}
-
-/*
- * Close our side of the connection, NOT flushing data to the client.
- * This should only be called if there has been an error or if we know
- * that our send buffers are empty.
- * Pre-condition: cs is not in any timeout queue and not in the pollset,
- *                timeout_mutex is not locked
- * return: 0 if connection is fully closed,
- *         1 if connection is lingering
- * may be called by listener thread
+ * return: 1 connection is alive (but aside and about to linger)
+ * May be called by listener thread.
  */
 static int start_lingering_close_nonblocking(event_conn_state_t *cs)
 {
-    conn_rec *c = cs->c;
-    apr_socket_t *csd = cs->pfd.desc.s;
-
-    if (ap_prep_lingering_close(c)
-        || c->aborted
-        || ap_shutdown_conn(c, 0) != APR_SUCCESS || c->aborted
-        || apr_socket_shutdown(csd, APR_SHUTDOWN_WRITE) != APR_SUCCESS) {
-        apr_socket_close(csd);
-        ap_push_pool(worker_queue_info, cs->p);
-        return 0;
+    event_conn_state_t *chain;
+    for (;;) {
+        cs->chain = chain = defer_linger_chain;
+        if (apr_atomic_casptr((void *)&defer_linger_chain, cs,
+                              chain) != chain) {
+            /* Race lost, try again */
+            continue;
+        }
+        return 1;
     }
-    return start_lingering_close_common(cs, 0);
 }
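
The CAS loops here and in `close_worker_sockets()` implement a classic lock-free (Treiber) stack over `defer_linger_chain`: push links the new node to the current head and swaps it in, pop detaches the current head. A generic sketch of the idiom with `apr_atomic_casptr()` (hypothetical `node_t`, illustration only):

```c
#include <apr_atomic.h>

typedef struct node_t { struct node_t *next; } node_t;
static node_t *volatile head;

static void stack_push(node_t *n)
{
    node_t *old;
    do {
        n->next = old = head;              /* link to current head */
    } while (apr_atomic_casptr((void *)&head, n, old) != old);
}

static node_t *stack_pop(void)
{
    node_t *n;
    do {
        n = head;
        if (!n)
            return NULL;                   /* empty */
    } while (apr_atomic_casptr((void *)&head, n->next, n) != n);
    return n;
}
```

The usual ABA caveat of CAS-based pops applies to the general idiom.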
 
 /*
@@ -924,16 +841,13 @@ static int start_lingering_close_nonblocking(event_conn_state_t *cs)
  */
 static int stop_lingering_close(event_conn_state_t *cs)
 {
-    apr_status_t rv;
     apr_socket_t *csd = ap_get_conn_socket(cs->c);
     ap_log_error(APLOG_MARK, APLOG_TRACE4, 0, ap_server_conf,
                  "socket reached timeout in lingering-close state");
-    rv = apr_socket_close(csd);
-    if (rv != APR_SUCCESS) {
-        ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(00468) "error closing socket");
-        AP_DEBUG_ASSERT(0);
-    }
+    abort_socket_nonblocking(csd);
     ap_push_pool(worker_queue_info, cs->p);
+    if (dying)
+        ap_queue_interrupt_one(worker_queue);
     return 0;
 }
 
@@ -1079,9 +993,16 @@ static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * soc
         c->current_thread = thd;
         /* Subsequent request on a conn, and thread number is part of ID */
         c->id = conn_id;
+
+        if (c->aborted) {
+            cs->pub.state = CONN_STATE_LINGER;
+        }
     }
 
-    if (c->clogging_input_filters && !c->aborted) {
+    if (cs->pub.state == CONN_STATE_LINGER) {
+        /* do lingering close below */
+    }
+    else if (c->clogging_input_filters) {
         /* Since we have an input filter which 'clogs' the input stream,
          * like mod_ssl used to, lets just do the normal read from input
          * filters, like the Worker MPM does. Filters that need to write
@@ -1095,20 +1016,14 @@ static void process_socket(apr_thread_t *thd, apr_pool_t * p, apr_socket_t * soc
         }
         apr_atomic_dec32(&clogged_count);
     }
-
+    else if (cs->pub.state == CONN_STATE_READ_REQUEST_LINE) {
 read_request:
-    if (cs->pub.state == CONN_STATE_READ_REQUEST_LINE) {
-        if (!c->aborted) {
-            ap_run_process_connection(c);
+        ap_run_process_connection(c);
 
-            /* state will be updated upon return
-             * fall thru to either wait for readability/timeout or
-             * do lingering close
-             */
-        }
-        else {
-            cs->pub.state = CONN_STATE_LINGER;
-        }
+        /* state will be updated upon return
+         * fall thru to either wait for readability/timeout or
+         * do lingering close
+         */
     }
 
     if (cs->pub.state == CONN_STATE_WRITE_COMPLETION) {
@@ -1131,18 +1046,29 @@ read_request:
              */
             cs->queue_timestamp = apr_time_now();
             notify_suspend(cs);
-            apr_thread_mutex_lock(timeout_mutex);
-            TO_QUEUE_APPEND(cs->sc->wc_q, cs);
             cs->pfd.reqevents = (
                     cs->pub.sense == CONN_SENSE_WANT_READ ? APR_POLLIN :
                             APR_POLLOUT) | APR_POLLHUP | APR_POLLERR;
             cs->pub.sense = CONN_SENSE_DEFAULT;
+            apr_thread_mutex_lock(timeout_mutex);
+            TO_QUEUE_APPEND(cs->sc->wc_q, cs);
             rc = apr_pollset_add(event_pollset, &cs->pfd);
-            apr_thread_mutex_unlock(timeout_mutex);
+            if (rc != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rc)) {
+                TO_QUEUE_REMOVE(cs->sc->wc_q, cs);
+                apr_thread_mutex_unlock(timeout_mutex);
+                ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf, APLOGNO(03465)
+                             "process_socket: apr_pollset_add failure for "
+                             "write completion");
+                apr_socket_close(cs->pfd.desc.s);
+                ap_push_pool(worker_queue_info, cs->p);
+            }
+            else {
+                apr_thread_mutex_unlock(timeout_mutex);
+            }
             return;
         }
         else if (c->keepalive != AP_CONN_KEEPALIVE || c->aborted ||
-            listener_may_exit) {
+                 listener_may_exit) {
             cs->pub.state = CONN_STATE_LINGER;
         }
         else if (c->data_in_input_filters) {
@@ -1168,19 +1094,23 @@ read_request:
          */
         cs->queue_timestamp = apr_time_now();
         notify_suspend(cs);
-        apr_thread_mutex_lock(timeout_mutex);
-        TO_QUEUE_APPEND(cs->sc->ka_q, cs);
 
         /* Add work to pollset. */
         cs->pfd.reqevents = APR_POLLIN;
+        apr_thread_mutex_lock(timeout_mutex);
+        TO_QUEUE_APPEND(cs->sc->ka_q, cs);
         rc = apr_pollset_add(event_pollset, &cs->pfd);
-        apr_thread_mutex_unlock(timeout_mutex);
-
-        if (rc != APR_SUCCESS) {
+        if (rc != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rc)) {
+            TO_QUEUE_REMOVE(cs->sc->ka_q, cs);
+            apr_thread_mutex_unlock(timeout_mutex);
             ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf, APLOGNO(03093)
-                         "process_socket: apr_pollset_add failure");
-            AP_DEBUG_ASSERT(rc == APR_SUCCESS);
+                         "process_socket: apr_pollset_add failure for "
+                         "keep alive");
+            apr_socket_close(cs->pfd.desc.s);
+            ap_push_pool(worker_queue_info, cs->p);
+            return;
         }
+        apr_thread_mutex_unlock(timeout_mutex);
     }
     else if (cs->pub.state == CONN_STATE_SUSPENDED) {
         apr_atomic_inc32(&suspended_count);
@@ -1219,6 +1149,9 @@ static void close_listeners(int process_slot, int *closed)
         }
         /* wake up the main thread */
         kill(ap_my_pid, SIGTERM);
+
+        ap_free_idle_pools(worker_queue_info);
+        ap_queue_interrupt_all(worker_queue);
     }
 }
 
@@ -1279,26 +1212,32 @@ static apr_status_t push_timer2worker(timer_event_t* te)
 }
 
 /*
- * Pre-condition: pfd->cs is neither in pollset nor timeout queue
+ * Pre-condition: cs is neither in event_pollset nor a timeout queue
  * this function may only be called by the listener
  */
-static apr_status_t push2worker(const apr_pollfd_t * pfd,
-                                apr_pollset_t * pollset)
+static apr_status_t push2worker(event_conn_state_t *cs, apr_socket_t *csd,
+                                apr_pool_t *ptrans)
 {
-    listener_poll_type *pt = (listener_poll_type *) pfd->client_data;
-    event_conn_state_t *cs = (event_conn_state_t *) pt->baton;
     apr_status_t rc;
 
-    rc = ap_queue_push(worker_queue, cs->pfd.desc.s, cs, cs->p);
+    if (cs) {
+        csd = cs->pfd.desc.s;
+        ptrans = cs->p;
+    }
+    rc = ap_queue_push(worker_queue, csd, cs, ptrans);
     if (rc != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf, APLOGNO(00471)
+                     "push2worker: ap_queue_push failed");
         /* trash the connection; we couldn't queue the connected
          * socket to a worker
          */
-        apr_bucket_alloc_destroy(cs->bucket_alloc);
-        apr_socket_close(cs->pfd.desc.s);
-        ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
-                     ap_server_conf, APLOGNO(00471) "push2worker: ap_queue_push failed");
-        ap_push_pool(worker_queue_info, cs->p);
+        if (csd) {
+            abort_socket_nonblocking(csd);
+        }
+        if (ptrans) {
+            ap_push_pool(worker_queue_info, ptrans);
+        }
+        signal_threads(ST_GRACEFUL);
     }
 
     return rc;
@@ -1347,6 +1286,13 @@ static void get_worker(int *have_idle_worker_p, int blocking, int *all_busy)
 static APR_RING_HEAD(timer_free_ring_t, timer_event_t) timer_free_ring;
 
 static apr_skiplist *timer_skiplist;
+static volatile apr_time_t timers_next_expiry;
+
+/* Same goal as for TIMEOUT_FUDGE_FACTOR (avoid extra poll calls), but applied
+ * to timers. Since their timeouts are custom (user defined), we can't be too
+ * approximative here (hence using 0.01s).
+ */
+#define EVENT_FUDGE_FACTOR apr_time_from_msec(10)
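
For context, `apr_skiplist_insert()` keeps the timers ordered through a user-supplied comparator; the real one lives outside these hunks, but it plausibly compares absolute expiry times along these lines (illustrative sketch only):

```c
/* Illustrative comparator: order timer events by absolute expiry. */
static int timer_comp(void *a, void *b)
{
    apr_time_t t1 = ((timer_event_t *) a)->when;
    apr_time_t t2 = ((timer_event_t *) b)->when;
    return (t1 < t2) ? -1 : 1;
}
```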
 
 /* The following compare function is used by apr_skiplist_insert() to keep the
  * elements (timers) sorted and provide O(log n) complexity (this is also true
@@ -1393,8 +1339,24 @@ static apr_status_t event_register_timed_callback(apr_time_t t,
     /* XXXXX: optimize */
     te->when = t + apr_time_now();
 
-    /* Okay, add sorted by when.. */
-    apr_skiplist_insert(timer_skiplist, te);
+    {
+        apr_time_t next_expiry;
+
+        /* Okay, add sorted by when.. */
+        apr_skiplist_insert(timer_skiplist, te);
+
+        /* Cheaply update the overall timers' next expiry according to
+         * this event, if necessary.
+         */
+        next_expiry = timers_next_expiry;
+        if (!next_expiry || next_expiry > te->when + EVENT_FUDGE_FACTOR) {
+            timers_next_expiry = te->when;
+            /* Unblock the poll()ing listener for it to update its timeout. */
+            if (listener_is_wakeable) {
+                apr_pollset_wakeup(event_pollset);
+            }
+        }
+    }
 
     apr_thread_mutex_unlock(g_timer_skiplist_mtx);
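
Callers are unaffected by this change; a module still registers a one-shot callback relative to now, e.g. (hypothetical `my_cb`/`baton`):

```c
/* Run my_cb(baton) roughly five seconds from now. The patch only
 * changes how the poll()ing listener learns about the new timer.
 */
event_register_timed_callback(apr_time_from_sec(5), my_cb, baton);
```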
 
@@ -1428,17 +1390,17 @@ static void process_lingering_close(event_conn_state_t *cs, const apr_pollfd_t *
     }
 
     apr_thread_mutex_lock(timeout_mutex);
+    TO_QUEUE_REMOVE(q, cs);
     rv = apr_pollset_remove(event_pollset, pfd);
-    AP_DEBUG_ASSERT(rv == APR_SUCCESS);
+    apr_thread_mutex_unlock(timeout_mutex);
+    AP_DEBUG_ASSERT(rv == APR_SUCCESS || APR_STATUS_IS_NOTFOUND(rv));
 
     rv = apr_socket_close(csd);
     AP_DEBUG_ASSERT(rv == APR_SUCCESS);
 
-    TO_QUEUE_REMOVE(q, cs);
-    apr_thread_mutex_unlock(timeout_mutex);
-    TO_QUEUE_ELEM_INIT(cs);
-
     ap_push_pool(worker_queue_info, cs->p);
+    if (dying)
+        ap_queue_interrupt_one(worker_queue);
 }
 
 /* call 'func' for all elements of 'q' with timeout less than 'timeout_time'.
@@ -1449,13 +1411,13 @@ static void process_timeout_queue(struct timeout_queue *q,
                                   apr_time_t timeout_time,
                                   int (*func)(event_conn_state_t *))
 {
-    int total = 0, count;
+    apr_uint32_t total = 0, count;
     event_conn_state_t *first, *cs, *last;
     struct timeout_head_t trash;
     struct timeout_queue *qp;
     apr_status_t rv;
 
-    if (!*q->total) {
+    if (!apr_atomic_read32(q->total)) {
         return;
     }
 
@@ -1464,20 +1426,32 @@ static void process_timeout_queue(struct timeout_queue *q,
         count = 0;
         cs = first = last = APR_RING_FIRST(&qp->head);
         while (cs != APR_RING_SENTINEL(&qp->head, event_conn_state_t,
-                                       timeout_list)
-               /* Trash the entry if:
-                * - no timeout_time was given (asked for all), or
-                * - it expired (according to the queue timeout), or
-                * - the system clock skewed in the past: no entry should be
-                *   registered above the given timeout_time (~now) + the queue
-                *   timeout, we won't keep any here (eg. for centuries).
-                * Stop otherwise, no following entry will match thanks to the
-                * single timeout per queue (entries are added to the end!).
-                * This allows maintenance in O(1).
-                */
-               && (!timeout_time
-                   || cs->queue_timestamp + qp->timeout < timeout_time
-                   || cs->queue_timestamp > timeout_time + qp->timeout)) {
+                                       timeout_list)) {
+            /* Trash the entry if:
+             * - no timeout_time was given (asked for all), or
+             * - it expired (according to the queue timeout), or
+             * - the system clock skewed in the past: no entry should be
+             *   registered above the given timeout_time (~now) + the queue
+             *   timeout, we won't keep any here (eg. for centuries).
+             *
+             * Otherwise stop, no following entry will match thanks to the
+             * single timeout per queue (entries are added to the end!).
+             * This allows maintenance in O(1).
+             */
+            if (timeout_time
+                    && cs->queue_timestamp + qp->timeout > timeout_time
+                    && cs->queue_timestamp < timeout_time + qp->timeout) {
+                /* Since this is the next expiring of this queue, update the
+                 * overall queues' next expiry if it's later than this one.
+                 */
+                apr_time_t q_expiry = cs->queue_timestamp + qp->timeout;
+                apr_time_t next_expiry = queues_next_expiry;
+                if (!next_expiry || next_expiry > q_expiry) {
+                    queues_next_expiry = q_expiry;
+                }
+                break;
+            }
+
             last = cs;
             rv = apr_pollset_remove(event_pollset, &cs->pfd);
             if (rv != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rv)) {
@@ -1493,14 +1467,14 @@ static void process_timeout_queue(struct timeout_queue *q,
         APR_RING_UNSPLICE(first, last, timeout_list);
         APR_RING_SPLICE_TAIL(&trash, first, last, event_conn_state_t,
                              timeout_list);
+        AP_DEBUG_ASSERT(apr_atomic_read32(q->total) >= count);
+        apr_atomic_sub32(q->total, count);
         qp->count -= count;
         total += count;
     }
     if (!total)
         return;
 
-    AP_DEBUG_ASSERT(*q->total >= total);
-    *q->total -= total;
     apr_thread_mutex_unlock(timeout_mutex);
     first = APR_RING_FIRST(&trash);
     do {
@@ -1512,13 +1486,28 @@ static void process_timeout_queue(struct timeout_queue *q,
     apr_thread_mutex_lock(timeout_mutex);
 }
 
+static void process_keepalive_queue(apr_time_t timeout_time)
+{
+    /* If all workers are busy, we kill older keep-alive connections so
+     * that they may connect to another process.
+     */
+    if (!timeout_time) {
+        ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
+                     "All workers are busy or dying, will close %u "
+                     "keep-alive connections",
+                     apr_atomic_read32(keepalive_q->total));
+    }
+    process_timeout_queue(keepalive_q, timeout_time,
+                          start_lingering_close_nonblocking);
+}
+
 static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
 {
-    timer_event_t *ep;
     timer_event_t *te;
     apr_status_t rc;
     proc_info *ti = dummy;
-    int process_slot = ti->pid;
+    int process_slot = ti->pslot;
+    struct process_score *ps = ap_get_scoreboard_process(process_slot);
     apr_pool_t *tpool = apr_thread_pool_get(thd);
     void *csd = NULL;
     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
@@ -1534,14 +1523,6 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
     last_log = apr_time_now();
     free(ti);
 
-    /* the following times out events that are really close in the future
-     *   to prevent extra poll calls
-     *
-     * current value is .1 second
-     */
-#define TIMEOUT_FUDGE_FACTOR 100000
-#define EVENT_FUDGE_FACTOR 10000
-
     rc = init_pollset(tpool);
     if (rc != APR_SUCCESS) {
         ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
@@ -1559,6 +1540,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
 
     for (;;) {
         int workers_were_busy = 0;
+
         if (listener_may_exit) {
             close_listeners(process_slot, &closed);
             if (terminate_mode == ST_UNGRACEFUL
@@ -1572,7 +1554,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
         now = apr_time_now();
         if (APLOGtrace6(ap_server_conf)) {
             /* trace log status every second */
-            if (now - last_log > apr_time_from_msec(1000)) {
+            if (now - last_log > apr_time_from_sec(1)) {
                 last_log = now;
                 apr_thread_mutex_lock(timeout_mutex);
                 ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
@@ -1580,40 +1562,85 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
                              "keep-alive: %d lingering: %d suspended: %u)",
                              apr_atomic_read32(&connection_count),
                              apr_atomic_read32(&clogged_count),
-                             *write_completion_q->total,
-                             *keepalive_q->total,
+                             apr_atomic_read32(write_completion_q->total),
+                             apr_atomic_read32(keepalive_q->total),
                              apr_atomic_read32(&lingering_count),
                              apr_atomic_read32(&suspended_count));
+                if (dying) {
+                    ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
+                                 "%u/%u workers shutdown",
+                                 apr_atomic_read32(&threads_shutdown),
+                                 threads_per_child);
+                }
                 apr_thread_mutex_unlock(timeout_mutex);
             }
         }
 
-        apr_thread_mutex_lock(g_timer_skiplist_mtx);
-        te = apr_skiplist_peek(timer_skiplist);
-        if (te) {
-            if (te->when > now) {
-                timeout_interval = te->when - now;
+        /* Start with an infinite poll() timeout and update it according to
+         * the next expiring timer or queue entry. If there are none and the
+         * listener is wakeable, it can poll() indefinitely until a wakeup
+         * occurs; otherwise periodic checks (maintenance, shutdown, ...)
+         * must be performed.
+         */
+        timeout_interval = -1;
+
+        /* Push expired timers to a worker, the first remaining one determines
+         * the maximum time to poll() below, if any.
+         */
+        timeout_time = timers_next_expiry;
+        if (timeout_time && timeout_time < now + EVENT_FUDGE_FACTOR) {
+            apr_thread_mutex_lock(g_timer_skiplist_mtx);
+            while ((te = apr_skiplist_peek(timer_skiplist))) {
+                if (te->when > now + EVENT_FUDGE_FACTOR) {
+                    timers_next_expiry = te->when;
+                    timeout_interval = te->when - now;
+                    break;
+                }
+                apr_skiplist_pop(timer_skiplist, NULL);
+                push_timer2worker(te);
             }
-            else {
-                timeout_interval = 1;
+            if (!te) {
+                timers_next_expiry = 0;
             }
+            apr_thread_mutex_unlock(g_timer_skiplist_mtx);
         }
-        else {
-            timeout_interval = apr_time_from_msec(100);
+
+        /* Same for queues, use their next expiry, if any. */
+        timeout_time = queues_next_expiry;
+        if (timeout_time
+                && (timeout_interval < 0
+                    || timeout_time <= now
+                    || timeout_interval > timeout_time - now)) {
+            timeout_interval = timeout_time > now ? timeout_time - now : 1;
+        }
+
+        /* When non-wakeable, don't wait more than 100 ms, in any case. */
+#define NON_WAKEABLE_POLL_TIMEOUT apr_time_from_msec(100)
+        if (!listener_is_wakeable
+                && (timeout_interval < 0
+                    || timeout_interval > NON_WAKEABLE_POLL_TIMEOUT)) {
+            timeout_interval = NON_WAKEABLE_POLL_TIMEOUT;
         }
-        apr_thread_mutex_unlock(g_timer_skiplist_mtx);
 
         rc = apr_pollset_poll(event_pollset, timeout_interval, &num, &out_pfd);
         if (rc != APR_SUCCESS) {
             if (APR_STATUS_IS_EINTR(rc)) {
-                continue;
+                /* Woken up, if we are exiting we must fall through to kill
+                 * kept-alive connections, otherwise we only need to update
+                 * timeouts (logic is above, so restart the loop).
+                 */
+                if (!listener_may_exit) {
+                    continue;
+                }
+                timeout_time = 0;
             }
-            if (!APR_STATUS_IS_TIMEUP(rc)) {
+            else if (!APR_STATUS_IS_TIMEUP(rc)) {
                 ap_log_error(APLOG_MARK, APLOG_CRIT, rc, ap_server_conf,
                              "apr_pollset_poll failed.  Attempting to "
                              "shutdown process gracefully");
                 signal_threads(ST_GRACEFUL);
             }
+            num = 0;
         }
 
         if (listener_may_exit) {
@@ -1623,21 +1650,6 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
                 break;
         }
 
-        now = apr_time_now();
-        apr_thread_mutex_lock(g_timer_skiplist_mtx);
-        ep = apr_skiplist_peek(timer_skiplist);
-        while (ep) {
-            if (ep->when < now + EVENT_FUDGE_FACTOR) {
-                apr_skiplist_pop(timer_skiplist, NULL);
-                push_timer2worker(ep);
-            }
-            else {
-                break;
-            }
-            ep = apr_skiplist_peek(timer_skiplist);
-        }
-        apr_thread_mutex_unlock(g_timer_skiplist_mtx);
-
         while (num) {
             pt = (listener_poll_type *) out_pfd->client_data;
             if (pt->type == PT_CSD) {
@@ -1674,29 +1686,23 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
                         break;
                     }
 
-                    TO_QUEUE_ELEM_INIT(cs);
                     /* If we didn't get a worker immediately for a keep-alive
                      * request, we close the connection, so that the client can
                      * re-connect to a different process.
                      */
                     if (!have_idle_worker) {
                         start_lingering_close_nonblocking(cs);
-                        break;
                     }
-                    rc = push2worker(out_pfd, event_pollset);
-                    if (rc != APR_SUCCESS) {
-                        ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
-                                     ap_server_conf, APLOGNO(03095)
-                                     "push2worker failed");
-                    }
-                    else {
+                    else if (push2worker(cs, NULL, NULL) == APR_SUCCESS) {
                         have_idle_worker = 0;
                     }
                     break;
+
                 case CONN_STATE_LINGER_NORMAL:
                 case CONN_STATE_LINGER_SHORT:
                     process_lingering_close(cs, out_pfd);
                     break;
+
                 default:
                     ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
                                  ap_server_conf, APLOGNO(03096)
@@ -1775,18 +1781,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
 
                     if (csd != NULL) {
                         conns_this_child--;
-                        rc = ap_queue_push(worker_queue, csd, NULL, ptrans);
-                        if (rc != APR_SUCCESS) {
-                            /* trash the connection; we couldn't queue the connected
-                             * socket to a worker
-                             */
-                            apr_socket_close(csd);
-                            ap_log_error(APLOG_MARK, APLOG_CRIT, rc,
-                                         ap_server_conf, APLOGNO(03098)
-                                         "ap_queue_push failed");
-                            ap_push_pool(worker_queue_info, ptrans);
-                        }
-                        else {
+                        if (push2worker(NULL, csd, ptrans) == APR_SUCCESS) {
                             have_idle_worker = 0;
                         }
                     }
@@ -1802,51 +1797,73 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
         /* XXX possible optimization: stash the current time for use as
          * r->request_time for new requests
          */
-        now = apr_time_now();
-        /* We only do this once per 0.1s (TIMEOUT_FUDGE_FACTOR), or on a clock
-         * skew (if the system time is set back in the meantime, timeout_time
-         * will exceed now + TIMEOUT_FUDGE_FACTOR, can't happen otherwise).
+        /* We process the timeout queues here only when their overall next
+         * expiry (read once above) is over. This happens accurately since
+         * adding to the queues (in workers) can only decrease this expiry,
+         * while latest ones are only taken into account here (in listener)
+         * during queues' processing, with the lock held. This works both
+         * with and without wake-ability.
          */
-        if (now > timeout_time || now + TIMEOUT_FUDGE_FACTOR < timeout_time ) {
-            struct process_score *ps;
+        if (timeout_time && timeout_time < (now = apr_time_now())) {
             timeout_time = now + TIMEOUT_FUDGE_FACTOR;
 
             /* handle timed out sockets */
             apr_thread_mutex_lock(timeout_mutex);
 
+            /* Processing all the queues below will recompute this. */
+            queues_next_expiry = 0;
+
             /* Step 1: keepalive timeouts */
-            /* If all workers are busy, we kill older keep-alive connections so that they
-             * may connect to another process.
-             */
-            if (workers_were_busy && *keepalive_q->total) {
-                ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
-                             "All workers are busy, will close %d keep-alive "
-                             "connections",
-                             *keepalive_q->total);
-                process_timeout_queue(keepalive_q, 0,
-                                      start_lingering_close_nonblocking);
+            if (workers_were_busy || dying) {
+                process_keepalive_queue(0); /* kill'em all \m/ */
             }
             else {
-                process_timeout_queue(keepalive_q, timeout_time,
-                                      start_lingering_close_nonblocking);
+                process_keepalive_queue(timeout_time);
             }
             /* Step 2: write completion timeouts */
             process_timeout_queue(write_completion_q, timeout_time,
                                   start_lingering_close_nonblocking);
             /* Step 3: (normal) lingering close completion timeouts */
-            process_timeout_queue(linger_q, timeout_time, stop_lingering_close);
+            process_timeout_queue(linger_q, timeout_time,
+                                  stop_lingering_close);
             /* Step 4: (short) lingering close completion timeouts */
-            process_timeout_queue(short_linger_q, timeout_time, stop_lingering_close);
+            process_timeout_queue(short_linger_q, timeout_time,
+                                  stop_lingering_close);
 
-            ps = ap_get_scoreboard_process(process_slot);
-            ps->write_completion = *write_completion_q->total;
-            ps->keep_alive = *keepalive_q->total;
             apr_thread_mutex_unlock(timeout_mutex);
 
+            ps->keep_alive = apr_atomic_read32(keepalive_q->total);
+            ps->write_completion = apr_atomic_read32(write_completion_q->total);
             ps->connections = apr_atomic_read32(&connection_count);
             ps->suspended = apr_atomic_read32(&suspended_count);
             ps->lingering_close = apr_atomic_read32(&lingering_count);
         }
+        else if ((workers_were_busy || dying)
+                 && apr_atomic_read32(keepalive_q->total)) {
+            apr_thread_mutex_lock(timeout_mutex);
+            process_keepalive_queue(0); /* kill'em all \m/ */
+            apr_thread_mutex_unlock(timeout_mutex);
+            ps->keep_alive = 0;
+        }
+
+        /* If there are some lingering closes to defer (to a worker), schedule
+         * them now. We might wake up a worker spuriously if another one
+         * empties defer_linger_chain in the meantime, but there may also be
+         * no active worker, or only busy ones, for an undefined time.  In
+         * any case a deferred
+         * lingering close can't starve if we do that here since the chain is
+         * filled only above in the listener and it's emptied only in the
+         * worker(s); thus a NULL here means it will stay so while the listener
+         * waits (possibly indefinitely) in poll().
+         */
+        if (defer_linger_chain) {
+            get_worker(&have_idle_worker, 0, &workers_were_busy);
+            if (have_idle_worker
+                    && defer_linger_chain /* re-test */
+                    && push2worker(NULL, NULL, NULL) == APR_SUCCESS) {
+                have_idle_worker = 0;
+            }
+        }
+
         if (listeners_disabled && !workers_were_busy
             && (int)apr_atomic_read32(&connection_count)
                - (int)apr_atomic_read32(&lingering_count)
@@ -1869,6 +1886,34 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
     return NULL;
 }
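
Condensed, the listener's new timeout selection is "soonest of next timer and next queue expiry; infinite if none and the pollset is wakeable; at most 100 ms otherwise". A hedged standalone restatement (assumed helper, not part of the patch):

```c
static apr_interval_time_t choose_poll_timeout(apr_time_t now,
                                               apr_time_t next_timer,
                                               apr_time_t next_queue,
                                               int wakeable)
{
    apr_interval_time_t t = -1;                  /* infinite */
    if (next_timer)
        t = next_timer > now ? next_timer - now : 1;
    if (next_queue && (t < 0 || next_queue - now < t))
        t = next_queue > now ? next_queue - now : 1;
    if (!wakeable && (t < 0 || t > apr_time_from_msec(100)))
        t = apr_time_from_msec(100);
    return t;
}
```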
 
+/*
+ * During graceful shutdown, if there are more running worker threads than
+ * open connections, exit one worker thread.
+ *
+ * return 1 if thread should exit, 0 if it should continue running.
+ */
+static int worker_thread_should_exit_early(void)
+{
+    for (;;) {
+        apr_uint32_t conns = apr_atomic_read32(&connection_count);
+        apr_uint32_t dead = apr_atomic_read32(&threads_shutdown);
+        apr_uint32_t newdead;
+
+        AP_DEBUG_ASSERT(dead <= threads_per_child);
+        if (conns >= threads_per_child - dead)
+            return 0;
+
+        newdead = dead + 1;
+        if (apr_atomic_cas32(&threads_shutdown, newdead, dead) == dead) {
+            /*
+             * No other thread has exited in the mean time, safe to exit
+             * this one.
+             */
+            return 1;
+        }
+    }
+}
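
`apr_atomic_cas32(mem, with, cmp)` stores `with` only when `*mem == cmp` and returns the value it found, so equality with `cmp` signals success; the same idiom generalizes to any bounded increment, sketched here:

```c
#include <apr_atomic.h>

/* Atomically increment *counter unless it already reached limit;
 * returns 1 on success, 0 if the limit was hit first.
 */
static int bounded_inc32(volatile apr_uint32_t *counter, apr_uint32_t limit)
{
    for (;;) {
        apr_uint32_t old = apr_atomic_read32(counter);
        if (old >= limit)
            return 0;
        if (apr_atomic_cas32(counter, old + 1, old) == old)
            return 1;
    }
}
```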
+
 /* XXX For ungraceful termination/restart, we definitely don't want to
  *     wait for active connections to finish but we may want to wait
  *     for idle workers to get out of the queue code and release mutexes,
@@ -1879,8 +1924,8 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t * thd, void *dummy)
 static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
 {
     proc_info *ti = dummy;
-    int process_slot = ti->pid;
-    int thread_slot = ti->tid;
+    int process_slot = ti->pslot;
+    int thread_slot = ti->tslot;
     apr_socket_t *csd = NULL;
     event_conn_state_t *cs;
     apr_pool_t *ptrans;         /* Pool for per-transaction stuff */
@@ -1892,7 +1937,7 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
 
     ap_scoreboard_image->servers[process_slot][thread_slot].pid = ap_my_pid;
     ap_scoreboard_image->servers[process_slot][thread_slot].tid = apr_os_thread_current();
-    ap_scoreboard_image->servers[process_slot][thread_slot].generation = retained->my_generation;
+    ap_scoreboard_image->servers[process_slot][thread_slot].generation = retained->mpm->my_generation;
     ap_update_child_status_from_indexes(process_slot, thread_slot,
                                         SERVER_STARTING, NULL);
 
@@ -1916,6 +1961,9 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
         if (workers_may_exit) {
             break;
         }
+        if (dying && worker_thread_should_exit_early()) {
+            break;
+        }
 
         te = NULL;
         rv = ap_queue_pop_something(worker_queue, &csd, &cs, &ptrans, &te);
@@ -1959,8 +2007,35 @@ static void *APR_THREAD_FUNC worker_thread(apr_thread_t * thd, void *dummy)
         }
         else {
             is_idle = 0;
-            worker_sockets[thread_slot] = csd;
-            process_socket(thd, ptrans, csd, cs, process_slot, thread_slot);
+            if (csd != NULL) {
+                worker_sockets[thread_slot] = csd;
+                process_socket(thd, ptrans, csd, cs, process_slot, thread_slot);
+                worker_sockets[thread_slot] = NULL;
+            }
+        }
+
+        /* If there are deferred lingering closes, handle them now. */
+        while (!workers_may_exit) {
+            cs = defer_linger_chain;
+            if (!cs) {
+                break;
+            }
+            if (apr_atomic_casptr((void *)&defer_linger_chain, cs->chain,
+                                  cs) != cs) {
+                /* Race lost, try again */
+                continue;
+            }
+            cs->chain = NULL;
+
+            worker_sockets[thread_slot] = csd = cs->pfd.desc.s;
+#ifdef AP_DEBUG
+            rv = apr_socket_timeout_set(csd, SECONDS_TO_LINGER);
+            AP_DEBUG_ASSERT(rv == APR_SUCCESS);
+#else
+            apr_socket_timeout_set(csd, SECONDS_TO_LINGER);
+#endif
+            cs->pub.state = CONN_STATE_LINGER;
+            process_socket(thd, cs->p, csd, cs, process_slot, thread_slot);
             worker_sockets[thread_slot] = NULL;
         }
     }
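
The loop above is the consumer side of a lock-free (Treiber-style) stack:
apr_atomic_casptr() detaches the head and retries if another worker won the
race. For reference, the matching producer side of such a chain looks roughly
like the sketch below (hypothetical conn_t/defer_chain names; the actual push
onto defer_linger_chain happens in the lingering-close path, outside this
hunk):

    #include <apr_atomic.h>

    typedef struct conn_t {
        struct conn_t *chain;
    } conn_t;

    static conn_t *volatile defer_chain;

    static void defer_push(conn_t *cs)
    {
        conn_t *head;
        do {
            head = defer_chain;   /* snapshot the current head */
            cs->chain = head;     /* link the new node in front of it */
            /* apr_atomic_casptr() returns the old head; retry if another
             * thread pushed or popped in the meantime. */
        } while (apr_atomic_casptr((void *)&defer_chain, cs, head) != head);
    }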
@@ -1993,9 +2068,8 @@ static void create_listener_thread(thread_starter * ts)
     apr_status_t rv;
 
     my_info = (proc_info *) ap_malloc(sizeof(proc_info));
-    my_info->pid = my_child_num;
-    my_info->tid = -1;          /* listener thread doesn't have a thread slot */
-    my_info->sd = 0;
+    my_info->pslot = my_child_num;
+    my_info->tslot = -1;      /* listener thread doesn't have a thread slot */
     rv = apr_thread_create(&ts->listener, thread_attr, listener_thread,
                            my_info, pchild);
     if (rv != APR_SUCCESS) {
@@ -2027,6 +2101,8 @@ static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
     int prev_threads_created;
     int max_recycled_pools = -1;
     int good_methods[] = {APR_POLLSET_KQUEUE, APR_POLLSET_PORT, APR_POLLSET_EPOLL};
+    /* XXX don't we need more to handle K-A or lingering close? */
+    const apr_uint32_t pollset_size = threads_per_child * 2;
 
     /* We must create the fd queues before we start up the listener
      * and worker threads. */
@@ -2065,25 +2141,25 @@ static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
     }
 
     /* Create the main pollset */
-    for (i = 0; i < sizeof(good_methods) / sizeof(void*); i++) {
-        rv = apr_pollset_create_ex(&event_pollset,
-                            threads_per_child*2, /* XXX don't we need more, to handle
-                                                * connections in K-A or lingering
-                                                * close?
-                                                */
-                            pchild, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY | APR_POLLSET_NODEFAULT,
-                            good_methods[i]);
+    for (i = 0; i < sizeof(good_methods) / sizeof(good_methods[0]); i++) {
+        apr_uint32_t flags = APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY |
+                             APR_POLLSET_NODEFAULT | APR_POLLSET_WAKEABLE;
+        rv = apr_pollset_create_ex(&event_pollset, pollset_size, pchild, flags,
+                                   good_methods[i]);
+        if (rv == APR_SUCCESS) {
+            listener_is_wakeable = 1;
+            break;
+        }
+        flags &= ~APR_POLLSET_WAKEABLE;
+        rv = apr_pollset_create_ex(&event_pollset, pollset_size, pchild, flags,
+                                   good_methods[i]);
         if (rv == APR_SUCCESS) {
             break;
         }
     }
     if (rv != APR_SUCCESS) {
-        rv = apr_pollset_create(&event_pollset,
-                               threads_per_child*2, /* XXX don't we need more, to handle
-                                                     * connections in K-A or lingering
-                                                     * close?
-                                                     */
-                               pchild, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
+        rv = apr_pollset_create(&event_pollset, pollset_size, pchild,
+                                APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
     }
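
The two-step creation above first asks for a wakeable pollset and only falls
back to a plain one if that fails, so listener_is_wakeable reflects what the
platform supports. What APR_POLLSET_WAKEABLE buys is sketched below
(illustrative code, not event.c): a thread blocked in apr_pollset_poll() with
an infinite timeout can be interrupted from another thread, the poll
returning an EINTR-class status:

    #include <apr_poll.h>

    /* Listener side: block until I/O, a signal, or an explicit wakeup. */
    static void wait_for_events(apr_pollset_t *pollset)
    {
        apr_int32_t num;
        const apr_pollfd_t *out_pfds;
        apr_status_t rv;

        rv = apr_pollset_poll(pollset, -1 /* no timeout */, &num, &out_pfds);
        if (APR_STATUS_IS_EINTR(rv)) {
            /* Woken by apr_pollset_wakeup() or a signal: re-check shared
             * state (e.g. defer_linger_chain) before polling again. */
            return;
        }
        /* ... dispatch the num ready descriptors ... */
    }

    /* Other side: nudge the listener out of poll(), guarded by the
     * capability detected at pollset creation. */
    static void nudge_listener(apr_pollset_t *pollset, int wakeable)
    {
        if (wakeable)
            apr_pollset_wakeup(pollset);
    }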
     if (rv != APR_SUCCESS) {
         ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03103)
@@ -2092,7 +2168,9 @@ static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
     }
 
     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(02471)
-                 "start_threads: Using %s", apr_pollset_method_name(event_pollset));
+                 "start_threads: Using %s (%swakeable)",
+                 apr_pollset_method_name(event_pollset),
+                 listener_is_wakeable ? "" : "not ");
     worker_sockets = apr_pcalloc(pchild, threads_per_child
                                  * sizeof(apr_socket_t *));
 
@@ -2103,14 +2181,13 @@ static void *APR_THREAD_FUNC start_threads(apr_thread_t * thd, void *dummy)
             int status =
                 ap_scoreboard_image->servers[my_child_num][i].status;
 
-            if (status != SERVER_GRACEFUL && status != SERVER_DEAD) {
+            if (status != SERVER_DEAD) {
                 continue;
             }
 
             my_info = (proc_info *) ap_malloc(sizeof(proc_info));
-            my_info->pid = my_child_num;
-            my_info->tid = i;
-            my_info->sd = 0;
+            my_info->pslot = my_child_num;
+            my_info->tslot = i;
 
             /* We are creating threads right now */
             ap_update_child_status_from_indexes(my_child_num, i,
@@ -2241,15 +2318,15 @@ static void child_main(int child_num_arg, int child_bucket)
     apr_thread_t *start_thread_id;
     int i;
 
-    mpm_state = AP_MPMQ_STARTING;       /* for benefit of any hooks that run as this
-                                         * child initializes
-                                         */
+    /* for benefit of any hooks that run as this child initializes */
+    retained->mpm->mpm_state = AP_MPMQ_STARTING;
+
     ap_my_pid = getpid();
     ap_fatal_signal_child_setup(ap_server_conf);
     apr_pool_create(&pchild, pconf);
 
     /* close unused listeners and pods */
-    for (i = 0; i < retained->num_buckets; i++) {
+    for (i = 0; i < retained->mpm->num_buckets; i++) {
         if (i != child_bucket) {
             ap_close_listeners_ex(all_buckets[i].listeners);
             ap_mpm_podx_close(all_buckets[i].pod);
@@ -2326,7 +2403,7 @@ static void child_main(int child_num_arg, int child_bucket)
         clean_child_exit(APEXIT_CHILDSICK);
     }
 
-    mpm_state = AP_MPMQ_RUNNING;
+    retained->mpm->mpm_state = AP_MPMQ_RUNNING;
 
     /* If we are only running in one_process mode, we will want to
      * still handle signals. */
@@ -2411,10 +2488,18 @@ static int make_child(server_rec * s, int slot, int bucket)
         retained->max_daemons_limit = slot + 1;
     }
 
+    if (ap_scoreboard_image->parent[slot].pid != 0) {
+        /* XXX replace with an assert, or remove? */
+        ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(03455)
+                 "BUG: Scoreboard slot %d should be empty but is "
+                 "in use by pid %" APR_PID_T_FMT,
+                 slot, ap_scoreboard_image->parent[slot].pid);
+        return -1;
+    }
+
     if (one_process) {
         my_bucket = &all_buckets[0];
 
-        set_signals();
         event_note_child_started(slot, getpid());
         child_main(slot, 0);
         /* NOTREACHED */
@@ -2464,17 +2549,12 @@ static int make_child(server_rec * s, int slot, int bucket)
         return -1;
     }
 
-    if (ap_scoreboard_image->parent[slot].pid != 0) {
-        /* This new child process is squatting on the scoreboard
-         * entry owned by an exiting child process, which cannot
-         * exit until all active requests complete.
-         */
-        event_note_child_lost_slot(slot, pid);
-    }
     ap_scoreboard_image->parent[slot].quiescing = 0;
     ap_scoreboard_image->parent[slot].not_accepting = 0;
     ap_scoreboard_image->parent[slot].bucket = bucket;
     event_note_child_started(slot, pid);
+    active_daemons++;
+    retained->total_daemons++;
     return 0;
 }
 
@@ -2483,11 +2563,11 @@ static void startup_children(int number_to_start)
 {
     int i;
 
-    for (i = 0; number_to_start && i < ap_daemons_limit; ++i) {
+    for (i = 0; number_to_start && i < server_limit; ++i) {
         if (ap_scoreboard_image->parent[i].pid != 0) {
             continue;
         }
-        if (make_child(ap_server_conf, i, i % retained->num_buckets) < 0) {
+        if (make_child(ap_server_conf, i, i % retained->mpm->num_buckets) < 0) {
             break;
         }
         --number_to_start;
@@ -2497,34 +2577,22 @@ static void startup_children(int number_to_start)
 static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
 {
     int i, j;
-    int idle_thread_count;
+    int idle_thread_count = 0;
     worker_score *ws;
     process_score *ps;
-    int free_length;
-    int totally_free_length = 0;
+    int free_length = 0;
     int free_slots[MAX_SPAWN_RATE];
-    int last_non_dead;
-    int total_non_dead;
+    int last_non_dead = -1;
     int active_thread_count = 0;
 
-    /* initialize the free_list */
-    free_length = 0;
-
-    idle_thread_count = 0;
-    last_non_dead = -1;
-    total_non_dead = 0;
-
-    for (i = 0; i < ap_daemons_limit; ++i) {
+    for (i = 0; i < server_limit; ++i) {
         /* Initialization to satisfy the compiler. It doesn't know
          * that threads_per_child is always > 0 */
         int status = SERVER_DEAD;
-        int any_dying_threads = 0;
-        int any_dead_threads = 0;
-        int all_dead_threads = 1;
         int child_threads_active = 0;
 
         if (i >= retained->max_daemons_limit &&
-            totally_free_length == retained->idle_spawn_rate[child_bucket]) {
+            free_length == retained->idle_spawn_rate[child_bucket]) {
             /* short cut if all active processes have been examined and
              * enough empty scoreboard slots have been found
              */
@@ -2532,27 +2600,19 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
             break;
         }
         ps = &ap_scoreboard_image->parent[i];
-        for (j = 0; j < threads_per_child; j++) {
-            ws = &ap_scoreboard_image->servers[i][j];
-            status = ws->status;
-
-            /* XXX any_dying_threads is probably no longer needed    GLA */
-            any_dying_threads = any_dying_threads ||
-                (status == SERVER_GRACEFUL);
-            any_dead_threads = any_dead_threads || (status == SERVER_DEAD);
-            all_dead_threads = all_dead_threads &&
-                (status == SERVER_DEAD || status == SERVER_GRACEFUL);
-
-            /* We consider a starting server as idle because we started it
-             * at least a cycle ago, and if it still hasn't finished starting
-             * then we're just going to swamp things worse by forking more.
-             * So we hopefully won't need to fork more if we count it.
-             * This depends on the ordering of SERVER_READY and SERVER_STARTING.
-             */
-            if (ps->pid != 0) { /* XXX just set all_dead_threads in outer
-                                   for loop if no pid?  not much else matters */
+        if (ps->pid != 0) {
+            for (j = 0; j < threads_per_child; j++) {
+                ws = &ap_scoreboard_image->servers[i][j];
+                status = ws->status;
+
+                /* We consider a starting server as idle because we started it
+                 * at least a cycle ago, and if it still hasn't finished starting
+                 * then we're just going to swamp things worse by forking more.
+                 * So we hopefully won't need to fork more if we count it.
+                 * This depends on the ordering of SERVER_READY and SERVER_STARTING.
+                 */
                 if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting
-                    && ps->generation == retained->my_generation
+                    && ps->generation == retained->mpm->my_generation
                     && ps->bucket == child_bucket)
                 {
                     ++idle_thread_count;
@@ -2561,39 +2621,13 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
                     ++child_threads_active;
                 }
             }
+            last_non_dead = i;
         }
         active_thread_count += child_threads_active;
-        if (any_dead_threads
-            && totally_free_length < retained->idle_spawn_rate[child_bucket]
-            && free_length < MAX_SPAWN_RATE / num_buckets
-            && (!ps->pid      /* no process in the slot */
-                  || ps->quiescing)) {  /* or at least one is going away */
-            if (all_dead_threads) {
-                /* great! we prefer these, because the new process can
-                 * start more threads sooner.  So prioritize this slot
-                 * by putting it ahead of any slots with active threads.
-                 *
-                 * first, make room by moving a slot that's potentially still
-                 * in use to the end of the array
-                 */
-                free_slots[free_length] = free_slots[totally_free_length];
-                free_slots[totally_free_length++] = i;
-            }
-            else {
-                /* slot is still in use - back of the bus
-                 */
-                free_slots[free_length] = i;
-            }
-            ++free_length;
-        }
-        else if (child_threads_active == threads_per_child) {
+        if (!ps->pid && free_length < retained->idle_spawn_rate[child_bucket])
+            free_slots[free_length++] = i;
+        else if (child_threads_active == threads_per_child)
             had_healthy_child = 1;
-        }
-        /* XXX if (!ps->quiescing)     is probably more reliable  GLA */
-        if (!any_dying_threads) {
-            last_non_dead = i;
-            ++total_non_dead;
-        }
     }
 
     if (retained->sick_child_detected) {
@@ -2607,7 +2641,7 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
         else {
             /* looks like a basket case, as no child ever fully initialized; give up.
              */
-            shutdown_pending = 1;
+            retained->mpm->shutdown_pending = 1;
             child_fatal = 1;
             ap_log_error(APLOG_MARK, APLOG_ALERT, 0,
                          ap_server_conf, APLOGNO(02324)
@@ -2621,32 +2655,56 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
 
     retained->max_daemons_limit = last_non_dead + 1;
 
-    if (idle_thread_count > max_spare_threads / num_buckets) {
-        /* Kill off one child */
-        ap_mpm_podx_signal(all_buckets[child_bucket].pod,
-                           AP_MPM_PODX_GRACEFUL);
-        retained->idle_spawn_rate[child_bucket] = 1;
+    if (idle_thread_count > max_spare_threads / num_buckets)
+    {
+        /*
+         * Child processes that we ask to shut down won't die immediately
+         * but may stay around for a long time while they finish their
+         * requests. If the server load fluctuates a lot, many such
+         * gracefully finishing processes may accumulate and fill up the
+         * scoreboard. To avoid running out of scoreboard entries, we
+         * don't shut down more processes while the total number of
+         * processes is high.
+         *
+         * XXX It would be nice if we could
+         * XXX - kill processes without keepalive connections first
+         * XXX - tell children to stop accepting new connections, and
+         * XXX   depending on server load, later be able to resurrect
+         * XXX   them or kill them
+         */
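
To make the guard below concrete: with MaxRequestWorkers 400 and
ThreadsPerChild 25, active_daemons_limit is 16; suppose ServerLimit is 20. If
a load drop leaves 18 children on the scoreboard, several of them still
draining requests gracefully, then total_daemons (18) exceeds
active_daemons_limit (16), so no further child is told to stop and the
condition is merely logged at TRACE5, keeping scoreboard slots available.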
+        if (retained->total_daemons <= active_daemons_limit &&
+            retained->total_daemons < server_limit) {
+            /* Kill off one child */
+            ap_mpm_podx_signal(all_buckets[child_bucket].pod,
+                               AP_MPM_PODX_GRACEFUL);
+            retained->idle_spawn_rate[child_bucket] = 1;
+            active_daemons--;
+        } else {
+            ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
+                         "Not shutting down child: total daemons %d / "
+                         "active limit %d / ServerLimit %d",
+                         retained->total_daemons, active_daemons_limit,
+                         server_limit);
+        }
     }
     else if (idle_thread_count < min_spare_threads / num_buckets) {
-        /* terminate the free list */
-        if (free_length == 0) { /* scoreboard is full, can't fork */
-
-            if (active_thread_count >= ap_daemons_limit * threads_per_child) {
-                if (!retained->maxclients_reported) {
-                    /* only report this condition once */
-                    ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484)
-                                 "server reached MaxRequestWorkers setting, "
-                                 "consider raising the MaxRequestWorkers "
-                                 "setting");
-                    retained->maxclients_reported = 1;
-                }
-            }
-            else {
-                ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00485)
-                             "scoreboard is full, not at MaxRequestWorkers");
+        if (active_thread_count >= max_workers) {
+            if (!retained->maxclients_reported) {
+                /* only report this condition once */
+                ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00484)
+                             "server reached MaxRequestWorkers setting, "
+                             "consider raising the MaxRequestWorkers "
+                             "setting");
+                retained->maxclients_reported = 1;
             }
             retained->idle_spawn_rate[child_bucket] = 1;
         }
+        else if (free_length == 0) { /* scoreboard is full, can't fork */
+            ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(03490)
+                         "scoreboard is full, not at MaxRequestWorkers. "
+                         "Increase ServerLimit.");
+            retained->idle_spawn_rate[child_bucket] = 1;
+        }
         else {
             if (free_length > retained->idle_spawn_rate[child_bucket]) {
                 free_length = retained->idle_spawn_rate[child_bucket];
@@ -2657,10 +2715,17 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
                              "to increase StartServers, ThreadsPerChild "
                              "or Min/MaxSpareThreads), "
                              "spawning %d children, there are around %d idle "
-                             "threads, and %d total children", free_length,
-                             idle_thread_count, total_non_dead);
+                             "threads, %d active children, and %d children "
+                             "that are shutting down", free_length,
+                             idle_thread_count, active_daemons,
+                             retained->total_daemons);
             }
             for (i = 0; i < free_length; ++i) {
+                ap_log_error(APLOG_MARK, APLOG_TRACE5, 0, ap_server_conf,
+                             "Spawning new child: slot %d, active/total "
+                             "daemons: %d/%d",
+                             free_slots[i], active_daemons,
+                             retained->total_daemons);
                 make_child(ap_server_conf, free_slots[i], child_bucket);
             }
             /* the next time around we want to spawn twice as many if this
@@ -2682,14 +2747,13 @@ static void perform_idle_server_maintenance(int child_bucket, int num_buckets)
 
 static void server_main_loop(int remaining_children_to_start, int num_buckets)
 {
-    ap_generation_t old_gen;
     int child_slot;
     apr_exit_why_e exitwhy;
     int status, processed_status;
     apr_proc_t pid;
     int i;
 
-    while (!restart_pending && !shutdown_pending) {
+    while (!retained->mpm->restart_pending && !retained->mpm->shutdown_pending) {
         ap_wait_or_timeout(&exitwhy, &status, &pid, pconf, ap_server_conf);
 
         if (pid.pid != -1) {
@@ -2703,9 +2767,13 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets)
                  */
                 if (child_slot < 0
                     || ap_get_scoreboard_process(child_slot)->generation
-                       == retained->my_generation) {
-                    shutdown_pending = 1;
+                       == retained->mpm->my_generation) {
+                    retained->mpm->shutdown_pending = 1;
                     child_fatal = 1;
+                    /*
+                     * The total_daemons count will be off now, but since
+                     * we are shutting down, that no longer matters.
+                     */
                     return;
                 }
                 else {
@@ -2732,13 +2800,16 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets)
 
                 event_note_child_killed(child_slot, 0, 0);
                 ps = &ap_scoreboard_image->parent[child_slot];
+                if (!ps->quiescing)
+                    active_daemons--;
                 ps->quiescing = 0;
+                /* NOTE: total_daemons is not decremented in the (child_slot < 0) case! */
+                retained->total_daemons--;
                 if (processed_status == APEXIT_CHILDSICK) {
                     /* resource shortage, minimize the fork rate */
                     retained->idle_spawn_rate[ps->bucket] = 1;
                 }
-                else if (remaining_children_to_start
-                         && child_slot < ap_daemons_limit) {
+                else if (remaining_children_to_start) {
                     /* we're still doing a 1-for-1 replacement of dead
                      * children with new children
                      */
@@ -2746,25 +2817,13 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets)
                     --remaining_children_to_start;
                 }
             }
-            else if (ap_unregister_extra_mpm_process(pid.pid, &old_gen) == 1) {
-
-                event_note_child_killed(-1, /* already out of the scoreboard */
-                                        pid.pid, old_gen);
-                if (processed_status == APEXIT_CHILDSICK
-                    && old_gen == retained->my_generation) {
-                    /* resource shortage, minimize the fork rate */
-                    for (i = 0; i < num_buckets; i++) {
-                        retained->idle_spawn_rate[i] = 1;
-                    }
-                }
 #if APR_HAS_OTHER_CHILD
-            }
             else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH,
                                                 status) == 0) {
                 /* handled */
-#endif
             }
-            else if (retained->is_graceful) {
+#endif
+            else if (retained->mpm->was_graceful) {
                 /* Great, we've probably just lost a slot in the
                  * scoreboard.  Somehow we don't know about this child.
                  */
@@ -2802,37 +2861,45 @@ static void server_main_loop(int remaining_children_to_start, int num_buckets)
 
 static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
 {
-    int num_buckets = retained->num_buckets;
+    int num_buckets = retained->mpm->num_buckets;
     int remaining_children_to_start;
     int i;
 
     ap_log_pid(pconf, ap_pid_fname);
 
-    if (!retained->is_graceful) {
+    if (!retained->mpm->was_graceful) {
         if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
-            mpm_state = AP_MPMQ_STOPPING;
+            retained->mpm->mpm_state = AP_MPMQ_STOPPING;
             return !OK;
         }
         /* fix the generation number in the global score; we just got a new,
          * cleared scoreboard
          */
-        ap_scoreboard_image->global->running_generation = retained->my_generation;
+        ap_scoreboard_image->global->running_generation = retained->mpm->my_generation;
     }
 
-    restart_pending = shutdown_pending = 0;
-    set_signals();
+    if (!one_process) {
+        ap_fatal_signal_setup(ap_server_conf, pconf);
+    }
+    ap_unixd_mpm_set_signals(pconf, one_process);
 
     /* Don't thrash since num_buckets depends on the
      * system and the number of online CPU cores...
      */
-    if (ap_daemons_limit < num_buckets)
-        ap_daemons_limit = num_buckets;
+    if (active_daemons_limit < num_buckets)
+        active_daemons_limit = num_buckets;
     if (ap_daemons_to_start < num_buckets)
         ap_daemons_to_start = num_buckets;
+    /* We want to create as many children at a time as there are buckets,
+     * so as to accept connections optimally (evenly distributed across
+     * buckets). Thus min_spare_threads should maintain at least num_buckets
+     * children, and max_spare_threads should allow num_buckets more children
+     * without triggering an immediate kill (i.e. a margin of num_buckets
+     * idle threads, one per bucket).
+     */
     if (min_spare_threads < threads_per_child * (num_buckets - 1) + num_buckets)
         min_spare_threads = threads_per_child * (num_buckets - 1) + num_buckets;
-    if (max_spare_threads < min_spare_threads + threads_per_child * num_buckets)
-        max_spare_threads = min_spare_threads + threads_per_child * num_buckets;
+    if (max_spare_threads < min_spare_threads + (threads_per_child + 1) * num_buckets)
+        max_spare_threads = min_spare_threads + (threads_per_child + 1) * num_buckets;
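
As a worked example of these floors: with ThreadsPerChild 25 and num_buckets
4, min_spare_threads is raised to at least 25 * (4 - 1) + 4 = 79 threads, and
max_spare_threads to at least 79 + (25 + 1) * 4 = 183.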
 
     /* If we're doing a graceful_restart then we're going to see a lot
      * of children exiting immediately when we get into the main loop
@@ -2844,10 +2911,10 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
      * supposed to start up without the 1 second penalty between each fork.
      */
     remaining_children_to_start = ap_daemons_to_start;
-    if (remaining_children_to_start > ap_daemons_limit) {
-        remaining_children_to_start = ap_daemons_limit;
+    if (remaining_children_to_start > active_daemons_limit) {
+        remaining_children_to_start = active_daemons_limit;
     }
-    if (!retained->is_graceful) {
+    if (!retained->mpm->was_graceful) {
         startup_children(remaining_children_to_start);
         remaining_children_to_start = 0;
     }
@@ -2865,17 +2932,17 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
     ap_log_command_line(plog, s);
     ap_log_mpm_common(s);
 
-    mpm_state = AP_MPMQ_RUNNING;
+    retained->mpm->mpm_state = AP_MPMQ_RUNNING;
 
     server_main_loop(remaining_children_to_start, num_buckets);
-    mpm_state = AP_MPMQ_STOPPING;
+    retained->mpm->mpm_state = AP_MPMQ_STOPPING;
 
-    if (shutdown_pending && !retained->is_graceful) {
+    if (retained->mpm->shutdown_pending && retained->mpm->is_ungraceful) {
         /* Time to shut down:
          * Kill child processes, tell them to call child_exit, etc...
          */
         for (i = 0; i < num_buckets; i++) {
-            ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+            ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
                                AP_MPM_PODX_RESTART);
         }
         ap_reclaim_child_processes(1, /* Start with SIGTERM */
@@ -2887,8 +2954,11 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
             ap_log_error(APLOG_MARK, APLOG_NOTICE, 0,
                          ap_server_conf, APLOGNO(00491) "caught SIGTERM, shutting down");
         }
+
         return DONE;
-    } else if (shutdown_pending) {
+    }
+
+    if (retained->mpm->shutdown_pending) {
         /* Time to gracefully shut down:
          * Kill child processes, tell them to call child_exit, etc...
          */
@@ -2899,7 +2969,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
         /* Close our listeners, and then ask our children to do same */
         ap_close_listeners();
         for (i = 0; i < num_buckets; i++) {
-            ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+            ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
                                AP_MPM_PODX_GRACEFUL);
         }
         ap_relieve_child_processes(event_note_child_killed);
@@ -2918,7 +2988,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
         }
 
         /* Don't really exit until each child has finished */
-        shutdown_pending = 0;
+        retained->mpm->shutdown_pending = 0;
         do {
             /* Pause for a second */
             apr_sleep(apr_time_from_sec(1));
@@ -2927,14 +2997,14 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
             ap_relieve_child_processes(event_note_child_killed);
 
             active_children = 0;
-            for (index = 0; index < ap_daemons_limit; ++index) {
+            for (index = 0; index < retained->max_daemons_limit; ++index) {
                 if (ap_mpm_safe_kill(MPM_CHILD_PID(index), 0) == APR_SUCCESS) {
                     active_children = 1;
                     /* Having just one child is enough to stay around */
                     break;
                 }
             }
-        } while (!shutdown_pending && active_children &&
+        } while (!retained->mpm->shutdown_pending && active_children &&
                  (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));
 
         /* We might be here because we received SIGTERM, either
@@ -2942,7 +3012,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
          * really dead.
          */
         for (i = 0; i < num_buckets; i++) {
-            ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+            ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
                                AP_MPM_PODX_RESTART);
         }
         ap_reclaim_child_processes(1, event_note_child_killed);
@@ -2951,8 +3021,6 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
     }
 
     /* we've been told to restart */
-    apr_signal(SIGHUP, SIG_IGN);
-
     if (one_process) {
         /* not worth thinking about */
         return DONE;
@@ -2962,16 +3030,16 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
     /* XXX: we really need to make sure this new generation number isn't in
      * use by any of the children.
      */
-    ++retained->my_generation;
-    ap_scoreboard_image->global->running_generation = retained->my_generation;
+    ++retained->mpm->my_generation;
+    ap_scoreboard_image->global->running_generation = retained->mpm->my_generation;
 
-    if (retained->is_graceful) {
+    if (!retained->mpm->is_ungraceful) {
         ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00493)
                      AP_SIG_GRACEFUL_STRING
                      " received.  Doing graceful restart");
         /* wake up the children...time to die.  But we'll have more soon */
         for (i = 0; i < num_buckets; i++) {
-            ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+            ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
                                AP_MPM_PODX_GRACEFUL);
         }
 
@@ -2986,7 +3054,7 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
          * pthreads are stealing signals from us left and right.
          */
         for (i = 0; i < num_buckets; i++) {
-            ap_mpm_podx_killpg(all_buckets[i].pod, ap_daemons_limit,
+            ap_mpm_podx_killpg(all_buckets[i].pod, active_daemons_limit,
                                AP_MPM_PODX_RESTART);
         }
 
@@ -2996,6 +3064,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
                      "SIGHUP received.  Attempting to restart");
     }
 
+    active_daemons = 0;
+
     return OK;
 }
 
@@ -3064,7 +3134,7 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
     pconf = p;
 
     /* the reverse of pre_config, we want this only the first time around */
-    if (retained->module_loads == 1) {
+    if (retained->mpm->module_loads == 1) {
         startup = 1;
         level_flags |= APLOG_STARTUP;
     }
@@ -3079,9 +3149,9 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
     if (one_process) {
         num_buckets = 1;
     }
-    else if (retained->is_graceful) {
+    else if (retained->mpm->was_graceful) {
         /* Preserve the number of buckets on graceful restarts. */
-        num_buckets = retained->num_buckets;
+        num_buckets = retained->mpm->num_buckets;
     }
     if ((rv = ap_duplicate_listeners(pconf, ap_server_conf,
                                      &listen_buckets, &num_buckets))) {
@@ -3103,25 +3173,25 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
         all_buckets[i].listeners = listen_buckets[i];
     }
 
-    if (retained->max_buckets < num_buckets) {
+    if (retained->mpm->max_buckets < num_buckets) {
         int new_max, *new_ptr;
-        new_max = retained->max_buckets * 2;
+        new_max = retained->mpm->max_buckets * 2;
         if (new_max < num_buckets) {
             new_max = num_buckets;
         }
         new_ptr = (int *)apr_palloc(ap_pglobal, new_max * sizeof(int));
         memcpy(new_ptr, retained->idle_spawn_rate,
-               retained->num_buckets * sizeof(int));
+               retained->mpm->num_buckets * sizeof(int));
         retained->idle_spawn_rate = new_ptr;
-        retained->max_buckets = new_max;
+        retained->mpm->max_buckets = new_max;
     }
-    if (retained->num_buckets < num_buckets) {
+    if (retained->mpm->num_buckets < num_buckets) {
         int rate_max = 1;
         /* If new buckets are added, set their idle spawn rate to
          * the highest so far, so that they get filled as quickly
          * as the existing ones.
          */
-        for (i = 0; i < retained->num_buckets; i++) {
+        for (i = 0; i < retained->mpm->num_buckets; i++) {
             if (rate_max < retained->idle_spawn_rate[i]) {
                 rate_max = retained->idle_spawn_rate[i];
             }
@@ -3130,7 +3200,7 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
             retained->idle_spawn_rate[i] = rate_max;
         }
     }
-    retained->num_buckets = num_buckets;
+    retained->mpm->num_buckets = num_buckets;
 
     /* for skiplist */
     srand((unsigned int)apr_time_now());
@@ -3143,8 +3213,7 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
     int no_detach, debug, foreground;
     apr_status_t rv;
     const char *userdata_key = "mpm_event_module";
-
-    mpm_state = AP_MPMQ_STARTING;
+    int test_atomics = 0;
 
     debug = ap_exists_config_define("DEBUG");
 
@@ -3158,15 +3227,24 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
         foreground = ap_exists_config_define("FOREGROUND");
     }
 
-    /* sigh, want this only the second time around */
     retained = ap_retained_data_get(userdata_key);
     if (!retained) {
         retained = ap_retained_data_create(userdata_key, sizeof(*retained));
+        retained->mpm = ap_unixd_mpm_get_retained_data();
         retained->max_daemons_limit = -1;
+        if (retained->mpm->module_loads) {
+            test_atomics = 1;
+        }
+    }
+    retained->mpm->mpm_state = AP_MPMQ_STARTING;
+    if (retained->mpm->baton != retained) {
+        retained->mpm->was_graceful = 0;
+        retained->mpm->baton = retained;
     }
-    ++retained->module_loads;
-    if (retained->module_loads == 2) {
-        /* test for correct operation of fdqueue */
+    ++retained->mpm->module_loads;
+
+    /* Test once for correct operation of the atomics used by fdqueue */
+    if (test_atomics || retained->mpm->module_loads == 2) {
         static apr_uint32_t foo1, foo2;
 
         apr_atomic_set32(&foo1, 100);
@@ -3176,7 +3254,10 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
                          "atomics not working as expected - add32 of negative number");
             return HTTP_INTERNAL_SERVER_ERROR;
         }
+    }
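
For reference, an atomics sanity check of the flavor used here (add32 of a
negative number on an apr_uint32_t) can be written as the standalone sketch
below; it is illustrative only, not the elided event.c lines:

    #include <apr_atomic.h>

    static int atomics_sane(void)
    {
        static apr_uint32_t v;

        apr_atomic_set32(&v, 100);
        /* apr_atomic_add32() returns the *old* value; adding -10 must wrap
         * correctly on the unsigned type, leaving 90 behind. */
        if (apr_atomic_add32(&v, (apr_uint32_t)-10) != 100)
            return 0;
        return apr_atomic_read32(&v) == 90;
    }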
 
+    /* sigh, want this only the second time around */
+    if (retained->mpm->module_loads == 2) {
         rv = apr_pollset_create(&event_pollset, 1, plog,
                                 APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
         if (rv != APR_SUCCESS) {
@@ -3209,9 +3290,10 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
     max_spare_threads = DEFAULT_MAX_FREE_DAEMON * DEFAULT_THREADS_PER_CHILD;
     server_limit = DEFAULT_SERVER_LIMIT;
     thread_limit = DEFAULT_THREAD_LIMIT;
-    ap_daemons_limit = server_limit;
+    active_daemons_limit = server_limit;
     threads_per_child = DEFAULT_THREADS_PER_CHILD;
-    max_workers = ap_daemons_limit * threads_per_child;
+    max_workers = active_daemons_limit * threads_per_child;
+    defer_linger_chain = NULL;
     had_healthy_child = 0;
     ap_extended_status = 0;
 
@@ -3235,10 +3317,10 @@ static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
     wc.hash = apr_hash_make(ptemp);
     ka.hash = apr_hash_make(ptemp);
 
-    TO_QUEUE_INIT(linger_q, pconf,
-                  apr_time_from_sec(MAX_SECS_TO_LINGER), NULL);
-    TO_QUEUE_INIT(short_linger_q, pconf,
-                  apr_time_from_sec(SECONDS_TO_LINGER), NULL);
+    linger_q = TO_QUEUE_MAKE(pconf, apr_time_from_sec(MAX_SECS_TO_LINGER),
+                             NULL);
+    short_linger_q = TO_QUEUE_MAKE(pconf, apr_time_from_sec(SECONDS_TO_LINGER),
+                                   NULL);
 
     for (; s; s = s->next) {
         event_srv_cfg *sc = apr_pcalloc(pconf, sizeof *sc);
@@ -3246,11 +3328,11 @@ static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
         ap_set_module_config(s->module_config, &mpm_event_module, sc);
         if (!wc.tail) {
             /* The main server uses the global queues */
-            TO_QUEUE_INIT(wc.q, pconf, s->timeout, NULL);
+            wc.q = TO_QUEUE_MAKE(pconf, s->timeout, NULL);
             apr_hash_set(wc.hash, &s->timeout, sizeof s->timeout, wc.q);
             wc.tail = write_completion_q = wc.q;
 
-            TO_QUEUE_INIT(ka.q, pconf, s->keep_alive_timeout, NULL);
+            ka.q = TO_QUEUE_MAKE(pconf, s->keep_alive_timeout, NULL);
             apr_hash_set(ka.hash, &s->keep_alive_timeout,
                          sizeof s->keep_alive_timeout, ka.q);
             ka.tail = keepalive_q = ka.q;
@@ -3260,7 +3342,7 @@ static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
              * or their own queue(s) if there isn't */
             wc.q = apr_hash_get(wc.hash, &s->timeout, sizeof s->timeout);
             if (!wc.q) {
-                TO_QUEUE_INIT(wc.q, pconf, s->timeout, wc.tail);
+                wc.q = TO_QUEUE_MAKE(pconf, s->timeout, wc.tail);
                 apr_hash_set(wc.hash, &s->timeout, sizeof s->timeout, wc.q);
                 wc.tail = wc.tail->next = wc.q;
             }
@@ -3268,7 +3350,7 @@ static int event_post_config(apr_pool_t *pconf, apr_pool_t *plog,
             ka.q = apr_hash_get(ka.hash, &s->keep_alive_timeout,
                                 sizeof s->keep_alive_timeout);
             if (!ka.q) {
-                TO_QUEUE_INIT(ka.q, pconf, s->keep_alive_timeout, ka.tail);
+                ka.q = TO_QUEUE_MAKE(pconf, s->keep_alive_timeout, ka.tail);
                 apr_hash_set(ka.hash, &s->keep_alive_timeout,
                              sizeof s->keep_alive_timeout, ka.q);
                 ka.tail = ka.tail->next = ka.q;
@@ -3287,7 +3369,7 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
     int startup = 0;
 
     /* the reverse of pre_config, we want this only the first time around */
-    if (retained->module_loads == 1) {
+    if (retained->mpm->module_loads == 1) {
         startup = 1;
     }
 
@@ -3420,10 +3502,10 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
         max_workers = threads_per_child;
     }
 
-    ap_daemons_limit = max_workers / threads_per_child;
+    active_daemons_limit = max_workers / threads_per_child;
 
     if (max_workers % threads_per_child) {
-        int tmp_max_workers = ap_daemons_limit * threads_per_child;
+        int tmp_max_workers = active_daemons_limit * threads_per_child;
 
         if (startup) {
             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00513)
@@ -3431,7 +3513,7 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
                          "multiple of ThreadsPerChild of %d, decreasing to nearest "
                          "multiple %d, for a maximum of %d servers.",
                          max_workers, threads_per_child, tmp_max_workers,
-                         ap_daemons_limit);
+                         active_daemons_limit);
         } else {
             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00514)
                          "MaxRequestWorkers of %d is not an integer multiple "
@@ -3442,25 +3524,25 @@ static int event_check_config(apr_pool_t *p, apr_pool_t *plog,
         max_workers = tmp_max_workers;
     }
 
-    if (ap_daemons_limit > server_limit) {
+    if (active_daemons_limit > server_limit) {
         if (startup) {
             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00515)
                          "WARNING: MaxRequestWorkers of %d would require %d servers "
                          "and would exceed ServerLimit of %d, decreasing to %d. "
                          "To increase, please see the ServerLimit directive.",
-                         max_workers, ap_daemons_limit, server_limit,
+                         max_workers, active_daemons_limit, server_limit,
                          server_limit * threads_per_child);
         } else {
             ap_log_error(APLOG_MARK, APLOG_WARNING, 0, s, APLOGNO(00516)
                          "MaxRequestWorkers of %d would require %d servers and "
                          "exceed ServerLimit of %d, decreasing to %d",
-                         max_workers, ap_daemons_limit, server_limit,
+                         max_workers, active_daemons_limit, server_limit,
                          server_limit * threads_per_child);
         }
-        ap_daemons_limit = server_limit;
+        active_daemons_limit = server_limit;
     }
 
-    /* ap_daemons_to_start > ap_daemons_limit checked in ap_mpm_run() */
+    /* ap_daemons_to_start > active_daemons_limit checked in ap_mpm_run() */
     if (ap_daemons_to_start < 1) {
         if (startup) {
             ap_log_error(APLOG_MARK, APLOG_WARNING | APLOG_STARTUP, 0, NULL, APLOGNO(00517)