Optimize w/ duplicated listeners and use of SO_REUSEPORT

author Jim Jagielski <jim@apache.org>

Tue, 3 Jun 2014 13:07:29 +0000 (13:07 +0000)

committer Jim Jagielski <jim@apache.org>

Tue, 3 Jun 2014 13:07:29 +0000 (13:07 +0000)
author Jim Jagielski <jim@apache.org>
Tue, 3 Jun 2014 13:07:29 +0000 (13:07 +0000)
committer Jim Jagielski <jim@apache.org>
Tue, 3 Jun 2014 13:07:29 +0000 (13:07 +0000)
diff --git a/CHANGES b/CHANGES

index dca11889d173996313298ec685bbbc5a0a5b61ef..2e04c4808c22da31b540c6191ed2c955ce61c619 100644 (file)
--- a/CHANGES
+++ b/CHANGES
@@ -1,6 +1,10 @@
                                                           -*- coding: utf-8 -*-
  Changes with Apache 2.5.0
  
+  *) MPMs: Support SO_REUSEPORT to create multiple duplicated listener
+     records for scalability. [Yingqi Lu <yingqi.lu@intel.com>,
+     Jeff Trawick, Jim Jagielski]
+
    *) mod_proxy_html: support automatic detection of doctype and processing
       of FPIs.  PR56285 [Micha Lenk <micha lenk info>, Nick Kew]
  
diff --git a/include/ap_listen.h b/include/ap_listen.h

index 21101cd8d3f4053905534c8080620f4cb17e8b49..4538ef9648c1f0c7a8292d736d83f189c1b16a3a 100644 (file)
--- a/include/ap_listen.h
+++ b/include/ap_listen.h
@@ -78,6 +78,14 @@ struct ap_listen_rec {
   */
  AP_DECLARE_DATA extern ap_listen_rec *ap_listeners;
  
+/**
+ * Per-bucket listener lists: mpm_listen[i] is the head of the i-th duplicate
+ * of the ap_listeners list. NULL until ap_duplicate_listeners() allocates it.
+ */
+AP_DECLARE_DATA extern ap_listen_rec **mpm_listen;
+
+/**
+ * Value make_sock() stores in a listener's "active" flag; open_listeners()
+ * also hands it to make_sock() as the do_bind_listen argument.
+ */
+AP_DECLARE_DATA extern int enable_default_listener;
+
+/** Number of listener buckets, i.e. the number of entries in mpm_listen. */
+AP_DECLARE_DATA extern int num_buckets;
+
+/**
+ * Starts out non-zero; make_sock() clears it when setting SO_REUSEPORT on a
+ * socket fails with ENOPROTOOPT.
+ */
+AP_DECLARE_DATA extern int have_so_reuseport;
+
  /**
   * Setup all of the defaults for the listener list
   */
@@ -91,6 +99,14 @@ AP_DECLARE(void) ap_listen_pre_config(void);
   */
  AP_DECLARE(int) ap_setup_listeners(server_rec *s);
  
+/**
+ * Duplicate the global ap_listeners list once per listener bucket and store
+ * the head of each copy in mpm_listen[0 .. num_buckets-1].
+ * @param s The global server_rec
+ * @param p The config pool; the duplicated records are allocated from it
+ * @param num_buckets The total number of listener buckets.
+ * @return APR_SUCCESS, or the APR status of the socket operation that failed
+ */
+AP_DECLARE(apr_status_t) ap_duplicate_listeners(server_rec *s, apr_pool_t *p, int num_buckets);
+
+
  /**
   * Loop through the global ap_listen_rec list and close each of the sockets.
   */
diff --git a/server/listen.c b/server/listen.c

index f9c4266d3a3ecab3e25c4865b8a7b0b7916a0b13..fb404d9fd622e3b9ea7e09e8802b393cc7f73af8 100644 (file)
--- a/server/listen.c
+++ b/server/listen.c
@@ -38,6 +38,11 @@
  
  AP_DECLARE_DATA ap_listen_rec *ap_listeners = NULL;
  
+/* Per-bucket copies of ap_listeners; allocated by ap_duplicate_listeners(). */
+AP_DECLARE_DATA ap_listen_rec **mpm_listen = NULL;
+/* Copied into each listener's "active" flag by make_sock(). */
+AP_DECLARE_DATA int enable_default_listener = 1;
+/* Number of entries in mpm_listen; MPMs may raise it in their open_logs hook. */
+AP_DECLARE_DATA int num_buckets = 1;
+/* Cleared by make_sock() when SO_REUSEPORT is rejected with ENOPROTOOPT. */
+AP_DECLARE_DATA int have_so_reuseport = 1;
+
  static ap_listen_rec *old_listeners;
  static int ap_listenbacklog;
  static int send_buffer_size;
@@ -124,6 +129,24 @@ static apr_status_t make_sock(apr_pool_t *p, ap_listen_rec *server, int do_bind_
      ap_sock_disable_nagle(s);
  #endif
  
+#ifndef SO_REUSEPORT
+#define SO_REUSEPORT 15
+#endif
+    int thesock;
+    apr_os_sock_get(&thesock, s);
+    if (setsockopt(thesock, SOL_SOCKET, SO_REUSEPORT, (void *)&one, sizeof(int)) < 0) {
+        if (errno == ENOPROTOOPT) {
+            have_so_reuseport = 0;
+        } /* Check if SO_REUSEPORT is supported by the running Linux Kernel.*/
+        else {
+            ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO()
+                    "make_sock: for address %pI, apr_socket_opt_set: (SO_REUSEPORT)",
+                     server->bind_addr);
+            apr_socket_close(s);
+            return errno;
+        }
+     }
+
      if (do_bind_listen) {
  #if APR_HAVE_IPV6
          if (server->bind_addr->family == APR_INET6) {
@@ -179,7 +202,7 @@ static apr_status_t make_sock(apr_pool_t *p, ap_listen_rec *server, int do_bind_
  #endif
  
      server->sd = s;
-    server->active = 1;
+    server->active = enable_default_listener;
  
      server->accept_func = NULL;
  
@@ -575,7 +598,7 @@ static int open_listeners(apr_pool_t *pool)
                  }
              }
  #endif
-            if (make_sock(pool, lr, 1) == APR_SUCCESS) {
+            if (make_sock(pool, lr, enable_default_listener) == APR_SUCCESS) {
                  ++num_open;
              }
              else {
@@ -727,13 +750,73 @@ AP_DECLARE(int) ap_setup_listeners(server_rec *s)
      return num_listeners;
  }
  
+/*
+ * Build num_buckets independent copies of the ap_listeners list and publish
+ * them through the global mpm_listen array (one list head per bucket).
+ *
+ * Every copied record gets its own new socket for the same hostname/port as
+ * the original record; the socket is configured by make_sock() with
+ * bind/listen requested and then has the accept filter applied.
+ *
+ * s           - the global server_rec (passed to ap_apply_accept_filter)
+ * p           - pool that owns the array, the records and the sockets
+ * num_buckets - number of copies to create
+ *
+ * Returns APR_SUCCESS, or the status of the first operation that failed.
+ * On failure the buckets that were not completed are left as NULL/partial
+ * lists, never as uninitialised pointers.
+ */
+AP_DECLARE(apr_status_t) ap_duplicate_listeners(server_rec *s, apr_pool_t *p,
+                                                  int num_buckets)
+{
+    int i;
+    apr_status_t stat;
+#if AP_NONBLOCK_WHEN_MULTI_LISTEN
+    /* Loop invariant: non-blocking only matters with more than one listener */
+    int use_nonblock = (ap_listeners && ap_listeners->next);
+#endif
+
+    /* Zero-filled so an empty ap_listeners or an early error return never
+     * exposes uninitialised bucket heads to the MPMs. */
+    mpm_listen = apr_pcalloc(p, sizeof(*mpm_listen) * num_buckets);
+
+    for (i = 0; i < num_buckets; i++) {
+        ap_listen_rec *lr;
+        ap_listen_rec *last = NULL;
+
+        for (lr = ap_listeners; lr; lr = lr->next) {
+            ap_listen_rec *duplr;
+            char *hostname;
+            apr_sockaddr_t *sa;
+
+            /* Zeroed record: slave/next start as NULL and no field that
+             * make_sock() does not set is left holding pool garbage. */
+            duplr = apr_pcalloc(p, sizeof(*duplr));
+            duplr->protocol = apr_pstrdup(p, lr->protocol);
+
+            hostname = apr_pstrdup(p, lr->bind_addr->hostname);
+            stat = apr_sockaddr_info_get(&sa, hostname, APR_UNSPEC,
+                                         lr->bind_addr->port, 0, p);
+            if (stat != APR_SUCCESS) {
+                ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO()
+                              "ap_duplicate_listeners: failure looking up %s "
+                              "to duplicate listening socket",
+                              hostname ? hostname : "(wildcard)");
+                return stat;
+            }
+            duplr->bind_addr = sa;
+
+            stat = apr_socket_create(&duplr->sd, sa->family,
+                                     SOCK_STREAM, 0, p);
+            if (stat != APR_SUCCESS) {
+                ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO()
+                              "ap_duplicate_listeners: for address %pI, "
+                              "cannot duplicate a new socket!",
+                              duplr->bind_addr);
+                return stat;
+            }
+
+            /* Do not link a record whose socket could not be set up. */
+            stat = make_sock(p, duplr, 1);
+            if (stat != APR_SUCCESS) {
+                return stat;
+            }
+#if AP_NONBLOCK_WHEN_MULTI_LISTEN
+            stat = apr_socket_opt_set(duplr->sd, APR_SO_NONBLOCK,
+                                      use_nonblock);
+            if (stat != APR_SUCCESS) {
+                ap_log_perror(APLOG_MARK, APLOG_CRIT, stat, p, APLOGNO()
+                              "unable to control socket non-blocking status");
+                return stat;
+            }
+#endif
+            ap_apply_accept_filter(p, duplr, s);
+
+            /* Append, preserving the order of ap_listeners. */
+            if (last == NULL) {
+                mpm_listen[i] = duplr;
+            }
+            else {
+                last->next = duplr;
+            }
+            last = duplr;
+        }
+    }
+    return APR_SUCCESS;
+}
+
+
  AP_DECLARE_NONSTD(void) ap_close_listeners(void)
  {
      ap_listen_rec *lr;
-
-    for (lr = ap_listeners; lr; lr = lr->next) {
-        apr_socket_close(lr->sd);
-        lr->active = 0;
+    int i;
+    for (i = 0; i < num_buckets; i++) {
+        for (lr = mpm_listen[i]; lr; lr = lr->next) {
+            apr_socket_close(lr->sd);
+            lr->active = 0;
+        }
      }
  }
  
diff --git a/server/mpm/event/event.c b/server/mpm/event/event.c

index ae247cd13c68229a62580ed0ef7a52e05b795590..779499a40ec99548b69659dab07b2b784a967cc8 100644 (file)
--- a/server/mpm/event/event.c
+++ b/server/mpm/event/event.c
@@ -59,6 +59,8 @@
  #include "apr_want.h"
  #include "apr_version.h"
  
+#include <stdlib.h>
+
  #if APR_HAVE_UNISTD_H
  #include <unistd.h>
  #endif
@@ -349,7 +351,7 @@ typedef struct event_retained_data {
       * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by
       * without the need to spawn.
       */
-    int idle_spawn_rate;
+    int *idle_spawn_rate;
  #ifndef MAX_SPAWN_RATE
  #define MAX_SPAWN_RATE        (32)
  #endif
@@ -359,7 +361,10 @@ static event_retained_data *retained;
  
  #define ID_FROM_CHILD_THREAD(c, t)    ((c * thread_limit) + t)
  
-static ap_pod_t *pod;
+static ap_pod_t **pod;
+static ap_pod_t *child_pod;
+static ap_listen_rec *child_listen;
+static int *bucket;    /* bucket array for the httpd child processes */
  
  /* The event MPM respects a couple of runtime flags that can aid
   * in debugging. Setting the -DNO_DETACH flag will prevent the root process
@@ -1292,7 +1297,7 @@ static apr_status_t init_pollset(apr_pool_t *p)
      TO_QUEUE_INIT(short_linger_q);
  
      listener_pollfd = apr_palloc(p, sizeof(apr_pollfd_t) * num_listensocks);
-    for (lr = ap_listeners; lr != NULL; lr = lr->next, i++) {
+    for (lr = child_listen; lr != NULL; lr = lr->next, i++) {
          apr_pollfd_t *pfd;
          AP_DEBUG_ASSERT(i < num_listensocks);
          pfd = &listener_pollfd[i];
@@ -2421,6 +2426,8 @@ static void child_main(int child_num_arg)
      apr_threadattr_t *thread_attr;
      apr_thread_t *start_thread_id;
      apr_pool_t *pskip;
+    int i;
+    ap_listen_rec *lr;
  
      mpm_state = AP_MPMQ_STARTING;       /* for benefit of any hooks that run as this
                                           * child initializes
@@ -2429,6 +2436,19 @@ static void child_main(int child_num_arg)
      ap_fatal_signal_child_setup(ap_server_conf);
      apr_pool_create(&pchild, pconf);
  
+    /* close unused listeners and pods */
+    for (i = 0; i < num_buckets; i++) {
+        if (i != bucket[child_num_arg]) {
+            lr = mpm_listen[i];
+            while(lr) {
+                apr_socket_close(lr->sd);
+                lr = lr->next;
+            }
+            mpm_listen[i]->active = 0;
+            ap_mpm_podx_close(pod[i]);
+        }
+    }
+
      /*stuff to do before we switch id's, so we have permissions. */
      ap_reopen_scoreboard(pchild, NULL, 0);
  
@@ -2539,7 +2559,7 @@ static void child_main(int child_num_arg)
          apr_signal(SIGTERM, dummy_signal_handler);
          /* Watch for any messages from the parent over the POD */
          while (1) {
-            rv = ap_mpm_podx_check(pod);
+            rv = ap_mpm_podx_check(child_pod);
              if (rv == AP_MPM_PODX_NORESTART) {
                  /* see if termination was triggered while we slept */
                  switch (terminate_mode) {
@@ -2592,6 +2612,9 @@ static int make_child(server_rec * s, int slot)
          /* NOTREACHED */
      }
  
+    child_listen = mpm_listen[bucket[slot]];
+    child_pod = pod[bucket[slot]];
+
      if ((pid = fork()) == -1) {
          ap_log_error(APLOG_MARK, APLOG_ERR, errno, s, APLOGNO(00481)
                       "fork: Unable to fork new process");
@@ -2652,6 +2675,7 @@ static void startup_children(int number_to_start)
          if (ap_scoreboard_image->parent[i].pid != 0) {
              continue;
          }
+        bucket[i] = i % num_buckets;
          if (make_child(ap_server_conf, i) < 0) {
              break;
          }
@@ -2659,7 +2683,7 @@ static void startup_children(int number_to_start)
      }
  }
  
-static void perform_idle_server_maintenance(void)
+static void perform_idle_server_maintenance(int child_bucket)
  {
      int i, j;
      int idle_thread_count;
@@ -2689,7 +2713,7 @@ static void perform_idle_server_maintenance(void)
          int child_threads_active = 0;
  
          if (i >= retained->max_daemons_limit
-            && totally_free_length == retained->idle_spawn_rate)
+            && totally_free_length == retained->idle_spawn_rate[child_bucket])
              /* short cut if all active processes have been examined and
               * enough empty scoreboard slots have been found
               */
@@ -2716,7 +2740,8 @@ static void perform_idle_server_maintenance(void)
              if (ps->pid != 0) { /* XXX just set all_dead_threads in outer
                                     for loop if no pid?  not much else matters */
                  if (status <= SERVER_READY && !ps->quiescing && !ps->not_accepting
-                    && ps->generation == retained->my_generation)
+                    && ps->generation == retained->my_generation &&
+                    bucket[i] == child_bucket)
                  {
                      ++idle_thread_count;
                  }
@@ -2727,8 +2752,8 @@ static void perform_idle_server_maintenance(void)
          }
          active_thread_count += child_threads_active;
          if (any_dead_threads
-            && totally_free_length < retained->idle_spawn_rate
-            && free_length < MAX_SPAWN_RATE
+            && totally_free_length < retained->idle_spawn_rate[child_bucket]
+            && free_length < MAX_SPAWN_RATE/num_buckets
              && (!ps->pid      /* no process in the slot */
                    || ps->quiescing)) {  /* or at least one is going away */
              if (all_dead_threads) {
@@ -2784,12 +2809,12 @@ static void perform_idle_server_maintenance(void)
  
      retained->max_daemons_limit = last_non_dead + 1;
  
-    if (idle_thread_count > max_spare_threads) {
+    if (idle_thread_count > max_spare_threads/num_buckets) {
          /* Kill off one child */
-        ap_mpm_podx_signal(pod, AP_MPM_PODX_GRACEFUL);
-        retained->idle_spawn_rate = 1;
+        ap_mpm_podx_signal(pod[child_bucket], AP_MPM_PODX_GRACEFUL);
+        retained->idle_spawn_rate[child_bucket] = 1;
      }
-    else if (idle_thread_count < min_spare_threads) {
+    else if (idle_thread_count < min_spare_threads/num_buckets) {
          /* terminate the free list */
          if (free_length == 0) { /* scoreboard is full, can't fork */
  
@@ -2807,13 +2832,13 @@ static void perform_idle_server_maintenance(void)
                  ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf, APLOGNO(00485)
                               "scoreboard is full, not at MaxRequestWorkers");
              }
-            retained->idle_spawn_rate = 1;
+            retained->idle_spawn_rate[child_bucket] = 1;
          }
          else {
-            if (free_length > retained->idle_spawn_rate) {
-                free_length = retained->idle_spawn_rate;
+            if (free_length > retained->idle_spawn_rate[child_bucket]) {
+                free_length = retained->idle_spawn_rate[child_bucket];
              }
-            if (retained->idle_spawn_rate >= 8) {
+            if (retained->idle_spawn_rate[child_bucket] >= 8) {
                  ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00486)
                               "server seems busy, (you may need "
                               "to increase StartServers, ThreadsPerChild "
@@ -2823,6 +2848,7 @@ static void perform_idle_server_maintenance(void)
                               idle_thread_count, total_non_dead);
              }
              for (i = 0; i < free_length; ++i) {
+                bucket[free_slots[i]] = child_bucket;
                  make_child(ap_server_conf, free_slots[i]);
              }
              /* the next time around we want to spawn twice as many if this
@@ -2831,13 +2857,13 @@ static void perform_idle_server_maintenance(void)
              if (retained->hold_off_on_exponential_spawning) {
                  --retained->hold_off_on_exponential_spawning;
              }
-            else if (retained->idle_spawn_rate < MAX_SPAWN_RATE) {
-                retained->idle_spawn_rate *= 2;
+            else if (retained->idle_spawn_rate[child_bucket] < MAX_SPAWN_RATE/num_buckets) {
+                retained->idle_spawn_rate[child_bucket] *= 2;
              }
          }
      }
      else {
-        retained->idle_spawn_rate = 1;
+        retained->idle_spawn_rate[child_bucket] = 1;
      }
  }
  
@@ -2894,7 +2920,7 @@ static void server_main_loop(int remaining_children_to_start)
                  ap_scoreboard_image->parent[child_slot].quiescing = 0;
                  if (processed_status == APEXIT_CHILDSICK) {
                      /* resource shortage, minimize the fork rate */
-                    retained->idle_spawn_rate = 1;
+                    retained->idle_spawn_rate[bucket[child_slot]] = 1;
                  }
                  else if (remaining_children_to_start
                           && child_slot < ap_daemons_limit) {
@@ -2912,7 +2938,9 @@ static void server_main_loop(int remaining_children_to_start)
                  if (processed_status == APEXIT_CHILDSICK
                      && old_gen == retained->my_generation) {
                      /* resource shortage, minimize the fork rate */
-                    retained->idle_spawn_rate = 1;
+                    for (i = 0; i < num_buckets; i++) {
+                        retained->idle_spawn_rate[i] = 1;
+                    }
                  }
  #if APR_HAS_OTHER_CHILD
              }
@@ -2951,7 +2979,9 @@ static void server_main_loop(int remaining_children_to_start)
              continue;
          }
  
-        perform_idle_server_maintenance();
+        for (i = 0; i < num_buckets; i++) {
+            perform_idle_server_maintenance(i);
+        }
      }
  }
  
@@ -2959,6 +2989,8 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
  {
      int remaining_children_to_start;
  
+    int i;
+
      ap_log_pid(pconf, ap_pid_fname);
  
      if (!retained->is_graceful) {
@@ -2972,11 +3004,13 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
          ap_scoreboard_image->global->running_generation = retained->my_generation;
      }
  
+    bucket = apr_palloc(_pconf, sizeof(int) *  ap_daemons_limit);
+
      restart_pending = shutdown_pending = 0;
      set_signals();
      /* Don't thrash... */
-    if (max_spare_threads < min_spare_threads + threads_per_child)
-        max_spare_threads = min_spare_threads + threads_per_child;
+    if (max_spare_threads < min_spare_threads + threads_per_child * num_buckets)
+        max_spare_threads = min_spare_threads + threads_per_child * num_buckets;
  
      /* If we're doing a graceful_restart then we're going to see a lot
       * of children exiting immediately when we get into the main loop
@@ -3017,7 +3051,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
          /* Time to shut down:
           * Kill child processes, tell them to call child_exit, etc...
           */
-        ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+        for (i = 0; i < num_buckets; i++) {
+            ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
+        }
          ap_reclaim_child_processes(1, /* Start with SIGTERM */
                                     event_note_child_killed);
  
@@ -3038,7 +3074,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
  
          /* Close our listeners, and then ask our children to do same */
          ap_close_listeners();
-        ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
+        for (i = 0; i < num_buckets; i++) {
+            ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
+        }
          ap_relieve_child_processes(event_note_child_killed);
  
          if (!child_fatal) {
@@ -3078,7 +3116,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
           * way, try and make sure that all of our processes are
           * really dead.
           */
-        ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+        for (i = 0; i < num_buckets; i++) {
+            ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
+        }
          ap_reclaim_child_processes(1, event_note_child_killed);
  
          return DONE;
@@ -3104,8 +3144,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
                       AP_SIG_GRACEFUL_STRING
                       " received.  Doing graceful restart");
          /* wake up the children...time to die.  But we'll have more soon */
-        ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
-
+        for (i = 0; i < num_buckets; i++) {
+            ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
+        }
  
          /* This is mostly for debugging... so that we know what is still
           * gracefully dealing with existing request.
@@ -3117,7 +3158,9 @@ static int event_run(apr_pool_t * _pconf, apr_pool_t * plog, server_rec * s)
           * and a SIGHUP, we may as well use the same signal, because some user
           * pthreads are stealing signals from us left and right.
           */
-        ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+        for (i = 0; i < num_buckets; i++) {
+            ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
+        }
  
          ap_reclaim_child_processes(1,  /* Start with SIGTERM */
                                     event_note_child_killed);
@@ -3137,6 +3180,8 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
      int startup = 0;
      int level_flags = 0;
      apr_status_t rv;
+    int i;
+    int num_of_cores = 0;
  
      pconf = p;
  
@@ -3146,6 +3191,8 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
          level_flags |= APLOG_STARTUP;
      }
  
+    enable_default_listener = 0;
+
      if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
          ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
                       (startup ? NULL : s),
@@ -3153,12 +3200,36 @@ static int event_open_logs(apr_pool_t * p, apr_pool_t * plog,
          return DONE;
      }
  
+    enable_default_listener = 1;
+    if (have_so_reuseport) {
+#ifdef _SC_NPROCESSORS_ONLN
+        num_of_cores = sysconf(_SC_NPROCESSORS_ONLN);
+#else
+        num_of_cores = 1;
+#endif
+        if (num_of_cores > 8) {
+            num_buckets = num_of_cores/8;
+        }
+        else {
+            num_buckets = 1;
+        }
+    }
+    else {
+        num_buckets = 1;
+    }
+
+    ap_duplicate_listeners(ap_server_conf, pconf, num_buckets);
+
+    pod = apr_palloc(pconf, sizeof(ap_pod_t *) * num_buckets);
+
      if (!one_process) {
-        if ((rv = ap_mpm_podx_open(pconf, &pod))) {
-            ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
-                         (startup ? NULL : s),
-                         "could not open pipe-of-death");
-            return DONE;
+        for (i = 0; i < num_buckets; i++) {
+            if ((rv = ap_mpm_podx_open(pconf, &pod[i]))) {
+                ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
+                             (startup ? NULL : s),
+                             "could not open pipe-of-death");
+                return DONE;
+            }
          }
      }
      /* for skiplist */
@@ -3172,6 +3243,7 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
      int no_detach, debug, foreground;
      apr_status_t rv;
      const char *userdata_key = "mpm_event_module";
+    int i;
  
      mpm_state = AP_MPMQ_STARTING;
  
@@ -3192,7 +3264,6 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
      if (!retained) {
          retained = ap_retained_data_create(userdata_key, sizeof(*retained));
          retained->max_daemons_limit = -1;
-        retained->idle_spawn_rate = 1;
      }
      ++retained->module_loads;
      if (retained->module_loads == 2) {
@@ -3206,6 +3277,10 @@ static int event_pre_config(apr_pool_t * pconf, apr_pool_t * plog,
                           "atomics not working as expected - add32 of negative number");
              return HTTP_INTERNAL_SERVER_ERROR;
          }
+        retained->idle_spawn_rate = apr_palloc(pconf, sizeof(int) * num_buckets);
+        for (i = 0; i< num_buckets; i++) {
+            retained->idle_spawn_rate[i] = 1;
+        }
          rv = apr_pollset_create(&event_pollset, 1, plog,
                                  APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
          if (rv != APR_SUCCESS) {
diff --git a/server/mpm/prefork/prefork.c b/server/mpm/prefork/prefork.c

index ae0fd37461f95c47313344e73333dcc408b2dfbd..05a782e69a2184b23d0f7442c184f8b788d8e389 100644 (file)
--- a/server/mpm/prefork/prefork.c
+++ b/server/mpm/prefork/prefork.c
@@ -48,6 +48,8 @@
  #include "ap_mmn.h"
  #include "apr_poll.h"
  
+#include <stdlib.h>
+
  #ifdef HAVE_TIME_H
  #include <time.h>
  #endif
@@ -86,14 +88,19 @@
  
  /* config globals */
  
-static apr_proc_mutex_t *accept_mutex;
+static apr_proc_mutex_t **accept_mutex;
  static int ap_daemons_to_start=0;
  static int ap_daemons_min_free=0;
  static int ap_daemons_max_free=0;
  static int ap_daemons_limit=0;      /* MaxRequestWorkers */
  static int server_limit = 0;
  static int mpm_state = AP_MPMQ_STARTING;
-static ap_pod_t *pod;
+static ap_pod_t **pod;
+static ap_pod_t *child_pod;
+static apr_proc_mutex_t *child_mutex;
+static ap_listen_rec *child_listen;
+static int *bucket;    /* bucket array for the httpd child processes */
+
  
  /* data retained by prefork across load/unload of the module
   * allocated on first call to pre-config hook; located on
@@ -222,14 +229,14 @@ static void clean_child_exit(int code)
          prefork_note_child_killed(/* slot */ 0, 0, 0);
      }
  
-    ap_mpm_pod_close(pod);
+    ap_mpm_pod_close(child_pod);
      chdir_for_gprof();
      exit(code);
  }
  
  static void accept_mutex_on(void)
  {
-    apr_status_t rv = apr_proc_mutex_lock(accept_mutex);
+    apr_status_t rv = apr_proc_mutex_lock(child_mutex);
      if (rv != APR_SUCCESS) {
          const char *msg = "couldn't grab the accept mutex";
  
@@ -247,7 +254,7 @@ static void accept_mutex_on(void)
  
  static void accept_mutex_off(void)
  {
-    apr_status_t rv = apr_proc_mutex_unlock(accept_mutex);
+    apr_status_t rv = apr_proc_mutex_unlock(child_mutex);
      if (rv != APR_SUCCESS) {
          const char *msg = "couldn't release the accept mutex";
  
@@ -272,7 +279,7 @@ static void accept_mutex_off(void)
   * when it's safe in the single Listen case.
   */
  #ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT
-#define SAFE_ACCEPT(stmt) do {if (ap_listeners->next) {stmt;}} while(0)
+#define SAFE_ACCEPT(stmt) do {if (child_listen->next) {stmt;}} while(0)
  #else
  #define SAFE_ACCEPT(stmt) do {stmt;} while(0)
  #endif
@@ -521,10 +528,23 @@ static void child_main(int child_num_arg)
      apr_pool_create(&ptrans, pchild);
      apr_pool_tag(ptrans, "transaction");
  
+/* close unused listeners and pods */
+    for (i = 0; i < num_buckets; i++) {
+        if (i != bucket[my_child_num]) {
+            lr = mpm_listen[i];
+            while(lr) {
+                apr_socket_close(lr->sd);
+                lr = lr->next;
+            }
+            mpm_listen[i]->active = 0;
+            ap_mpm_pod_close(pod[i]);
+        }
+    }
+
      /* needs to be done before we switch UIDs so we have permissions */
      ap_reopen_scoreboard(pchild, NULL, 0);
-    lockfile = apr_proc_mutex_lockfile(accept_mutex);
-    status = apr_proc_mutex_child_init(&accept_mutex,
+    lockfile = apr_proc_mutex_lockfile(child_mutex);
+    status = apr_proc_mutex_child_init(&child_mutex,
                                         lockfile,
                                         pchild);
      if (status != APR_SUCCESS) {
@@ -532,7 +552,7 @@ static void child_main(int child_num_arg)
                       "Couldn't initialize cross-process lock in child "
                       "(%s) (%s)",
                       lockfile ? lockfile : "none",
-                     apr_proc_mutex_name(accept_mutex));
+                     apr_proc_mutex_name(child_mutex));
          clean_child_exit(APEXIT_CHILDFATAL);
      }
  
@@ -554,7 +574,7 @@ static void child_main(int child_num_arg)
          clean_child_exit(APEXIT_CHILDSICK); /* assume temporary resource issue */
      }
  
-    for (lr = ap_listeners, i = num_listensocks; i--; lr = lr->next) {
+    for (lr = child_listen, i = num_listensocks; i--; lr = lr->next) {
          apr_pollfd_t pfd = { 0 };
  
          pfd.desc_type = APR_POLL_SOCKET;
@@ -612,7 +632,7 @@ static void child_main(int child_num_arg)
  
          if (num_listensocks == 1) {
              /* There is only one listener record, so refer to that one. */
-            lr = ap_listeners;
+            lr = child_listen;
          }
          else {
              /* multiple listening sockets - need to poll */
@@ -710,7 +730,7 @@ static void child_main(int child_num_arg)
           * while we were processing the connection or we are the lucky
           * idle server process that gets to die.
           */
-        if (ap_mpm_pod_check(pod) == APR_SUCCESS) { /* selected as idle? */
+        if (ap_mpm_pod_check(child_pod) == APR_SUCCESS) { /* selected as idle? */
              die_now = 1;
          }
          else if (retained->my_generation !=
@@ -750,6 +770,9 @@ static int make_child(server_rec *s, int slot)
      (void) ap_update_child_status_from_indexes(slot, 0, SERVER_STARTING,
                                                 (request_rec *) NULL);
  
+    child_listen = mpm_listen[bucket[slot]];
+    child_mutex = accept_mutex[bucket[slot]];
+    child_pod = pod[bucket[slot]];
  
  #ifdef _OSD_POSIX
      /* BS2000 requires a "special" version of fork() before a setuid() call */
@@ -815,6 +838,7 @@ static void startup_children(int number_to_start)
          if (ap_scoreboard_image->servers[i][0].status != SERVER_DEAD) {
              continue;
          }
+        bucket[i] = i % num_buckets;
          if (make_child(ap_server_conf, i) < 0) {
              break;
          }
@@ -822,6 +846,8 @@ static void startup_children(int number_to_start)
      }
  }
  
+static int bucket_make_child_record = -1;
+static int bucket_kill_child_record = -1;
  static void perform_idle_server_maintenance(apr_pool_t *p)
  {
      int i;
@@ -874,7 +900,8 @@ static void perform_idle_server_maintenance(apr_pool_t *p)
           * shut down gracefully, in case it happened to pick up a request
           * while we were counting
           */
-        ap_mpm_pod_signal(pod);
+        bucket_kill_child_record = (bucket_kill_child_record + 1) % num_buckets;
+        ap_mpm_pod_signal(pod[bucket_kill_child_record]);
          retained->idle_spawn_rate = 1;
      }
      else if (idle_count < ap_daemons_min_free) {
@@ -899,6 +926,7 @@ static void perform_idle_server_maintenance(apr_pool_t *p)
                      idle_count, total_non_dead);
              }
              for (i = 0; i < free_length; ++i) {
+                bucket[free_slots[i]]= (++bucket_make_child_record) % num_buckets;
                  make_child(ap_server_conf, free_slots[i]);
              }
              /* the next time around we want to spawn twice as many if this
@@ -926,15 +954,24 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
      int index;
      int remaining_children_to_start;
      apr_status_t rv;
+    int i;
+    ap_listen_rec *lr;
  
      ap_log_pid(pconf, ap_pid_fname);
  
-    /* Initialize cross-process accept lock */
-    rv = ap_proc_mutex_create(&accept_mutex, NULL, AP_ACCEPT_MUTEX_TYPE, NULL,
-                              s, _pconf, 0);
-    if (rv != APR_SUCCESS) {
-        mpm_state = AP_MPMQ_STOPPING;
-        return DONE;
+    bucket = apr_palloc(_pconf, sizeof(int) *  ap_daemons_limit);
+    /* Initialize cross-process accept lock for each bucket*/
+    accept_mutex = apr_palloc(_pconf, sizeof(apr_proc_mutex_t *) * num_buckets);
+    for (i = 0; i < num_buckets; i++) {
+        rv = ap_proc_mutex_create(&accept_mutex[i], NULL, AP_ACCEPT_MUTEX_TYPE, NULL,
+                                  s, _pconf, 0);
+        if (rv != APR_SUCCESS) {
+            mpm_state = AP_MPMQ_STOPPING;
+            return DONE;
+        }
+     }
+    for (lr = ap_listeners; lr; lr = lr->next) {
+        apr_socket_close(lr->sd);
      }
  
      if (!retained->is_graceful) {
@@ -953,12 +990,13 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
  
      if (one_process) {
          AP_MONCONTROL(1);
+        bucket[0] = 0;
          make_child(ap_server_conf, 0);
          /* NOTREACHED */
      }
      else {
-    if (ap_daemons_max_free < ap_daemons_min_free + 1)  /* Don't thrash... */
-        ap_daemons_max_free = ap_daemons_min_free + 1;
+    if (ap_daemons_max_free < ap_daemons_min_free + num_buckets)  /* Don't thrash... */
+        ap_daemons_max_free = ap_daemons_min_free + num_buckets;
  
      /* If we're doing a graceful_restart then we're going to see a lot
       * of children exiting immediately when we get into the main loop
@@ -991,7 +1029,7 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
      ap_log_command_line(plog, s);
      ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00165)
                  "Accept mutex: %s (default: %s)",
-                apr_proc_mutex_name(accept_mutex),
+                apr_proc_mutex_name(accept_mutex[0]),
                  apr_proc_mutex_defname());
  
      mpm_state = AP_MPMQ_RUNNING;
@@ -1122,7 +1160,9 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
          ap_close_listeners();
  
          /* kill off the idle ones */
-        ap_mpm_pod_killpg(pod, retained->max_daemons_limit);
+        for (i = 0; i < num_buckets; i++) {
+            ap_mpm_pod_killpg(pod[i], retained->max_daemons_limit);
+        }
  
          /* Send SIGUSR1 to the active children */
          active_children = 0;
@@ -1196,7 +1236,9 @@ static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
                      "Graceful restart requested, doing restart");
  
          /* kill off the idle ones */
-        ap_mpm_pod_killpg(pod, retained->max_daemons_limit);
+        for (i = 0; i < num_buckets; i++) {
+            ap_mpm_pod_killpg(pod[i], retained->max_daemons_limit);
+        }
  
          /* This is mostly for debugging... so that we know what is still
           * gracefully dealing with existing request.  This will break
@@ -1239,6 +1281,8 @@ static int prefork_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
      int startup = 0;
      int level_flags = 0;
      apr_status_t rv;
+    int i;
+    int num_of_cores = 0;
  
      pconf = p;
  
@@ -1248,6 +1292,7 @@ static int prefork_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
          level_flags |= APLOG_STARTUP;
      }
  
+    enable_default_listener = 0;
      if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
          ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
                       (startup ? NULL : s),
@@ -1255,12 +1300,36 @@ static int prefork_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
          return DONE;
      }
  
-    if ((rv = ap_mpm_pod_open(pconf, &pod))) {
-        ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
-                     (startup ? NULL : s),
-                     "could not open pipe-of-death");
-        return DONE;
+    enable_default_listener = 1;
+    if (have_so_reuseport) {
+#ifdef _SC_NPROCESSORS_ONLN
+        num_of_cores = sysconf(_SC_NPROCESSORS_ONLN);
+#else
+        num_of_cores = 1;
+#endif
+        if (num_of_cores > 8) {
+            num_buckets = num_of_cores/8;
+        }
+        else {
+            num_buckets = 1;
+        }
      }
+    else {
+        num_buckets = 1;
+    }
+
+    ap_duplicate_listeners(ap_server_conf, pconf, num_buckets);
+
+    pod = apr_palloc(pconf, sizeof(ap_pod_t *) * num_buckets);
+    for (i = 0; i < num_buckets; i++) {
+        if ((rv = ap_mpm_pod_open(pconf, &pod[i]))) {
+            ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
+                         (startup ? NULL : s),
+                         "could not open pipe-of-death");
+            return DONE;
+        }
+     }
+
      return OK;
  }
  
diff --git a/server/mpm/worker/worker.c b/server/mpm/worker/worker.c

index 408d317650e2cd1c8638bac88e24848ca83f92d1..b90161970dbdcb7059428510f76ecc7d294e6d2e 100644 (file)
--- a/server/mpm/worker/worker.c
+++ b/server/mpm/worker/worker.c
@@ -30,6 +30,9 @@
  #include "apr_thread_mutex.h"
  #include "apr_proc_mutex.h"
  #include "apr_poll.h"
+
+#include <stdlib.h>
+
  #define APR_WANT_STRFUNC
  #include "apr_want.h"
  
@@ -159,7 +162,7 @@ typedef struct worker_retained_data {
       * doubled up to MAX_SPAWN_RATE, and reset only when a cycle goes by
       * without the need to spawn.
       */
-    int idle_spawn_rate;
+    int *idle_spawn_rate;
  #ifndef MAX_SPAWN_RATE
  #define MAX_SPAWN_RATE        (32)
  #endif
@@ -188,7 +191,8 @@ typedef struct {
  
  #define ID_FROM_CHILD_THREAD(c, t)    ((c * thread_limit) + t)
  
-static ap_pod_t *pod;
+static ap_pod_t **pod;
+static ap_pod_t *child_pod;
  
  /* The worker MPM respects a couple of runtime flags that can aid
   * in debugging. Setting the -DNO_DETACH flag will prevent the root process
@@ -218,10 +222,13 @@ static pid_t parent_pid;
  static apr_os_thread_t *listener_os_thread;
  
  /* Locks for accept serialization */
-static apr_proc_mutex_t *accept_mutex;
+static apr_proc_mutex_t **accept_mutex;
+static apr_proc_mutex_t *child_mutex;
+static ap_listen_rec *child_listen;
+static int *bucket;    /* bucket array for the httpd child processes */
  
  #ifdef SINGLE_LISTEN_UNSERIALIZED_ACCEPT
-#define SAFE_ACCEPT(stmt) (ap_listeners->next ? (stmt) : APR_SUCCESS)
+#define SAFE_ACCEPT(stmt) (child_listen->next ? (stmt) : APR_SUCCESS)
  #else
  #define SAFE_ACCEPT(stmt) (stmt)
  #endif
@@ -701,7 +708,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
          clean_child_exit(APEXIT_CHILDSICK);
      }
  
-    for (lr = ap_listeners; lr != NULL; lr = lr->next) {
+    for (lr = child_listen; lr != NULL; lr = lr->next) {
          apr_pollfd_t pfd = { 0 };
  
          pfd.desc_type = APR_POLL_SOCKET;
@@ -758,7 +765,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
          /* We've already decremented the idle worker count inside
           * ap_queue_info_wait_for_idler. */
  
-        if ((rv = SAFE_ACCEPT(apr_proc_mutex_lock(accept_mutex)))
+        if ((rv = SAFE_ACCEPT(apr_proc_mutex_lock(child_mutex)))
              != APR_SUCCESS) {
  
              if (!listener_may_exit) {
@@ -767,9 +774,9 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
              break;                    /* skip the lock release */
          }
  
-        if (!ap_listeners->next) {
+        if (!child_listen->next) {
              /* Only one listener, so skip the poll */
-            lr = ap_listeners;
+            lr = child_listen;
          }
          else {
              while (!listener_may_exit) {
@@ -839,7 +846,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
                  resource_shortage = 1;
                  signal_threads(ST_GRACEFUL);
              }
-            if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(accept_mutex)))
+            if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(child_mutex)))
                  != APR_SUCCESS) {
  
                  if (listener_may_exit) {
@@ -863,7 +870,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_thread_t *thd, void * dummy)
              }
          }
          else {
-            if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(accept_mutex)))
+            if ((rv = SAFE_ACCEPT(apr_proc_mutex_unlock(child_mutex)))
                  != APR_SUCCESS) {
                  int level = APLOG_EMERG;
  
@@ -1217,6 +1224,8 @@ static void child_main(int child_num_arg)
      thread_starter *ts;
      apr_threadattr_t *thread_attr;
      apr_thread_t *start_thread_id;
+    int i;
+    ap_listen_rec *lr;
  
      mpm_state = AP_MPMQ_STARTING; /* for benefit of any hooks that run as this
                                     * child initializes
@@ -1225,11 +1234,24 @@ static void child_main(int child_num_arg)
      ap_fatal_signal_child_setup(ap_server_conf);
      apr_pool_create(&pchild, pconf);
  
+    /* close unused listeners and pods */
+    for (i = 0; i < num_buckets; i++) {
+        if (i != bucket[child_num_arg]) {
+            lr = mpm_listen[i];
+            while(lr) {
+                apr_socket_close(lr->sd);
+                lr = lr->next;
+            }
+            mpm_listen[i]->active = 0;
+            ap_mpm_podx_close(pod[i]);
+        }
+    }
+
      /*stuff to do before we switch id's, so we have permissions.*/
      ap_reopen_scoreboard(pchild, NULL, 0);
  
-    rv = SAFE_ACCEPT(apr_proc_mutex_child_init(&accept_mutex,
-                                               apr_proc_mutex_lockfile(accept_mutex),
+    rv = SAFE_ACCEPT(apr_proc_mutex_child_init(&child_mutex,
+                                               apr_proc_mutex_lockfile(child_mutex),
                                                 pchild));
      if (rv != APR_SUCCESS) {
          ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO(00280)
@@ -1338,7 +1360,7 @@ static void child_main(int child_num_arg)
          apr_signal(SIGTERM, dummy_signal_handler);
          /* Watch for any messages from the parent over the POD */
          while (1) {
-            rv = ap_mpm_podx_check(pod);
+            rv = ap_mpm_podx_check(child_pod);
              if (rv == AP_MPM_PODX_NORESTART) {
                  /* see if termination was triggered while we slept */
                  switch(terminate_mode) {
@@ -1391,6 +1413,10 @@ static int make_child(server_rec *s, int slot)
          /* NOTREACHED */
      }
  
+    child_listen = mpm_listen[bucket[slot]];
+    child_mutex = accept_mutex[bucket[slot]];
+    child_pod = pod[bucket[slot]];
+
      if ((pid = fork()) == -1) {
          ap_log_error(APLOG_MARK, APLOG_ERR, errno, s, APLOGNO(00283)
                       "fork: Unable to fork new process");
@@ -1449,6 +1475,7 @@ static void startup_children(int number_to_start)
          if (ap_scoreboard_image->parent[i].pid != 0) {
              continue;
          }
+        bucket[i] = i % num_buckets;
          if (make_child(ap_server_conf, i) < 0) {
              break;
          }
@@ -1456,7 +1483,7 @@ static void startup_children(int number_to_start)
      }
  }
  
-static void perform_idle_server_maintenance(void)
+static void perform_idle_server_maintenance(int child_bucket)
  {
      int i, j;
      int idle_thread_count;
@@ -1485,7 +1512,7 @@ static void perform_idle_server_maintenance(void)
          int all_dead_threads = 1;
          int child_threads_active = 0;
  
-        if (i >= retained->max_daemons_limit && totally_free_length == retained->idle_spawn_rate)
+        if (i >= retained->max_daemons_limit && totally_free_length == retained->idle_spawn_rate[child_bucket])
              /* short cut if all active processes have been examined and
               * enough empty scoreboard slots have been found
               */
@@ -1513,7 +1540,8 @@ static void perform_idle_server_maintenance(void)
                                     loop if no pid?  not much else matters */
                  if (status <= SERVER_READY &&
                          !ps->quiescing &&
-                        ps->generation == retained->my_generation) {
+                        ps->generation == retained->my_generation &&
+                        bucket[i] == child_bucket) {
                      ++idle_thread_count;
                  }
                  if (status >= SERVER_READY && status < SERVER_GRACEFUL) {
@@ -1522,8 +1550,8 @@ static void perform_idle_server_maintenance(void)
              }
          }
          active_thread_count += child_threads_active;
-        if (any_dead_threads && totally_free_length < retained->idle_spawn_rate
-                && free_length < MAX_SPAWN_RATE
+        if (any_dead_threads && totally_free_length < retained->idle_spawn_rate[child_bucket]
+                && free_length < MAX_SPAWN_RATE/num_buckets
                  && (!ps->pid               /* no process in the slot */
                      || ps->quiescing)) {   /* or at least one is going away */
              if (all_dead_threads) {
@@ -1579,12 +1607,12 @@ static void perform_idle_server_maintenance(void)
  
      retained->max_daemons_limit = last_non_dead + 1;
  
-    if (idle_thread_count > max_spare_threads) {
+    if (idle_thread_count > max_spare_threads/num_buckets) {
          /* Kill off one child */
-        ap_mpm_podx_signal(pod, AP_MPM_PODX_GRACEFUL);
-        retained->idle_spawn_rate = 1;
+        ap_mpm_podx_signal(pod[child_bucket], AP_MPM_PODX_GRACEFUL);
+        retained->idle_spawn_rate[child_bucket] = 1;
      }
-    else if (idle_thread_count < min_spare_threads) {
+    else if (idle_thread_count < min_spare_threads/num_buckets) {
          /* terminate the free list */
          if (free_length == 0) { /* scoreboard is full, can't fork */
  
@@ -1615,13 +1643,13 @@ static void perform_idle_server_maintenance(void)
                               ap_server_conf, APLOGNO(00288)
                               "scoreboard is full, not at MaxRequestWorkers");
              }
-            retained->idle_spawn_rate = 1;
+            retained->idle_spawn_rate[child_bucket] = 1;
          }
          else {
-            if (free_length > retained->idle_spawn_rate) {
-                free_length = retained->idle_spawn_rate;
+            if (free_length > retained->idle_spawn_rate[child_bucket]) {
+                free_length = retained->idle_spawn_rate[child_bucket];
              }
-            if (retained->idle_spawn_rate >= 8) {
+            if (retained->idle_spawn_rate[child_bucket] >= 8) {
                  ap_log_error(APLOG_MARK, APLOG_INFO, 0,
                               ap_server_conf, APLOGNO(00289)
                               "server seems busy, (you may need "
@@ -1632,6 +1660,7 @@ static void perform_idle_server_maintenance(void)
                               idle_thread_count, total_non_dead);
              }
              for (i = 0; i < free_length; ++i) {
+                bucket[free_slots[i]] = child_bucket;
                  make_child(ap_server_conf, free_slots[i]);
              }
              /* the next time around we want to spawn twice as many if this
@@ -1640,13 +1669,13 @@ static void perform_idle_server_maintenance(void)
              if (retained->hold_off_on_exponential_spawning) {
                  --retained->hold_off_on_exponential_spawning;
              }
-            else if (retained->idle_spawn_rate < MAX_SPAWN_RATE) {
-                retained->idle_spawn_rate *= 2;
+            else if (retained->idle_spawn_rate[child_bucket] < MAX_SPAWN_RATE/num_buckets) {
+                retained->idle_spawn_rate[child_bucket] *= 2;
              }
          }
      }
      else {
-      retained->idle_spawn_rate = 1;
+      retained->idle_spawn_rate[child_bucket] = 1;
      }
  }
  
@@ -1702,7 +1731,7 @@ static void server_main_loop(int remaining_children_to_start)
                  ap_scoreboard_image->parent[child_slot].quiescing = 0;
                  if (processed_status == APEXIT_CHILDSICK) {
                      /* resource shortage, minimize the fork rate */
-                    retained->idle_spawn_rate = 1;
+                    retained->idle_spawn_rate[bucket[child_slot]] = 1;
                  }
                  else if (remaining_children_to_start
                      && child_slot < ap_daemons_limit) {
@@ -1719,7 +1748,9 @@ static void server_main_loop(int remaining_children_to_start)
                  if (processed_status == APEXIT_CHILDSICK
                      && old_gen == retained->my_generation) {
                      /* resource shortage, minimize the fork rate */
-                    retained->idle_spawn_rate = 1;
+                    for (i = 0; i < num_buckets; i++) {
+                        retained->idle_spawn_rate[i] = 1;
+                    }
                  }
  #if APR_HAS_OTHER_CHILD
              }
@@ -1758,7 +1789,9 @@ static void server_main_loop(int remaining_children_to_start)
              continue;
          }
  
-        perform_idle_server_maintenance();
+        for (i = 0; i < num_buckets; i++) {
+            perform_idle_server_maintenance(i);
+        }
      }
  }
  
@@ -1766,16 +1799,25 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
  {
      int remaining_children_to_start;
      apr_status_t rv;
+    int i;
+    ap_listen_rec *lr;
  
      ap_log_pid(pconf, ap_pid_fname);
  
+    bucket = apr_palloc(_pconf, sizeof(int) *  ap_daemons_limit);
      /* Initialize cross-process accept lock */
-    rv = ap_proc_mutex_create(&accept_mutex, NULL, AP_ACCEPT_MUTEX_TYPE, NULL,
-                              s, _pconf, 0);
-    if (rv != APR_SUCCESS) {
-        mpm_state = AP_MPMQ_STOPPING;
-        return DONE;
+    accept_mutex = apr_palloc(_pconf, sizeof(apr_proc_mutex_t *) * num_buckets);
+    for (i = 0; i < num_buckets; i++) {
+        rv = ap_proc_mutex_create(&accept_mutex[i], NULL, AP_ACCEPT_MUTEX_TYPE, NULL,
+                                  s, _pconf, 0);
+        if (rv != APR_SUCCESS) {
+            mpm_state = AP_MPMQ_STOPPING;
+            return DONE;
+        }
      }
+    for (lr = ap_listeners; lr; lr = lr->next) {
+        apr_socket_close(lr->sd);
+     }
  
      if (!retained->is_graceful) {
          if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
@@ -1791,8 +1833,8 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
      restart_pending = shutdown_pending = 0;
      set_signals();
      /* Don't thrash... */
-    if (max_spare_threads < min_spare_threads + threads_per_child)
-        max_spare_threads = min_spare_threads + threads_per_child;
+    if (max_spare_threads < min_spare_threads + threads_per_child * num_buckets)
+        max_spare_threads = min_spare_threads + threads_per_child * num_buckets;
  
      /* If we're doing a graceful_restart then we're going to see a lot
       * of children exiting immediately when we get into the main loop
@@ -1825,7 +1867,7 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
      ap_log_command_line(plog, s);
      ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00294)
                  "Accept mutex: %s (default: %s)",
-                apr_proc_mutex_name(accept_mutex),
+                apr_proc_mutex_name(accept_mutex[0]),
                  apr_proc_mutex_defname());
      mpm_state = AP_MPMQ_RUNNING;
  
@@ -1836,7 +1878,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
          /* Time to shut down:
           * Kill child processes, tell them to call child_exit, etc...
           */
-        ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+        for (i = 0; i < num_buckets; i++) {
+            ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
+        }
          ap_reclaim_child_processes(1, /* Start with SIGTERM */
                                     worker_note_child_killed);
  
@@ -1857,7 +1901,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
  
          /* Close our listeners, and then ask our children to do same */
          ap_close_listeners();
-        ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
+        for (i = 0; i < num_buckets; i++) {
+            ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
+        }
          ap_relieve_child_processes(worker_note_child_killed);
  
          if (!child_fatal) {
@@ -1897,7 +1943,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
           * way, try and make sure that all of our processes are
           * really dead.
           */
-        ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+        for (i = 0; i < num_buckets; i++) {
+            ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
+        }
          ap_reclaim_child_processes(1, worker_note_child_killed);
  
          return DONE;
@@ -1922,8 +1970,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
          ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00297)
                       AP_SIG_GRACEFUL_STRING " received.  Doing graceful restart");
          /* wake up the children...time to die.  But we'll have more soon */
-        ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
-
+        for (i = 0; i < num_buckets; i++) {
+            ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_GRACEFUL);
+        }
  
          /* This is mostly for debugging... so that we know what is still
           * gracefully dealing with existing request.
@@ -1935,7 +1984,9 @@ static int worker_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
           * and a SIGHUP, we may as well use the same signal, because some user
           * pthreads are stealing signals from us left and right.
           */
-        ap_mpm_podx_killpg(pod, ap_daemons_limit, AP_MPM_PODX_RESTART);
+        for (i = 0; i < num_buckets; i++) {
+            ap_mpm_podx_killpg(pod[i], ap_daemons_limit, AP_MPM_PODX_RESTART);
+        }
  
          ap_reclaim_child_processes(1, /* Start with SIGTERM */
                                     worker_note_child_killed);
@@ -1954,6 +2005,8 @@ static int worker_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
      int startup = 0;
      int level_flags = 0;
      apr_status_t rv;
+    int i;
+    int num_of_cores = 0;
  
      pconf = p;
  
@@ -1963,19 +2016,42 @@ static int worker_open_logs(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp,
          level_flags |= APLOG_STARTUP;
      }
  
+    enable_default_listener = 0;
      if ((num_listensocks = ap_setup_listeners(ap_server_conf)) < 1) {
          ap_log_error(APLOG_MARK, APLOG_ALERT | level_flags, 0,
                       (startup ? NULL : s),
                       "no listening sockets available, shutting down");
          return DONE;
      }
+    enable_default_listener = 1;
+    if (have_so_reuseport) {
+#ifdef _SC_NPROCESSORS_ONLN
+        num_of_cores = sysconf(_SC_NPROCESSORS_ONLN);
+#else
+        num_of_cores = 1;
+#endif
+        if (num_of_cores > 8) {
+            num_buckets = num_of_cores/8;
+        }
+        else {
+            num_buckets = 1;
+        }
+    }
+    else {
+        num_buckets = 1;
+    }
+
+    ap_duplicate_listeners(ap_server_conf, pconf, num_buckets);
  
+    pod = apr_palloc(pconf, sizeof(ap_pod_t *) * num_buckets);
      if (!one_process) {
-        if ((rv = ap_mpm_podx_open(pconf, &pod))) {
-            ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
-                         (startup ? NULL : s),
-                         "could not open pipe-of-death");
-            return DONE;
+        for (i = 0; i < num_buckets; i++) {
+            if ((rv = ap_mpm_podx_open(pconf, &pod[i]))) {
+                ap_log_error(APLOG_MARK, APLOG_CRIT | level_flags, rv,
+                             (startup ? NULL : s),
+                             "could not open pipe-of-death");
+                return DONE;
+            }
          }
      }
      return OK;
@@ -1987,6 +2063,7 @@ static int worker_pre_config(apr_pool_t *pconf, apr_pool_t *plog,
      int no_detach, debug, foreground;
      apr_status_t rv;
      const char *userdata_key = "mpm_worker_module";
+    int i;
  
      mpm_state = AP_MPMQ_STARTING;
  
@@ -2009,7 +2086,6 @@ static int worker_pre_config(apr_pool_t *pconf, apr_pool_t *plog,
      if (!retained) {
          retained = ap_retained_data_create(userdata_key, sizeof(*retained));
          retained->max_daemons_limit = -1;
-        retained->idle_spawn_rate = 1;
      }
      ++retained->module_loads;
      if (retained->module_loads == 2) {
@@ -2023,6 +2099,10 @@ static int worker_pre_config(apr_pool_t *pconf, apr_pool_t *plog,
                               "apr_proc_detach failed");
                  return HTTP_INTERNAL_SERVER_ERROR;
              }
+            retained->idle_spawn_rate = apr_palloc(pconf, sizeof(int) * num_buckets);
+            for (i = 0; i< num_buckets; i++) {
+                retained->idle_spawn_rate[i] = 1;
+            }
          }
      }
  
diff --git a/server/mpm_unix.c b/server/mpm_unix.c

index 0000cb6672c3f867952dd063d79f3de764b49fb6..97e3e65dff6056a97368be4615d43fda207a09bd 100644 (file)
--- a/server/mpm_unix.c
+++ b/server/mpm_unix.c
@@ -615,6 +615,7 @@ static apr_status_t dummy_connection(ap_pod_t *pod)
      apr_pool_t *p;
      apr_size_t len;
      ap_listen_rec *lp;
+    int i;
  
      /* create a temporary pool for the socket.  pconf stays around too long */
      rv = apr_pool_create(&p, pod->p);
@@ -626,87 +627,89 @@ static apr_status_t dummy_connection(ap_pod_t *pod)
       * plain-HTTP, not SSL; using an SSL port would either be
       * expensive to do correctly (performing a complete SSL handshake)
       * or cause log spam by doing incorrectly (simply sending EOF). */
-    lp = ap_listeners;
-    while (lp && lp->protocol && strcasecmp(lp->protocol, "http") != 0) {
-        lp = lp->next;
-    }
-    if (!lp) {
-        lp = ap_listeners;
-    }
+    for (i = 0; i < num_buckets; i++) {
+        lp = mpm_listen[i];
+        while (lp && lp->protocol && strcasecmp(lp->protocol, "http") != 0) {
+            lp = lp->next;
+        }
+        if (!lp) {
+            lp = mpm_listen[i];
+        }
  
-    rv = apr_socket_create(&sock, lp->bind_addr->family, SOCK_STREAM, 0, p);
-    if (rv != APR_SUCCESS) {
-        ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00054)
-                     "get socket to connect to listener");
-        apr_pool_destroy(p);
-        return rv;
-    }
+        rv = apr_socket_create(&sock, lp->bind_addr->family, SOCK_STREAM, 0, p);
+        if (rv != APR_SUCCESS) {
+            ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00054)
+                         "get socket to connect to listener");
+            apr_pool_destroy(p);
+            return rv;
+        }
  
-    /* on some platforms (e.g., FreeBSD), the kernel won't accept many
-     * queued connections before it starts blocking local connects...
-     * we need to keep from blocking too long and instead return an error,
-     * because the MPM won't want to hold up a graceful restart for a
-     * long time
-     */
-    rv = apr_socket_timeout_set(sock, apr_time_from_sec(3));
-    if (rv != APR_SUCCESS) {
-        ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00055)
-                     "set timeout on socket to connect to listener");
-        apr_socket_close(sock);
-        apr_pool_destroy(p);
-        return rv;
-    }
+        /* on some platforms (e.g., FreeBSD), the kernel won't accept many
+         * queued connections before it starts blocking local connects...
+         * we need to keep from blocking too long and instead return an error,
+         * because the MPM won't want to hold up a graceful restart for a
+         * long time
+         */
+        rv = apr_socket_timeout_set(sock, apr_time_from_sec(3));
+        if (rv != APR_SUCCESS) {
+            ap_log_error(APLOG_MARK, APLOG_WARNING, rv, ap_server_conf, APLOGNO(00055)
+                         "set timeout on socket to connect to listener");
+            apr_socket_close(sock);
+            apr_pool_destroy(p);
+            return rv;
+        }
  
-    rv = apr_socket_connect(sock, lp->bind_addr);
-    if (rv != APR_SUCCESS) {
-        int log_level = APLOG_WARNING;
-
-        if (APR_STATUS_IS_TIMEUP(rv)) {
-            /* probably some server processes bailed out already and there
-             * is nobody around to call accept and clear out the kernel
-             * connection queue; usually this is not worth logging
-             */
-            log_level = APLOG_DEBUG;
+        rv = apr_socket_connect(sock, lp->bind_addr);
+        if (rv != APR_SUCCESS) {
+            int log_level = APLOG_WARNING;
+
+            if (APR_STATUS_IS_TIMEUP(rv)) {
+                /* probably some server processes bailed out already and there
+                 * is nobody around to call accept and clear out the kernel
+                 * connection queue; usually this is not worth logging
+                 */
+                log_level = APLOG_DEBUG;
+            }
+
+            ap_log_error(APLOG_MARK, log_level, rv, ap_server_conf, APLOGNO(00056)
+                         "connect to listener on %pI", lp->bind_addr);
+            apr_pool_destroy(p);
+            return rv;
          }
  
-        ap_log_error(APLOG_MARK, log_level, rv, ap_server_conf, APLOGNO(00056)
-                     "connect to listener on %pI", lp->bind_addr);
-        apr_pool_destroy(p);
-        return rv;
-    }
+        if (lp->protocol && strcasecmp(lp->protocol, "https") == 0) {
+            /* Send a TLS 1.0 close_notify alert.  This is perhaps the
+             * "least wrong" way to open and cleanly terminate an SSL
+             * connection.  It should "work" without noisy error logs if
+             * the server actually expects SSLv3/TLSv1.  With
+             * SSLv23_server_method() OpenSSL's SSL_accept() fails
+             * ungracefully on receipt of this message, since it requires
+             * an 11-byte ClientHello message and this is too short. */
+            static const unsigned char tls10_close_notify[7] = {
+                '\x15',         /* TLSPlainText.type = Alert (21) */
+                '\x03', '\x01', /* TLSPlainText.version = {3, 1} */
+                '\x00', '\x02', /* TLSPlainText.length = 2 */
+                '\x01',         /* Alert.level = warning (1) */
+                '\x00'          /* Alert.description = close_notify (0) */
+            };
+            data = (const char *)tls10_close_notify;
+            len = sizeof(tls10_close_notify);
+        }
+        else /* ... XXX other request types here? */ {
+            /* Create an HTTP request string.  We include a User-Agent so
+             * that administrators can track down the cause of the
+             * odd-looking requests in their logs.  A complete request is
+             * used since kernel-level filtering may require that much
+             * data before returning from accept(). */
+            data = apr_pstrcat(p, "OPTIONS * HTTP/1.0\r\nUser-Agent: ",
+                               ap_get_server_description(),
+                               " (internal dummy connection)\r\n\r\n", NULL);
+            len = strlen(data);
+        }
  
-    if (lp->protocol && strcasecmp(lp->protocol, "https") == 0) {
-        /* Send a TLS 1.0 close_notify alert.  This is perhaps the
-         * "least wrong" way to open and cleanly terminate an SSL
-         * connection.  It should "work" without noisy error logs if
-         * the server actually expects SSLv3/TLSv1.  With
-         * SSLv23_server_method() OpenSSL's SSL_accept() fails
-         * ungracefully on receipt of this message, since it requires
-         * an 11-byte ClientHello message and this is too short. */
-        static const unsigned char tls10_close_notify[7] = {
-            '\x15',         /* TLSPlainText.type = Alert (21) */
-            '\x03', '\x01', /* TLSPlainText.version = {3, 1} */
-            '\x00', '\x02', /* TLSPlainText.length = 2 */
-            '\x01',         /* Alert.level = warning (1) */
-            '\x00'          /* Alert.description = close_notify (0) */
-        };
-        data = (const char *)tls10_close_notify;
-        len = sizeof(tls10_close_notify);
-    }
-    else /* ... XXX other request types here? */ {
-        /* Create an HTTP request string.  We include a User-Agent so
-         * that adminstrators can track down the cause of the
-         * odd-looking requests in their logs.  A complete request is
-         * used since kernel-level filtering may require that much
-         * data before returning from accept(). */
-        data = apr_pstrcat(p, "OPTIONS * HTTP/1.0\r\nUser-Agent: ",
-                           ap_get_server_description(),
-                           " (internal dummy connection)\r\n\r\n", NULL);
-        len = strlen(data);
+        apr_socket_send(sock, data, &len);
+        apr_socket_close(sock);
      }
-
-    apr_socket_send(sock, data, &len);
-    apr_socket_close(sock);
      apr_pool_destroy(p);
  
      return rv;
author	Jim Jagielski <jim@apache.org>
	Tue, 3 Jun 2014 13:07:29 +0000 (13:07 +0000)
committer	Jim Jagielski <jim@apache.org>
	Tue, 3 Jun 2014 13:07:29 +0000 (13:07 +0000)
CHANGES		patch \| blob \| history
include/ap_listen.h		patch \| blob \| history
server/listen.c		patch \| blob \| history
server/mpm/event/event.c		patch \| blob \| history
server/mpm/prefork/prefork.c		patch \| blob \| history
server/mpm/worker/worker.c		patch \| blob \| history
server/mpm_unix.c		patch \| blob \| history