static int workers_may_exit = 0;
static int start_thread_may_exit = 0;
static int listener_may_exit = 0;
+static int listener_is_wakeable = 0; /* Pollset supports APR_POLLSET_WAKEABLE */
static int num_listensocks = 0;
static apr_int32_t conns_this_child; /* MaxConnectionsPerChild, only access
in listener thread */
struct event_srv_cfg_s;
typedef struct event_srv_cfg_s event_srv_cfg;
+static apr_pollfd_t *listener_pollfd;
+
+/*
+ * The pollset for sockets that are in any of the timeout queues. Currently
+ * we use the timeout_mutex to make sure that connections are added/removed
+ * atomically to/from both event_pollset and a timeout queue. Otherwise
+ * some confusion can happen under high load if timeout queues and pollset
+ * get out of sync.
+ * XXX: It should be possible to make the lock unnecessary in many or even all
+ * XXX: cases.
+ */
+static apr_pollset_t *event_pollset;
+
struct event_conn_state_t {
/** APR_RING of expiration timeouts */
APR_RING_ENTRY(event_conn_state_t) timeout_list;
*keepalive_q,
*linger_q,
*short_linger_q;
+static volatile apr_time_t queues_next_expiry;
-static apr_pollfd_t *listener_pollfd;
+/* Prevent extra poll/wakeup calls for timeouts close in the future (queues
+ * have the granularity of a second anyway).
+ * XXX: Wouldn't 0.5s (instead of 0.1s) be "enough"?
+ */
+#define TIMEOUT_FUDGE_FACTOR apr_time_from_msec(100)
/*
* Macros for accessing struct timeout_queue.
*/
static void TO_QUEUE_APPEND(struct timeout_queue *q, event_conn_state_t *el)
{
+ apr_time_t q_expiry;
+ apr_time_t next_expiry;
+
APR_RING_INSERT_TAIL(&q->head, el, event_conn_state_t, timeout_list);
apr_atomic_inc32(q->total);
++q->count;
+
+ /* Cheaply update the overall queues' next expiry according to the
+ * first entry of this queue (oldest), if necessary.
+ */
+ el = APR_RING_FIRST(&q->head);
+ q_expiry = el->queue_timestamp + q->timeout;
+ next_expiry = queues_next_expiry;
+ if (!next_expiry || next_expiry > q_expiry + TIMEOUT_FUDGE_FACTOR) {
+ queues_next_expiry = q_expiry;
+ /* Unblock the poll()ing listener for it to update its timeout. */
+ if (listener_is_wakeable) {
+ apr_pollset_wakeup(event_pollset);
+ }
+ }
}
static void TO_QUEUE_REMOVE(struct timeout_queue *q, event_conn_state_t *el)
#define TO_QUEUE_ELEM_INIT(el) \
APR_RING_ELEM_INIT((el), timeout_list)
-/*
- * The pollset for sockets that are in any of the timeout queues. Currently
- * we use the timeout_mutex to make sure that connections are added/removed
- * atomically to/from both event_pollset and a timeout queue. Otherwise
- * some confusion can happen under high load if timeout queues and pollset
- * get out of sync.
- * XXX: It should be possible to make the lock unnecessary in many or even all
- * XXX: cases.
- */
-static apr_pollset_t *event_pollset;
-
#if HAVE_SERF
typedef struct {
apr_pollset_t *pollset;
return;
}
+ /* Unblock the listener if it's poll()ing */
+ if (listener_is_wakeable) {
+ apr_pollset_wakeup(event_pollset);
+ }
+
/* unblock the listener if it's waiting for a worker */
ap_queue_info_term(worker_queue_info);
default:
break;
}
- apr_atomic_dec32(&connection_count);
+ /* Unblock the listener if it's waiting for connection_count = 0 */
+ if (!apr_atomic_dec32(&connection_count)
+ && listener_is_wakeable && listener_may_exit) {
+ apr_pollset_wakeup(event_pollset);
+ }
return APR_SUCCESS;
}
static APR_RING_HEAD(timer_free_ring_t, timer_event_t) timer_free_ring;
static apr_skiplist *timer_skiplist;
+static volatile apr_time_t timers_next_expiry;
+
+/* Same goal as for TIMEOUT_FUDGE_FACTOR (avoid extra poll calls), but applied
+ * to timers. Since their timeouts are custom (user defined), we can't be too
+ * approximative here (hence using 0.01s).
+ */
+#define EVENT_FUDGE_FACTOR apr_time_from_msec(10)
/* The following compare function is used by apr_skiplist_insert() to keep the
* elements (timers) sorted and provide O(log n) complexity (this is also true
te->remove = remove;
if (insert) {
+ apr_time_t next_expiry;
+
/* Okay, add sorted by when.. */
apr_skiplist_insert(timer_skiplist, te);
+
+ /* Cheaply update the overall timers' next expiry according to
+ * this event, if necessary.
+ */
+ next_expiry = timers_next_expiry;
+ if (!next_expiry || next_expiry > te->when + EVENT_FUDGE_FACTOR) {
+ timers_next_expiry = te->when;
+ /* Unblock the poll()ing listener for it to update its timeout. */
+ if (listener_is_wakeable) {
+ apr_pollset_wakeup(event_pollset);
+ }
+ }
}
apr_thread_mutex_unlock(g_timer_skiplist_mtx);
count = 0;
cs = first = last = APR_RING_FIRST(&qp->head);
while (cs != APR_RING_SENTINEL(&qp->head, event_conn_state_t,
- timeout_list)
- /* Trash the entry if:
- * - no timeout_time was given (asked for all), or
- * - it expired (according to the queue timeout), or
- * - the system clock skewed in the past: no entry should be
- * registered above the given timeout_time (~now) + the queue
- * timeout, we won't keep any here (eg. for centuries).
- * Stop otherwise, no following entry will match thanks to the
- * single timeout per queue (entries are added to the end!).
- * This allows maintenance in O(1).
- */
- && (!timeout_time
- || cs->queue_timestamp + qp->timeout < timeout_time
- || cs->queue_timestamp > timeout_time + qp->timeout)) {
+ timeout_list)) {
+ /* Trash the entry if:
+ * - no timeout_time was given (asked for all), or
+ * - it expired (according to the queue timeout), or
+ * - the system clock skewed in the past: no entry should be
+ * registered above the given timeout_time (~now) + the queue
+ * timeout, we won't keep any here (eg. for centuries).
+ *
+ * Otherwise stop, no following entry will match thanks to the
+ * single timeout per queue (entries are added to the end!).
+ * This allows maintenance in O(1).
+ */
+ if (timeout_time
+ && cs->queue_timestamp + qp->timeout > timeout_time
+ && cs->queue_timestamp < timeout_time + qp->timeout) {
+ /* Since this is the next expiring of this queue, update the
+ * overall queues' next expiry if it's later than this one.
+ */
+ apr_time_t q_expiry = cs->queue_timestamp + qp->timeout;
+ apr_time_t next_expiry = queues_next_expiry;
+ if (!next_expiry || next_expiry > q_expiry) {
+ queues_next_expiry = q_expiry;
+ }
+ break;
+ }
+
last = cs;
rv = apr_pollset_remove(event_pollset, &cs->pfd);
if (rv != APR_SUCCESS && !APR_STATUS_IS_NOTFOUND(rv)) {
int process_slot = ti->pslot;
struct process_score *ps = ap_get_scoreboard_process(process_slot);
apr_pool_t *tpool = apr_thread_pool_get(thd);
- apr_time_t timeout_time = 0, last_log;
int closed = 0, listeners_disabled = 0;
int have_idle_worker = 0;
+ apr_time_t last_log;
last_log = apr_time_now();
free(ti);
- /* the following times out events that are really close in the future
- * to prevent extra poll calls
- *
- * current value is .1 second
- */
-#define TIMEOUT_FUDGE_FACTOR 100000
-#define EVENT_FUDGE_FACTOR 10000
-
rc = init_pollset(tpool);
if (rc != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_ERR, rc, ap_server_conf,
apr_int32_t num = 0;
apr_uint32_t c_count, l_count, i_count;
apr_interval_time_t timeout_interval;
- apr_time_t now;
+ apr_time_t now, timeout_time;
int workers_were_busy = 0;
int keepalives;
if (APLOGtrace6(ap_server_conf)) {
now = apr_time_now();
/* trace log status every second */
- if (now - last_log > apr_time_from_msec(1000)) {
+ if (now - last_log > apr_time_from_sec(1)) {
last_log = now;
apr_thread_mutex_lock(timeout_mutex);
ap_log_error(APLOG_MARK, APLOG_TRACE6, 0, ap_server_conf,
#endif
now = apr_time_now();
- apr_thread_mutex_lock(g_timer_skiplist_mtx);
- te = apr_skiplist_peek(timer_skiplist);
- if (te) {
- if (te->when > now) {
- timeout_interval = te->when - now;
- }
- else {
- timeout_interval = 1;
- }
- }
- else {
- timeout_interval = apr_time_from_msec(100);
- }
- while (te) {
- if (te->when < now + EVENT_FUDGE_FACTOR) {
+
+ /* Start with an infinite poll() timeout and update it according to
+ * the next expiring timer or queue entry. If there are none, either
+ * the listener is wakeable and it can poll() indefinitely until a wake
+ * up occurs, otherwise periodic checks (maintenance, shutdown, ...)
+ * must be performed.
+ */
+ timeout_interval = -1;
+
+ /* Push expired timers to a worker, the first remaining one determines
+ * the maximum time to poll() below, if any.
+ */
+ timeout_time = timers_next_expiry;
+ if (timeout_time && timeout_time < now + EVENT_FUDGE_FACTOR) {
+ apr_thread_mutex_lock(g_timer_skiplist_mtx);
+ while ((te = apr_skiplist_peek(timer_skiplist))) {
+ if (te->when > now + EVENT_FUDGE_FACTOR) {
+ timers_next_expiry = te->when;
+ timeout_interval = te->when - now;
+ break;
+ }
apr_skiplist_pop(timer_skiplist, NULL);
if (!te->canceled) {
if (te->remove) {
int i;
for (i = 0; i < te->remove->nelts; i++) {
- apr_pollfd_t *pfd = (apr_pollfd_t *)te->remove->elts + i;
+ apr_pollfd_t *pfd;
+ pfd = (apr_pollfd_t *)te->remove->elts + i;
apr_pollset_remove(event_pollset, pfd);
}
}
push_timer2worker(te);
}
else {
- APR_RING_INSERT_TAIL(&timer_free_ring, te, timer_event_t,
- link);
+ APR_RING_INSERT_TAIL(&timer_free_ring, te,
+ timer_event_t, link);
}
}
- else {
- break;
+ if (!te) {
+ timers_next_expiry = 0;
}
- te = apr_skiplist_peek(timer_skiplist);
+ apr_thread_mutex_unlock(g_timer_skiplist_mtx);
+ }
+
+ /* Same for queues, use their next expiry, if any. */
+ timeout_time = queues_next_expiry;
+ if (timeout_time
+ && (timeout_interval < 0
+ || timeout_time <= now
+ || timeout_interval > timeout_time - now)) {
+ timeout_interval = timeout_time > now ? timeout_time - now : 1;
+ }
+
+ /* When non-wakeable, don't wait more than 100 ms, in any case. */
+#define NON_WAKEABLE_POLL_TIMEOUT apr_time_from_msec(100)
+ if (!listener_is_wakeable
+ && (timeout_interval < 0
+ || timeout_interval > NON_WAKEABLE_POLL_TIMEOUT)) {
+ timeout_interval = NON_WAKEABLE_POLL_TIMEOUT;
}
- apr_thread_mutex_unlock(g_timer_skiplist_mtx);
rc = apr_pollset_poll(event_pollset, timeout_interval, &num, &out_pfd);
if (rc != APR_SUCCESS) {
if (APR_STATUS_IS_EINTR(rc)) {
+ /* Woken up, either update timeouts or shutdown,
+ * both logics are above.
+ */
continue;
}
if (!APR_STATUS_IS_TIMEUP(rc)) {
"shutdown process gracefully");
signal_threads(ST_GRACEFUL);
}
+ num = 0;
}
if (listener_may_exit) {
/* XXX possible optimization: stash the current time for use as
* r->request_time for new requests
*/
- now = apr_time_now();
- /* We only do this once per 0.1s (TIMEOUT_FUDGE_FACTOR), or on a clock
- * skew (if the system time is set back in the meantime, timeout_time
- * will exceed now + TIMEOUT_FUDGE_FACTOR, can't happen otherwise).
+ /* We process the timeout queues here only when their overall next
+ * expiry (read once above) is over. This happens accurately since
+ * adding to the queues (in workers) can only decrease this expiry,
+ * while latest ones are only taken into account here (in listener)
+ * during queues' processing, with the lock held. This works both
+ * with and without wake-ability.
*/
- if (now > timeout_time || now + TIMEOUT_FUDGE_FACTOR < timeout_time ) {
+ if (timeout_time && timeout_time < (now = apr_time_now())) {
timeout_time = now + TIMEOUT_FUDGE_FACTOR;
/* handle timed out sockets */
apr_thread_mutex_lock(timeout_mutex);
+ /* Processing all the queues below will recompute this. */
+ queues_next_expiry = 0;
+
/* Step 1: keepalive timeouts */
- /* If all workers are busy, we kill older keep-alive connections so that they
- * may connect to another process.
- */
- if ((workers_were_busy || dying)
- && (keepalives = apr_atomic_read32(keepalive_q->total))) {
- /* If all workers are busy, we kill older keep-alive connections so
- * that they may connect to another process.
- */
- ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
- "All workers are %s, will close %d keep-alive "
- "connections", dying ? "dying" : "busy",
- keepalives);
- process_timeout_queue(keepalive_q, 0,
- start_lingering_close_nonblocking);
- }
- else {
- process_timeout_queue(keepalive_q, timeout_time,
- start_lingering_close_nonblocking);
- }
+ process_timeout_queue(keepalive_q, timeout_time,
+ start_lingering_close_nonblocking);
/* Step 2: write completion timeouts */
process_timeout_queue(write_completion_q, timeout_time,
start_lingering_close_nonblocking);
ps->suspended = apr_atomic_read32(&suspended_count);
ps->lingering_close = apr_atomic_read32(&lingering_count);
}
+ else if ((workers_were_busy || dying)
+ && (keepalives = apr_atomic_read32(keepalive_q->total))) {
+ /* If all workers are busy, we kill older keep-alive connections so
+ * that they may connect to another process.
+ */
+ ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
+ "All workers are %s, will close %d keep-alive "
+ "connections", dying ? "dying" : "busy",
+ keepalives);
+ apr_thread_mutex_lock(timeout_mutex);
+ process_timeout_queue(keepalive_q, 0,
+ start_lingering_close_nonblocking);
+ apr_thread_mutex_unlock(timeout_mutex);
+ ps->keep_alive = apr_atomic_read32(keepalive_q->total);
+ }
+
if (listeners_disabled && !workers_were_busy
&& ((c_count = apr_atomic_read32(&connection_count))
>= (l_count = apr_atomic_read32(&lingering_count))
int prev_threads_created;
int max_recycled_pools = -1;
int good_methods[] = {APR_POLLSET_KQUEUE, APR_POLLSET_PORT, APR_POLLSET_EPOLL};
+ /* XXX don't we need more to handle K-A or lingering close? */
+ const apr_uint32_t pollset_size = threads_per_child * 2;
/* We must create the fd queues before we start up the listener
* and worker threads. */
/* Create the main pollset */
for (i = 0; i < sizeof(good_methods) / sizeof(good_methods[0]); i++) {
- rv = apr_pollset_create_ex(&event_pollset,
- threads_per_child*2, /* XXX don't we need more, to handle
- * connections in K-A or lingering
- * close?
- */
- pchild, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY | APR_POLLSET_NODEFAULT,
- good_methods[i]);
+ apr_uint32_t flags = APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY |
+ APR_POLLSET_NODEFAULT | APR_POLLSET_WAKEABLE;
+ rv = apr_pollset_create_ex(&event_pollset, pollset_size, pchild, flags,
+ good_methods[i]);
+ if (rv == APR_SUCCESS) {
+ listener_is_wakeable = 1;
+ break;
+ }
+ flags &= ~APR_POLLSET_WAKEABLE;
+ rv = apr_pollset_create_ex(&event_pollset, pollset_size, pchild, flags,
+ good_methods[i]);
if (rv == APR_SUCCESS) {
break;
}
}
if (rv != APR_SUCCESS) {
- rv = apr_pollset_create(&event_pollset,
- threads_per_child*2, /* XXX don't we need more, to handle
- * connections in K-A or lingering
- * close?
- */
- pchild, APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
+ rv = apr_pollset_create(&event_pollset, pollset_size, pchild,
+ APR_POLLSET_THREADSAFE | APR_POLLSET_NOCOPY);
}
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_ERR, rv, ap_server_conf, APLOGNO(03103)
}
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(02471)
- "start_threads: Using %s", apr_pollset_method_name(event_pollset));
+ "start_threads: Using %s (%swakeable)",
+ apr_pollset_method_name(event_pollset),
+ listener_is_wakeable ? "" : "not ");
worker_sockets = apr_pcalloc(pchild, threads_per_child
* sizeof(apr_socket_t *));