-/* Copyright 2001-2005 The Apache Software Foundation or its licensors, as
- * applicable.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* limitations under the License.
*/
-/*
- * core_filters.c --- Core input/output network filters.
+/**
+ * @file core_filters.c
+ * @brief Core input/output network filters.
*/
#include "apr.h"
#include "apr_fnmatch.h"
#include "apr_hash.h"
#include "apr_thread_proc.h" /* for RLIMIT stuff */
-#include "apr_hooks.h"
#define APR_WANT_IOVEC
#define APR_WANT_STRFUNC
#define APR_WANT_MEMFUNC
#include "apr_want.h"
-#define CORE_PRIVATE
#include "ap_config.h"
#include "httpd.h"
#include "http_config.h"
#include "apr_buckets.h"
#include "util_filter.h"
#include "util_ebcdic.h"
-#include "mpm.h"
#include "mpm_common.h"
#include "scoreboard.h"
#include "mod_core.h"
-#include "mod_proxy.h"
#include "ap_listen.h"
+#include "core.h"
#include "mod_so.h" /* for ap_find_loaded_module_symbol */
#define AP_MIN_SENDFILE_BYTES (256)
-typedef struct net_time_filter_ctx {
- apr_socket_t *csd;
- int first_line;
-} net_time_filter_ctx_t;
-
-int ap_net_time_filter(ap_filter_t *f, apr_bucket_brigade *b,
- ap_input_mode_t mode, apr_read_type_e block,
- apr_off_t readbytes)
-{
- net_time_filter_ctx_t *ctx = f->ctx;
- int keptalive = f->c->keepalive == AP_CONN_KEEPALIVE;
-
- if (!ctx) {
- f->ctx = ctx = apr_palloc(f->r->pool, sizeof(*ctx));
- ctx->first_line = 1;
- ctx->csd = ap_get_module_config(f->c->conn_config, &core_module);
- }
-
- if (mode != AP_MODE_INIT && mode != AP_MODE_EATCRLF) {
- if (ctx->first_line) {
- apr_socket_timeout_set(ctx->csd,
- keptalive
- ? f->c->base_server->keep_alive_timeout
- : f->c->base_server->timeout);
- ctx->first_line = 0;
- }
- else {
- if (keptalive) {
- apr_socket_timeout_set(ctx->csd, f->c->base_server->timeout);
- }
- }
- }
- return ap_get_brigade(f->next, b, mode, block, readbytes);
-}
-
/**
* Remove all zero length buckets from the brigade.
*/
apr_bucket_delete(e); \
e = d; \
} \
- e = APR_BUCKET_NEXT(e); \
+ else { \
+ e = APR_BUCKET_NEXT(e); \
+ } \
} while (!APR_BRIGADE_EMPTY(b) && (e != APR_BRIGADE_SENTINEL(b))); \
} while (0)
+/* we know core's module_index is 0 */
+#undef APLOG_MODULE_INDEX
+#define APLOG_MODULE_INDEX AP_CORE_MODULE_INDEX
-/**
- * Split the contents of a brigade after bucket 'e' to an existing brigade
- *
- * XXXX: Should this function be added to APR-Util?
- */
-static void brigade_move(apr_bucket_brigade *b, apr_bucket_brigade *a,
- apr_bucket *e)
-{
- apr_bucket *f;
+struct core_output_filter_ctx {
+ apr_bucket_brigade *empty_bb;
+ apr_size_t bytes_written;
+ struct iovec *vec;
+ apr_size_t nvec;
+};
- if (e != APR_BRIGADE_SENTINEL(b)) {
- f = APR_RING_LAST(&b->list);
- APR_RING_UNSPLICE(e, f, link);
- APR_RING_SPLICE_HEAD(&a->list, e, f, apr_bucket, link);
- }
+struct core_filter_ctx {
+ apr_bucket_brigade *bb;
+ apr_bucket_brigade *tmpbb;
+};
- APR_BRIGADE_CHECK_CONSISTENCY(a);
- APR_BRIGADE_CHECK_CONSISTENCY(b);
-}
-int ap_core_input_filter(ap_filter_t *f, apr_bucket_brigade *b,
- ap_input_mode_t mode, apr_read_type_e block,
- apr_off_t readbytes)
+apr_status_t ap_core_input_filter(ap_filter_t *f, apr_bucket_brigade *b,
+ ap_input_mode_t mode, apr_read_type_e block,
+ apr_off_t readbytes)
{
- apr_bucket *e;
- apr_status_t rv;
+ apr_status_t rv = APR_SUCCESS;
core_net_rec *net = f->ctx;
core_ctx_t *ctx = net->in_ctx;
const char *str;
if (!ctx)
{
- ctx = apr_pcalloc(f->c->pool, sizeof(*ctx));
- ctx->b = apr_brigade_create(f->c->pool, f->c->bucket_alloc);
- ctx->tmpbb = apr_brigade_create(ctx->b->p, ctx->b->bucket_alloc);
+ net->in_ctx = ctx = apr_palloc(f->c->pool, sizeof(*ctx));
+ ctx->bb = apr_brigade_create(f->c->pool, f->c->bucket_alloc);
+ ctx->tmpbb = apr_brigade_create(f->c->pool, f->c->bucket_alloc);
/* seed the brigade with the client socket. */
- e = apr_bucket_socket_create(net->client_socket, f->c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(ctx->b, e);
- net->in_ctx = ctx;
+ rv = ap_run_insert_network_bucket(f->c, ctx->bb, net->client_socket);
+ if (rv != APR_SUCCESS)
+ return rv;
}
- else if (APR_BRIGADE_EMPTY(ctx->b)) {
- return APR_EOF;
+ else {
+ ap_filter_reinstate_brigade(f, ctx->bb, NULL);
+ if (APR_BRIGADE_EMPTY(ctx->bb)) {
+ return APR_EOF;
+ }
}
/* ### This is bad. */
- BRIGADE_NORMALIZE(ctx->b);
+ BRIGADE_NORMALIZE(ctx->bb);
/* check for empty brigade again *AFTER* BRIGADE_NORMALIZE()
* If we have lost our socket bucket (see above), we are EOF.
* Ideally, this should be returning SUCCESS with EOS bucket, but
* some higher-up APIs (spec. read_request_line via ap_rgetline)
* want an error code. */
- if (APR_BRIGADE_EMPTY(ctx->b)) {
+ if (APR_BRIGADE_EMPTY(ctx->bb)) {
return APR_EOF;
}
if (mode == AP_MODE_GETLINE) {
/* we are reading a single LF line, e.g. the HTTP headers */
- rv = apr_brigade_split_line(b, ctx->b, block, HUGE_STRING_LEN);
+ rv = apr_brigade_split_line(b, ctx->bb, block, HUGE_STRING_LEN);
/* We should treat EAGAIN here the same as we do for EOF (brigade is
* empty). We do this by returning whatever we have read. This may
* or may not be bogus, but is consistent (for now) with EOF logic.
*/
- if (APR_STATUS_IS_EAGAIN(rv)) {
+ if (APR_STATUS_IS_EAGAIN(rv) && block == APR_NONBLOCK_READ) {
rv = APR_SUCCESS;
}
- return rv;
+ goto cleanup;
}
/* ### AP_MODE_PEEK is a horrific name for this mode because we also
* mean that there is another request, just a blank line.
*/
while (1) {
- if (APR_BRIGADE_EMPTY(ctx->b))
- return APR_EOF;
-
- e = APR_BRIGADE_FIRST(ctx->b);
+ if (APR_BRIGADE_EMPTY(ctx->bb)) {
+ rv = APR_EOF;
+ goto cleanup;
+ }
+ e = APR_BRIGADE_FIRST(ctx->bb);
rv = apr_bucket_read(e, &str, &len, APR_NONBLOCK_READ);
-
- if (rv != APR_SUCCESS)
- return rv;
+ if (rv != APR_SUCCESS) {
+ goto cleanup;
+ }
c = str;
while (c < str + len) {
else if (*c == APR_ASCII_CR && *(c + 1) == APR_ASCII_LF)
c += 2;
else
- return APR_SUCCESS;
+ goto cleanup;
}
/* If we reach here, we were a bucket just full of CRLFs, so
/* FIXME: Is this the right thing to do in the core? */
apr_bucket_delete(e);
}
- return APR_SUCCESS;
+
+ /* UNREACHABLE */
+ ap_assert(0);
}
/* If mode is EXHAUSTIVE, we want to just read everything until the end
* the brigade that was passed down, and send that brigade back.
*
* NOTE: This is VERY dangerous to use, and should only be done with
- * extreme caution. However, the Perchild MPM needs this feature
- * if it is ever going to work correctly again. With this, the Perchild
- * MPM can easily request the socket and all data that has been read,
- * which means that it can pass it to the correct child process.
+ * extreme caution. FWLIW, this would be needed by an MPM like Perchild;
+ * such an MPM can easily request the socket and all data that has been
+ * read, which means that it can pass it to the correct child process.
*/
if (mode == AP_MODE_EXHAUSTIVE) {
apr_bucket *e;
/* Tack on any buckets that were set aside. */
- APR_BRIGADE_CONCAT(b, ctx->b);
+ APR_BRIGADE_CONCAT(b, ctx->bb);
/* Since we've just added all potential buckets (which will most
* likely simply be the socket bucket) we know this is the end,
* must be EOS. */
e = apr_bucket_eos_create(f->c->bucket_alloc);
APR_BRIGADE_INSERT_TAIL(b, e);
- return APR_SUCCESS;
+
+ rv = APR_SUCCESS;
+ goto cleanup;
}
/* read up to the amount they specified. */
AP_DEBUG_ASSERT(readbytes > 0);
- e = APR_BRIGADE_FIRST(ctx->b);
+ e = APR_BRIGADE_FIRST(ctx->bb);
rv = apr_bucket_read(e, &str, &len, block);
-
- if (APR_STATUS_IS_EAGAIN(rv)) {
- return APR_SUCCESS;
- }
- else if (rv != APR_SUCCESS) {
- return rv;
+ if (rv != APR_SUCCESS) {
+ if (APR_STATUS_IS_EAGAIN(rv) && block == APR_NONBLOCK_READ) {
+ /* getting EAGAIN for a blocking read is an error; not for a
+ * non-blocking read, return an empty brigade. */
+ rv = APR_SUCCESS;
+ }
+ goto cleanup;
}
else if (block == APR_BLOCK_READ && len == 0) {
/* We wanted to read some bytes in blocking mode. We read
*
* When we are in normal mode, return an EOS bucket to the
* caller.
- * When we are in speculative mode, leave ctx->b empty, so
+ * When we are in speculative mode, leave ctx->bb empty, so
* that the next call returns an EOS bucket.
*/
apr_bucket_delete(e);
e = apr_bucket_eos_create(f->c->bucket_alloc);
APR_BRIGADE_INSERT_TAIL(b, e);
}
- return APR_SUCCESS;
+ goto cleanup;
+ }
+
+ /* Have we read as much data as we wanted (be greedy)? */
+ if (len < readbytes) {
+ apr_size_t bucket_len;
+
+ /* We already registered the data in e in len */
+ e = APR_BUCKET_NEXT(e);
+ while ((len < readbytes) && (rv == APR_SUCCESS)
+ && (e != APR_BRIGADE_SENTINEL(ctx->bb))) {
+ /* Check for the availability of buckets with known length */
+ if (e->length != -1) {
+ len += e->length;
+ e = APR_BUCKET_NEXT(e);
+ }
+ else {
+ /*
+ * Read from bucket, but non blocking. If there isn't any
+ * more data, well than this is fine as well, we will
+ * not wait for more since we already got some and we are
+ * only checking if there isn't more.
+ */
+ rv = apr_bucket_read(e, &str, &bucket_len,
+ APR_NONBLOCK_READ);
+ if (rv == APR_SUCCESS) {
+ len += bucket_len;
+ e = APR_BUCKET_NEXT(e);
+ }
+ }
+ }
}
/* We can only return at most what we read. */
readbytes = len;
}
- rv = apr_brigade_partition(ctx->b, readbytes, &e);
+ rv = apr_brigade_partition(ctx->bb, readbytes, &e);
if (rv != APR_SUCCESS) {
- return rv;
+ goto cleanup;
}
/* Must do move before CONCAT */
- brigade_move(ctx->b, ctx->tmpbb, e);
+ ctx->tmpbb = apr_brigade_split_ex(ctx->bb, e, ctx->tmpbb);
if (mode == AP_MODE_READBYTES) {
- APR_BRIGADE_CONCAT(b, ctx->b);
+ APR_BRIGADE_CONCAT(b, ctx->bb);
}
else if (mode == AP_MODE_SPECULATIVE) {
apr_bucket *copy_bucket;
- for (e = APR_BRIGADE_FIRST(ctx->b);
- e != APR_BRIGADE_SENTINEL(ctx->b);
+ for (e = APR_BRIGADE_FIRST(ctx->bb);
+ e != APR_BRIGADE_SENTINEL(ctx->bb);
e = APR_BUCKET_NEXT(e))
{
rv = apr_bucket_copy(e, ©_bucket);
if (rv != APR_SUCCESS) {
- return rv;
+ goto cleanup;
}
APR_BRIGADE_INSERT_TAIL(b, copy_bucket);
}
}
- /* Take what was originally there and place it back on ctx->b */
- APR_BRIGADE_CONCAT(ctx->b, ctx->tmpbb);
+ /* Take what was originally there and place it back on ctx->bb */
+ APR_BRIGADE_CONCAT(ctx->bb, ctx->tmpbb);
}
- return APR_SUCCESS;
-}
-static apr_status_t writev_it_all(apr_socket_t *s,
- struct iovec *vec, int nvec,
- apr_size_t len, apr_size_t *nbytes)
-{
- apr_size_t bytes_written = 0;
- apr_status_t rv;
- apr_size_t n = len;
- int i = 0;
-
- *nbytes = 0;
-
- /* XXX handle checking for non-blocking socket */
- while (bytes_written != len) {
- rv = apr_socket_sendv(s, vec + i, nvec - i, &n);
- *nbytes += n;
- bytes_written += n;
- if (rv != APR_SUCCESS)
- return rv;
+cleanup:
+ ap_filter_adopt_brigade(f, ctx->bb);
+ return rv;
+}
- /* If the write did not complete, adjust the iovecs and issue
- * apr_socket_sendv again
- */
- if (bytes_written < len) {
- /* Skip over the vectors that have already been written */
- apr_size_t cnt = vec[i].iov_len;
- while (n >= cnt && i + 1 < nvec) {
- i++;
- cnt += vec[i].iov_len;
- }
+static apr_status_t send_brigade_nonblocking(apr_socket_t *s,
+ apr_bucket_brigade *bb,
+ core_output_filter_ctx_t *ctx,
+ conn_rec *c);
- if (n < cnt) {
- /* Handle partial write of vec i */
- vec[i].iov_base = (char *) vec[i].iov_base +
- (vec[i].iov_len - (cnt - n));
- vec[i].iov_len = cnt -n;
- }
- }
+static apr_status_t writev_nonblocking(apr_socket_t *s,
+ apr_bucket_brigade *bb,
+ core_output_filter_ctx_t *ctx,
+ apr_size_t bytes_to_write,
+ apr_size_t nvec,
+ conn_rec *c);
- n = len - bytes_written;
- }
-
- return APR_SUCCESS;
-}
+#if APR_HAS_SENDFILE
+static apr_status_t sendfile_nonblocking(apr_socket_t *s,
+ apr_bucket *bucket,
+ core_output_filter_ctx_t *ctx,
+ conn_rec *c);
+#endif
-/* sendfile_it_all()
- * send the entire file using sendfile()
- * handle partial writes
- * return only when all bytes have been sent or an error is encountered.
+/* Optional function coming from mod_logio, used for logging of output
+ * traffic
*/
+extern APR_OPTIONAL_FN_TYPE(ap_logio_add_bytes_out) *ap__logio_add_bytes_out;
-#if APR_HAS_SENDFILE
-static apr_status_t sendfile_it_all(core_net_rec *c,
- apr_file_t *fd,
- apr_hdtr_t *hdtr,
- apr_off_t file_offset,
- apr_size_t file_bytes_left,
- apr_size_t total_bytes_left,
- apr_size_t *bytes_sent,
- apr_int32_t flags)
+apr_status_t ap_core_output_filter(ap_filter_t *f, apr_bucket_brigade *bb)
{
+ conn_rec *c = f->c;
+ core_net_rec *net = f->ctx;
+ apr_socket_t *sock = net->client_socket;
+ core_output_filter_ctx_t *ctx = net->out_ctx;
+ apr_interval_time_t sock_timeout = 0;
apr_status_t rv;
-#ifdef AP_DEBUG
- apr_interval_time_t timeout = 0;
-#endif
-
- AP_DEBUG_ASSERT((apr_socket_timeout_get(c->client_socket, &timeout)
- == APR_SUCCESS)
- && timeout > 0); /* socket must be in timeout mode */
-
- /* Reset the bytes_sent field */
- *bytes_sent = 0;
-
- do {
- apr_size_t tmplen = file_bytes_left;
-
- rv = apr_socket_sendfile(c->client_socket, fd, hdtr, &file_offset, &tmplen,
- flags);
- *bytes_sent += tmplen;
- total_bytes_left -= tmplen;
- if (!total_bytes_left || rv != APR_SUCCESS) {
- return rv; /* normal case & error exit */
- }
-
- AP_DEBUG_ASSERT(total_bytes_left > 0 && tmplen > 0);
-
- /* partial write, oooh noooo...
- * Skip over any header data which was written
- */
- while (tmplen && hdtr->numheaders) {
- if (tmplen >= hdtr->headers[0].iov_len) {
- tmplen -= hdtr->headers[0].iov_len;
- --hdtr->numheaders;
- ++hdtr->headers;
- }
- else {
- char *iov_base = (char *)hdtr->headers[0].iov_base;
- hdtr->headers[0].iov_len -= tmplen;
- iov_base += tmplen;
- hdtr->headers[0].iov_base = iov_base;
- tmplen = 0;
- }
- }
-
- /* Skip over any file data which was written */
-
- if (tmplen <= file_bytes_left) {
- file_offset += tmplen;
- file_bytes_left -= tmplen;
- continue;
- }
-
- tmplen -= file_bytes_left;
- file_bytes_left = 0;
- file_offset = 0;
-
- /* Skip over any trailer data which was written */
+ /* Fail quickly if the connection has already been aborted. */
+ if (c->aborted) {
+ apr_brigade_cleanup(bb);
+ return APR_ECONNABORTED;
+ }
- while (tmplen && hdtr->numtrailers) {
- if (tmplen >= hdtr->trailers[0].iov_len) {
- tmplen -= hdtr->trailers[0].iov_len;
- --hdtr->numtrailers;
- ++hdtr->trailers;
- }
- else {
- char *iov_base = (char *)hdtr->trailers[0].iov_base;
+ if (ctx == NULL) {
+ ctx = apr_pcalloc(c->pool, sizeof(*ctx));
+ net->out_ctx = (core_output_filter_ctx_t *)ctx;
+ }
- hdtr->trailers[0].iov_len -= tmplen;
- iov_base += tmplen;
- hdtr->trailers[0].iov_base = iov_base;
- tmplen = 0;
- }
+ /* remain compatible with legacy MPMs that passed NULL to this filter */
+ if (bb == NULL) {
+ if (ctx->empty_bb == NULL) {
+ ctx->empty_bb = apr_brigade_create(c->pool, c->bucket_alloc);
}
- } while (1);
-}
-#endif
-
-/*
- * emulate_sendfile()
- * Sends the contents of file fd along with header/trailer bytes, if any,
- * to the network. emulate_sendfile will return only when all the bytes have been
- * sent (i.e., it handles partial writes) or on a network error condition.
- */
-static apr_status_t emulate_sendfile(core_net_rec *c, apr_file_t *fd,
- apr_hdtr_t *hdtr, apr_off_t offset,
- apr_size_t length, apr_size_t *nbytes)
-{
- apr_status_t rv = APR_SUCCESS;
- apr_size_t togo; /* Remaining number of bytes in the file to send */
- apr_size_t sendlen = 0;
- apr_size_t bytes_sent;
- apr_int32_t i;
- apr_off_t o; /* Track the file offset for partial writes */
- char buffer[8192];
-
- *nbytes = 0;
-
- /* Send the headers
- * writev_it_all handles partial writes.
- * XXX: optimization... if headers are less than MIN_WRITE_SIZE, copy
- * them into buffer
- */
- if (hdtr && hdtr->numheaders > 0 ) {
- for (i = 0; i < hdtr->numheaders; i++) {
- sendlen += hdtr->headers[i].iov_len;
+ else {
+ apr_brigade_cleanup(ctx->empty_bb);
}
-
- rv = writev_it_all(c->client_socket, hdtr->headers, hdtr->numheaders,
- sendlen, &bytes_sent);
- *nbytes += bytes_sent; /* track total bytes sent */
+ bb = ctx->empty_bb;
}
- /* Seek the file to 'offset' */
- if (offset >= 0 && rv == APR_SUCCESS) {
- rv = apr_file_seek(fd, APR_SET, &offset);
+ /* Prepend buckets set aside, if any. */
+ ap_filter_reinstate_brigade(f, bb, NULL);
+ if (APR_BRIGADE_EMPTY(bb)) {
+ return APR_SUCCESS;
}
- /* Send the file, making sure to handle partial writes */
- togo = length;
- while (rv == APR_SUCCESS && togo) {
- sendlen = togo > sizeof(buffer) ? sizeof(buffer) : togo;
- o = 0;
- rv = apr_file_read(fd, buffer, &sendlen);
- if (rv == APR_SUCCESS && sendlen) {
- while ((rv == APR_SUCCESS || rv == APR_EAGAIN) && sendlen) {
- bytes_sent = sendlen;
- rv = apr_socket_send(c->client_socket, &buffer[o], &bytes_sent);
- *nbytes += bytes_sent;
- if (rv == APR_SUCCESS) {
- sendlen -= bytes_sent; /* sendlen != bytes_sent ==> partial write */
- o += bytes_sent; /* o is where we are in the buffer */
- togo -= bytes_sent; /* track how much of the file we've sent */
- }
+ /* Non-blocking writes on the socket in any case. */
+ apr_socket_timeout_get(sock, &sock_timeout);
+ apr_socket_timeout_set(sock, 0);
+
+ do {
+ rv = send_brigade_nonblocking(sock, bb, ctx, c);
+ if (APR_STATUS_IS_EAGAIN(rv)) {
+ /* Scan through the brigade and decide whether we must absolutely
+ * flush the remaining data, based on ap_filter_reinstate_brigade()
+ * rules. If so, wait for writability and retry, otherwise we did
+ * our best already and can wait for the next call.
+ */
+ apr_bucket *flush_upto;
+ ap_filter_reinstate_brigade(f, bb, &flush_upto);
+ if (flush_upto) {
+ apr_int32_t nfd;
+ apr_pollfd_t pfd;
+ memset(&pfd, 0, sizeof(pfd));
+ pfd.reqevents = APR_POLLOUT;
+ pfd.desc_type = APR_POLL_SOCKET;
+ pfd.desc.s = sock;
+ pfd.p = c->pool;
+ do {
+ rv = apr_poll(&pfd, 1, &nfd, sock_timeout);
+ } while (APR_STATUS_IS_EINTR(rv));
}
}
- }
+ } while (rv == APR_SUCCESS && !APR_BRIGADE_EMPTY(bb));
- /* Send the trailers
- * XXX: optimization... if it will fit, send this on the last send in the
- * loop above
- */
- sendlen = 0;
- if ( rv == APR_SUCCESS && hdtr && hdtr->numtrailers > 0 ) {
- for (i = 0; i < hdtr->numtrailers; i++) {
- sendlen += hdtr->trailers[i].iov_len;
- }
- rv = writev_it_all(c->client_socket, hdtr->trailers, hdtr->numtrailers,
- sendlen, &bytes_sent);
- *nbytes += bytes_sent;
+ /* Restore original socket timeout before leaving. */
+ apr_socket_timeout_set(sock, sock_timeout);
+
+ if (rv != APR_SUCCESS && !APR_STATUS_IS_EAGAIN(rv)) {
+ /* The client has aborted the connection */
+ ap_log_cerror(
+ APLOG_MARK, APLOG_TRACE1, rv, c,
+ "core_output_filter: writing data to the network");
+ /*
+ * Set c->aborted before apr_brigade_cleanup to have the correct status
+ * when logging the request as apr_brigade_cleanup triggers the logging
+ * of the request if it contains an EOR bucket.
+ */
+ c->aborted = 1;
+ apr_brigade_cleanup(bb);
+ return rv;
}
- return rv;
+ return ap_filter_setaside_brigade(f, bb);
}
-#ifndef APR_MAX_IOVEC_SIZE
-#define MAX_IOVEC_TO_WRITE 16
+#ifndef APR_MAX_IOVEC_SIZE
+#define NVEC_MIN 16
+#define NVEC_MAX NVEC_MIN
#else
#if APR_MAX_IOVEC_SIZE > 16
-#define MAX_IOVEC_TO_WRITE 16
+#define NVEC_MIN 16
#else
-#define MAX_IOVEC_TO_WRITE APR_MAX_IOVEC_SIZE
+#define NVEC_MIN APR_MAX_IOVEC_SIZE
#endif
+#define NVEC_MAX APR_MAX_IOVEC_SIZE
#endif
-/* Optional function coming from mod_logio, used for logging of output
- * traffic
- */
-extern APR_OPTIONAL_FN_TYPE(ap_logio_add_bytes_out) *logio_add_bytes_out;
-
-apr_status_t ap_core_output_filter(ap_filter_t *f, apr_bucket_brigade *b)
+static APR_INLINE int is_in_memory_bucket(apr_bucket *b)
{
- apr_status_t rv;
- apr_bucket_brigade *more;
- conn_rec *c = f->c;
- core_net_rec *net = f->ctx;
- core_output_filter_ctx_t *ctx = net->out_ctx;
- apr_read_type_e eblock = APR_NONBLOCK_READ;
- apr_pool_t *input_pool = b->p;
+ /* These buckets' data are already in memory. */
+ return APR_BUCKET_IS_HEAP(b)
+ || APR_BUCKET_IS_POOL(b)
+ || APR_BUCKET_IS_TRANSIENT(b)
+ || APR_BUCKET_IS_IMMORTAL(b);
+}
- if (ctx == NULL) {
- ctx = apr_pcalloc(c->pool, sizeof(*ctx));
- net->out_ctx = ctx;
+#if APR_HAS_SENDFILE
+static APR_INLINE int can_sendfile_bucket(apr_bucket *b)
+{
+ /* Use sendfile to send the bucket unless:
+ * - the bucket is not a file bucket, or
+ * - the file is too small for sendfile to be useful, or
+ * - sendfile is disabled in the httpd config via "EnableSendfile off".
+ */
+ if (APR_BUCKET_IS_FILE(b) && b->length >= AP_MIN_SENDFILE_BYTES) {
+ apr_file_t *file = ((apr_bucket_file *)b->data)->fd;
+ return apr_file_flags_get(file) & APR_SENDFILE_ENABLED;
}
-
- /* If we have a saved brigade, concatenate the new brigade to it */
- if (ctx->b) {
- APR_BRIGADE_CONCAT(ctx->b, b);
- b = ctx->b;
- ctx->b = NULL;
+ else {
+ return 0;
}
+}
+#endif
- /* Perform multiple passes over the brigade, sending batches of output
- to the connection. */
- while (b && !APR_BRIGADE_EMPTY(b)) {
- apr_size_t nbytes = 0;
- apr_bucket *last_e = NULL; /* initialized for debugging */
- apr_bucket *e;
-
- /* one group of iovecs per pass over the brigade */
- apr_size_t nvec = 0;
- apr_size_t nvec_trailers = 0;
- struct iovec vec[MAX_IOVEC_TO_WRITE];
- struct iovec vec_trailers[MAX_IOVEC_TO_WRITE];
-
- /* one file per pass over the brigade */
- apr_file_t *fd = NULL;
- apr_size_t flen = 0;
- apr_off_t foffset = 0;
-
- /* keep track of buckets that we've concatenated
- * to avoid small writes
- */
- apr_bucket *last_merged_bucket = NULL;
-
- /* tail of brigade if we need another pass */
- more = NULL;
-
- /* Iterate over the brigade: collect iovecs and/or a file */
- for (e = APR_BRIGADE_FIRST(b);
- e != APR_BRIGADE_SENTINEL(b);
- e = APR_BUCKET_NEXT(e))
- {
- /* keep track of the last bucket processed */
- last_e = e;
- if (APR_BUCKET_IS_EOS(e) || AP_BUCKET_IS_EOC(e)) {
- break;
- }
- else if (APR_BUCKET_IS_FLUSH(e)) {
- if (e != APR_BRIGADE_LAST(b)) {
- more = apr_brigade_split(b, APR_BUCKET_NEXT(e));
- }
- break;
- }
+static apr_status_t send_brigade_nonblocking(apr_socket_t *s,
+ apr_bucket_brigade *bb,
+ core_output_filter_ctx_t *ctx,
+ conn_rec *c)
+{
+ apr_status_t rv = APR_SUCCESS;
+ core_server_config *conf =
+ ap_get_core_module_config(c->base_server->module_config);
+ apr_size_t nvec = 0, nbytes = 0;
+ apr_bucket *bucket, *next;
+ const char *data;
+ apr_size_t length;
+
+ for (bucket = APR_BRIGADE_FIRST(bb);
+ bucket != APR_BRIGADE_SENTINEL(bb);
+ bucket = next) {
+ next = APR_BUCKET_NEXT(bucket);
- /* It doesn't make any sense to use sendfile for a file bucket
- * that represents 10 bytes.
- */
- else if (APR_BUCKET_IS_FILE(e)
- && (e->length >= AP_MIN_SENDFILE_BYTES)) {
- apr_bucket_file *a = e->data;
-
- /* We can't handle more than one file bucket at a time
- * so we split here and send the file we have already
- * found.
- */
- if (fd) {
- more = apr_brigade_split(b, e);
- break;
+#if APR_HAS_SENDFILE
+ if (can_sendfile_bucket(bucket)) {
+ if (nvec > 0) {
+ (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 1);
+ rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
+ if (rv != APR_SUCCESS) {
+ goto cleanup;
}
-
- fd = a->fd;
- flen = e->length;
- foffset = e->start;
+ nbytes = 0;
+ nvec = 0;
}
- else {
- const char *str;
- apr_size_t n;
-
- rv = apr_bucket_read(e, &str, &n, eblock);
- if (APR_STATUS_IS_EAGAIN(rv)) {
- /* send what we have so far since we shouldn't expect more
- * output for a while... next time we read, block
- */
- more = apr_brigade_split(b, e);
- eblock = APR_BLOCK_READ;
- break;
- }
- eblock = APR_NONBLOCK_READ;
- if (n) {
- if (!fd) {
- if (nvec == MAX_IOVEC_TO_WRITE) {
- /* woah! too many. buffer them up, for use later. */
- apr_bucket *temp, *next;
- apr_bucket_brigade *temp_brig;
-
- if (nbytes >= AP_MIN_BYTES_TO_WRITE) {
- /* We have enough data in the iovec
- * to justify doing a writev
- */
- more = apr_brigade_split(b, e);
- break;
- }
-
- /* Create a temporary brigade as a means
- * of concatenating a bunch of buckets together
- */
- if (last_merged_bucket) {
- /* If we've concatenated together small
- * buckets already in a previous pass,
- * the initial buckets in this brigade
- * are heap buckets that may have extra
- * space left in them (because they
- * were created by apr_brigade_write()).
- * We can take advantage of this by
- * building the new temp brigade out of
- * these buckets, so that the content
- * in them doesn't have to be copied again.
- */
- apr_bucket_brigade *bb;
- bb = apr_brigade_split(b,
- APR_BUCKET_NEXT(last_merged_bucket));
- temp_brig = b;
- b = bb;
- }
- else {
- temp_brig = apr_brigade_create(f->c->pool,
- f->c->bucket_alloc);
- }
-
- temp = APR_BRIGADE_FIRST(b);
- while (temp != e) {
- apr_bucket *d;
- rv = apr_bucket_read(temp, &str, &n, APR_BLOCK_READ);
- apr_brigade_write(temp_brig, NULL, NULL, str, n);
- d = temp;
- temp = APR_BUCKET_NEXT(temp);
- apr_bucket_delete(d);
- }
-
- nvec = 0;
- nbytes = 0;
- temp = APR_BRIGADE_FIRST(temp_brig);
- APR_BUCKET_REMOVE(temp);
- APR_BRIGADE_INSERT_HEAD(b, temp);
- apr_bucket_read(temp, &str, &n, APR_BLOCK_READ);
- vec[nvec].iov_base = (char*) str;
- vec[nvec].iov_len = n;
- nvec++;
-
- /* Just in case the temporary brigade has
- * multiple buckets, recover the rest of
- * them and put them in the brigade that
- * we're sending.
- */
- for (next = APR_BRIGADE_FIRST(temp_brig);
- next != APR_BRIGADE_SENTINEL(temp_brig);
- next = APR_BRIGADE_FIRST(temp_brig)) {
- APR_BUCKET_REMOVE(next);
- APR_BUCKET_INSERT_AFTER(temp, next);
- temp = next;
- apr_bucket_read(next, &str, &n,
- APR_BLOCK_READ);
- vec[nvec].iov_base = (char*) str;
- vec[nvec].iov_len = n;
- nvec++;
- }
-
- apr_brigade_destroy(temp_brig);
-
- last_merged_bucket = temp;
- e = temp;
- last_e = e;
- }
- else {
- vec[nvec].iov_base = (char*) str;
- vec[nvec].iov_len = n;
- nvec++;
- }
- }
- else {
- /* The bucket is a trailer to a file bucket */
-
- if (nvec_trailers == MAX_IOVEC_TO_WRITE) {
- /* woah! too many. stop now. */
- more = apr_brigade_split(b, e);
- break;
- }
-
- vec_trailers[nvec_trailers].iov_base = (char*) str;
- vec_trailers[nvec_trailers].iov_len = n;
- nvec_trailers++;
- }
-
- nbytes += n;
- }
+ rv = sendfile_nonblocking(s, bucket, ctx, c);
+ if (rv != APR_SUCCESS) {
+ goto cleanup;
}
+ continue;
}
-
-
- /* Completed iterating over the brigade, now determine if we want
- * to buffer the brigade or send the brigade out on the network.
- *
- * Save if we haven't accumulated enough bytes to send, the connection
- * is not about to be closed, and:
- *
- * 1) we didn't see a file, we don't have more passes over the
- * brigade to perform, AND we didn't stop at a FLUSH bucket.
- * (IOW, we will save plain old bytes such as HTTP headers)
- * or
- * 2) we hit the EOS and have a keep-alive connection
- * (IOW, this response is a bit more complex, but we save it
- * with the hope of concatenating with another response)
- */
- if (nbytes + flen < AP_MIN_BYTES_TO_WRITE
- && !AP_BUCKET_IS_EOC(last_e)
- && ((!fd && !more && !APR_BUCKET_IS_FLUSH(last_e))
- || (APR_BUCKET_IS_EOS(last_e)
- && c->keepalive == AP_CONN_KEEPALIVE))) {
-
- /* NEVER save an EOS in here. If we are saving a brigade with
- * an EOS bucket, then we are doing keepalive connections, and
- * we want to process to second request fully.
- */
- if (APR_BUCKET_IS_EOS(last_e)) {
- apr_bucket *bucket;
- int file_bucket_saved = 0;
- apr_bucket_delete(last_e);
- for (bucket = APR_BRIGADE_FIRST(b);
- bucket != APR_BRIGADE_SENTINEL(b);
- bucket = APR_BUCKET_NEXT(bucket)) {
-
- /* Do a read on each bucket to pull in the
- * data from pipe and socket buckets, so
- * that we don't leave their file descriptors
- * open indefinitely. Do the same for file
- * buckets, with one exception: allow the
- * first file bucket in the brigade to remain
- * a file bucket, so that we don't end up
- * doing an mmap+memcpy every time a client
- * requests a <8KB file over a keepalive
- * connection.
- */
- if (APR_BUCKET_IS_FILE(bucket) && !file_bucket_saved) {
- file_bucket_saved = 1;
- }
- else {
- const char *buf;
- apr_size_t len = 0;
- rv = apr_bucket_read(bucket, &buf, &len,
- APR_BLOCK_READ);
- if (rv != APR_SUCCESS) {
- ap_log_cerror(APLOG_MARK, APLOG_ERR, rv,
- c, "core_output_filter:"
- " Error reading from bucket.");
- return HTTP_INTERNAL_SERVER_ERROR;
- }
+#endif /* APR_HAS_SENDFILE */
+
+ if (bucket->length) {
+ /* Non-blocking read first, in case this is a morphing
+ * bucket type. */
+ rv = apr_bucket_read(bucket, &data, &length, APR_NONBLOCK_READ);
+ if (APR_STATUS_IS_EAGAIN(rv)) {
+ /* Read would block; flush any pending data and retry. */
+ if (nvec) {
+ rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
+ if (rv != APR_SUCCESS) {
+ goto cleanup;
}
+ nbytes = 0;
+ nvec = 0;
}
+ (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 0);
+
+ rv = apr_bucket_read(bucket, &data, &length, APR_BLOCK_READ);
}
- if (!ctx->deferred_write_pool) {
- apr_pool_create(&ctx->deferred_write_pool, c->pool);
- apr_pool_tag(ctx->deferred_write_pool, "deferred_write");
+ if (rv != APR_SUCCESS) {
+ goto cleanup;
}
- ap_save_brigade(f, &ctx->b, &b, ctx->deferred_write_pool);
- return APR_SUCCESS;
+ /* reading may have split the bucket, so recompute next: */
+ next = APR_BUCKET_NEXT(bucket);
}
- if (fd) {
- apr_hdtr_t hdtr;
- apr_size_t bytes_sent;
-
-#if APR_HAS_SENDFILE
- apr_int32_t flags = 0;
-#endif
-
- memset(&hdtr, '\0', sizeof(hdtr));
- if (nvec) {
- hdtr.numheaders = nvec;
- hdtr.headers = vec;
+ if (!bucket->length) {
+ /* Don't delete empty buckets until all the previous ones have been
+ * sent (nvec == 0); this must happen in sequence since metabuckets
+ * like EOR could free the data still pointed to by the iovec. So
+ * unless the latter is empty, let writev_nonblocking() cleanup the
+ * brigade in order.
+ */
+ if (!nvec) {
+ apr_bucket_delete(bucket);
}
+ continue;
+ }
- if (nvec_trailers) {
- hdtr.numtrailers = nvec_trailers;
- hdtr.trailers = vec_trailers;
+ /* Make sure that these new data fit in our iovec. */
+ if (nvec == ctx->nvec) {
+ if (nvec == NVEC_MAX) {
+ (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 1);
+ rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
+ if (rv != APR_SUCCESS) {
+ goto cleanup;
+ }
+ nbytes = 0;
+ nvec = 0;
}
-
-#if APR_HAS_SENDFILE
- if (apr_file_flags_get(fd) & APR_SENDFILE_ENABLED) {
-
- if (c->keepalive == AP_CONN_CLOSE && APR_BUCKET_IS_EOS(last_e)) {
- /* Prepare the socket to be reused */
- flags |= APR_SENDFILE_DISCONNECT_SOCKET;
+ else {
+ struct iovec *newvec;
+ apr_size_t newn = nvec * 2;
+ if (newn < NVEC_MIN) {
+ newn = NVEC_MIN;
}
-
- rv = sendfile_it_all(net, /* the network information */
- fd, /* the file to send */
- &hdtr, /* header and trailer iovecs */
- foffset, /* offset in the file to begin
- sending from */
- flen, /* length of file */
- nbytes + flen, /* total length including
- headers */
- &bytes_sent, /* how many bytes were
- sent */
- flags); /* apr_sendfile flags */
+ else if (newn > NVEC_MAX) {
+ newn = NVEC_MAX;
+ }
+ newvec = apr_palloc(c->pool, newn * sizeof(struct iovec));
+ if (nvec) {
+ memcpy(newvec, ctx->vec, nvec * sizeof(struct iovec));
+ }
+ ctx->vec = newvec;
+ ctx->nvec = newn;
}
- else
-#endif
- {
- rv = emulate_sendfile(net, fd, &hdtr, foffset, flen,
- &bytes_sent);
+ }
+ nbytes += length;
+ ctx->vec[nvec].iov_base = (void *)data;
+ ctx->vec[nvec].iov_len = length;
+ nvec++;
+
+ /* Flush above max threshold, unless the brigade still contains in
+ * memory buckets which we want to try writing in the same pass (if
+ * we are at the end of the brigade, the write will happen outside
+ * the loop anyway).
+ */
+ if (nbytes >= conf->flush_max_threshold
+ && next != APR_BRIGADE_SENTINEL(bb)
+ && !is_in_memory_bucket(next)) {
+ (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 1);
+ rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
+ if (rv != APR_SUCCESS) {
+ goto cleanup;
}
-
- if (logio_add_bytes_out && bytes_sent > 0)
- logio_add_bytes_out(c, bytes_sent);
-
- fd = NULL;
+ nbytes = 0;
+ nvec = 0;
}
- else {
- apr_size_t bytes_sent;
+ }
+ if (nvec > 0) {
+ rv = writev_nonblocking(s, bb, ctx, nbytes, nvec, c);
+ }
- rv = writev_it_all(net->client_socket,
- vec, nvec,
- nbytes, &bytes_sent);
+cleanup:
+ (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 0);
+ return rv;
+}
- if (logio_add_bytes_out && bytes_sent > 0)
- logio_add_bytes_out(c, bytes_sent);
- }
+static apr_status_t writev_nonblocking(apr_socket_t *s,
+ apr_bucket_brigade *bb,
+ core_output_filter_ctx_t *ctx,
+ apr_size_t bytes_to_write,
+ apr_size_t nvec,
+ conn_rec *c)
+{
+ apr_status_t rv;
+ struct iovec *vec = ctx->vec;
+ apr_size_t bytes_written = 0;
+ apr_size_t i, offset = 0;
- apr_brigade_destroy(b);
-
- /* drive cleanups for resources which were set aside
- * this may occur before or after termination of the request which
- * created the resource
- */
- if (ctx->deferred_write_pool) {
- if (more && more->p == ctx->deferred_write_pool) {
- /* "more" belongs to the deferred_write_pool,
- * which is about to be cleared.
- */
- if (APR_BRIGADE_EMPTY(more)) {
- more = NULL;
- }
- else {
- /* uh oh... change more's lifetime
- * to the input brigade's lifetime
- */
- apr_bucket_brigade *tmp_more = more;
- more = NULL;
- ap_save_brigade(f, &more, &tmp_more, input_pool);
+ do {
+ apr_size_t n = 0;
+ rv = apr_socket_sendv(s, vec + offset, nvec - offset, &n);
+ bytes_written += n;
+
+ for (i = offset; i < nvec; ) {
+ apr_bucket *bucket = APR_BRIGADE_FIRST(bb);
+ if (!bucket->length) {
+ apr_bucket_delete(bucket);
+ }
+ else if (n >= vec[i].iov_len) {
+ apr_bucket_delete(bucket);
+ n -= vec[i++].iov_len;
+ offset++;
+ }
+ else {
+ if (n) {
+ apr_bucket_split(bucket, n);
+ apr_bucket_delete(bucket);
+ vec[i].iov_len -= n;
+ vec[i].iov_base = (char *) vec[i].iov_base + n;
}
+ break;
}
- apr_pool_clear(ctx->deferred_write_pool);
}
+ } while (rv == APR_SUCCESS && bytes_written < bytes_to_write);
- if (rv != APR_SUCCESS) {
- ap_log_cerror(APLOG_MARK, APLOG_INFO, rv, c,
- "core_output_filter: writing data to the network");
+ if ((ap__logio_add_bytes_out != NULL) && (bytes_written > 0)) {
+ ap__logio_add_bytes_out(c, bytes_written);
+ }
+ ctx->bytes_written += bytes_written;
- if (more)
- apr_brigade_destroy(more);
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, rv, c,
+ "writev_nonblocking: %"APR_SIZE_T_FMT"/%"APR_SIZE_T_FMT,
+ bytes_written, bytes_to_write);
+ return rv;
+}
- /* No need to check for SUCCESS, we did that above. */
- if (!APR_STATUS_IS_EAGAIN(rv)) {
- c->aborted = 1;
- }
+#if APR_HAS_SENDFILE
- /* The client has aborted, but the request was successful. We
- * will report success, and leave it to the access and error
- * logs to note that the connection was aborted.
- */
- return APR_SUCCESS;
- }
+static apr_status_t sendfile_nonblocking(apr_socket_t *s,
+ apr_bucket *bucket,
+ core_output_filter_ctx_t *ctx,
+ conn_rec *c)
+{
+ apr_status_t rv;
+ apr_file_t *file = ((apr_bucket_file *)bucket->data)->fd;
+ apr_size_t bytes_written = bucket->length; /* bytes_to_write for now */
+ apr_off_t file_offset = bucket->start;
- b = more;
- more = NULL;
- } /* end while () */
+ rv = apr_socket_sendfile(s, file, NULL, &file_offset, &bytes_written, 0);
+ if ((ap__logio_add_bytes_out != NULL) && (bytes_written > 0)) {
+ ap__logio_add_bytes_out(c, bytes_written);
+ }
+ ctx->bytes_written += bytes_written;
- return APR_SUCCESS;
+ ap_log_cerror(APLOG_MARK, APLOG_TRACE6, rv, c,
+ "sendfile_nonblocking: %" APR_SIZE_T_FMT "/%" APR_SIZE_T_FMT,
+ bytes_written, bucket->length);
+ if (bytes_written >= bucket->length) {
+ apr_bucket_delete(bucket);
+ }
+ else if (bytes_written > 0) {
+ apr_bucket_split(bucket, bytes_written);
+ apr_bucket_delete(bucket);
+ if (rv == APR_SUCCESS) {
+ rv = APR_EAGAIN;
+ }
+ }
+ return rv;
}
+
+#endif