1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * @file core_filters.c
19 * @brief Core input/output network filters.
23 #include "apr_strings.h"
25 #include "apr_fnmatch.h"
27 #include "apr_thread_proc.h" /* for RLIMIT stuff */
28 #include "apr_hooks.h"
30 #define APR_WANT_IOVEC
31 #define APR_WANT_STRFUNC
32 #define APR_WANT_MEMFUNC
36 #include "ap_config.h"
38 #include "http_config.h"
39 #include "http_core.h"
40 #include "http_protocol.h" /* For index_of_response(). Grump. */
41 #include "http_request.h"
42 #include "http_vhost.h"
43 #include "http_main.h" /* For the default_handler below... */
46 #include "http_connection.h"
47 #include "apr_buckets.h"
48 #include "util_filter.h"
49 #include "util_ebcdic.h"
51 #include "mpm_common.h"
52 #include "scoreboard.h"
54 #include "mod_proxy.h"
55 #include "ap_listen.h"
57 #include "mod_so.h" /* for ap_find_loaded_module_symbol */
59 #define AP_MIN_SENDFILE_BYTES (256)
62 * Remove all zero length buckets from the brigade.
/* BRIGADE_NORMALIZE(b): starting at the first bucket of brigade b, delete
 * each bucket whose length is 0 and which is not a metadata bucket, and keep
 * advancing until the brigade sentinel is reached or the brigade is empty.
 * The argument b is expanded more than once, so pass an expression that has
 * no side effects. */
64 #define BRIGADE_NORMALIZE(b) \
66 apr_bucket *e = APR_BRIGADE_FIRST(b); \
68 if (e->length == 0 && !APR_BUCKET_IS_METADATA(e)) { \
70 d = APR_BUCKET_NEXT(e); \
71 apr_bucket_delete(e); \
75 e = APR_BUCKET_NEXT(e); \
77 } while (!APR_BRIGADE_EMPTY(b) && (e != APR_BRIGADE_SENTINEL(b))); \
82 * Split the contents of a brigade after bucket 'e' to an existing brigade
84 * XXXX: Should this function be added to APR-Util?
/* brigade_move: when e is not the sentinel of b, unsplice the run of buckets
 * from e through the last bucket of b and splice that run onto the head of
 * brigade a. The visible code also runs APR_BRIGADE_CHECK_CONSISTENCY on
 * both a and b. */
86 static void brigade_move(apr_bucket_brigade *b, apr_bucket_brigade *a,
91 if (e != APR_BRIGADE_SENTINEL(b)) {
92 f = APR_RING_LAST(&b->list);
93 APR_RING_UNSPLICE(e, f, link);
94 APR_RING_SPLICE_HEAD(&a->list, e, f, apr_bucket, link);
97 APR_BRIGADE_CHECK_CONSISTENCY(a);
98 APR_BRIGADE_CHECK_CONSISTENCY(b);
/* ap_core_input_filter: core network input filter.
 *
 * Serves reads out of the per-connection brigade ctx->b (ctx is
 * net->in_ctx), which the visible code seeds with a socket bucket for
 * net->client_socket when the context is allocated. Data is delivered into
 * the caller's brigade b according to mode:
 *   AP_MODE_INIT        - checked first, before any brigade is touched
 *   AP_MODE_GETLINE     - apr_brigade_split_line() from ctx->b into b,
 *                         limited to HUGE_STRING_LEN
 *   AP_MODE_EATCRLF     - nonblocking reads of ctx->b that skip LF and CRLF
 *                         bytes and delete buckets consisting only of them
 *   AP_MODE_EXHAUSTIVE  - concatenate all of ctx->b onto b and append EOS
 *   AP_MODE_READBYTES   - partition ctx->b at readbytes and move the front
 *                         part to b
 *   AP_MODE_SPECULATIVE - partition ctx->b at readbytes and append copies of
 *                         the front buckets to b, keeping the originals
 * The block argument is forwarded to apr_brigade_split_line() and to the
 * apr_bucket_read() in the READBYTES/SPECULATIVE path.
 *
 * NOTE(review): in the EATCRLF scan, `*(c + 1)` is evaluated whenever
 * `c < str + len`, so a CR in the final byte reads one byte beyond the
 * length reported by apr_bucket_read() - confirm that byte is readable. */
101 int ap_core_input_filter(ap_filter_t *f, apr_bucket_brigade *b,
102 ap_input_mode_t mode, apr_read_type_e block,
107 core_net_rec *net = f->ctx;
108 core_ctx_t *ctx = net->in_ctx;
112 if (mode == AP_MODE_INIT) {
114 * this mode is for filters that might need to 'initialize'
115 * a connection before reading request data from a client.
116 * NNTP over SSL for example needs to handshake before the
117 * server sends the welcome message.
118 * such filters would have changed the mode before this point
119 * is reached. however, protocol modules such as NNTP should
120 * not need to know anything about SSL. given the example, if
121 * SSL is not in the filter chain, AP_MODE_INIT is a noop.
/* Allocate the filter context and both of its brigades from the
 * connection pool (ctx->b->p is f->c->pool at this point). */
128 ctx = apr_pcalloc(f->c->pool, sizeof(*ctx));
129 ctx->b = apr_brigade_create(f->c->pool, f->c->bucket_alloc);
130 ctx->tmpbb = apr_brigade_create(ctx->b->p, ctx->b->bucket_alloc);
131 /* seed the brigade with the client socket. */
132 e = apr_bucket_socket_create(net->client_socket, f->c->bucket_alloc);
133 APR_BRIGADE_INSERT_TAIL(ctx->b, e);
136 else if (APR_BRIGADE_EMPTY(ctx->b)) {
140 /* ### This is bad. */
141 BRIGADE_NORMALIZE(ctx->b);
143 /* check for empty brigade again *AFTER* BRIGADE_NORMALIZE()
144 * If we have lost our socket bucket (see above), we are EOF.
146 * Ideally, this should be returning SUCCESS with EOS bucket, but
147 * some higher-up APIs (spec. read_request_line via ap_rgetline)
148 * want an error code. */
149 if (APR_BRIGADE_EMPTY(ctx->b)) {
153 if (mode == AP_MODE_GETLINE) {
154 /* we are reading a single LF line, e.g. the HTTP headers */
155 rv = apr_brigade_split_line(b, ctx->b, block, HUGE_STRING_LEN);
156 /* We should treat EAGAIN here the same as we do for EOF (brigade is
157 * empty). We do this by returning whatever we have read. This may
158 * or may not be bogus, but is consistent (for now) with EOF logic.
160 if (APR_STATUS_IS_EAGAIN(rv)) {
166 /* ### AP_MODE_PEEK is a horrific name for this mode because we also
167 * eat any CRLFs that we see. That's not the obvious intention of
168 * this mode. Determine whether anyone actually uses this or not. */
169 if (mode == AP_MODE_EATCRLF) {
173 /* The purpose of this loop is to ignore any CRLF (or LF) at the end
174 * of a request. Many browsers send extra lines at the end of POST
175 * requests. We use the PEEK method to determine if there is more
176 * data on the socket, so that we know if we should delay sending the
177 * end of one request until we have served the second request in a
178 * pipelined situation. We don't want to actually delay sending a
179 * response if the server finds a CRLF (or LF), because that doesn't
180 * mean that there is another request, just a blank line.
183 if (APR_BRIGADE_EMPTY(ctx->b))
186 e = APR_BRIGADE_FIRST(ctx->b);
188 rv = apr_bucket_read(e, &str, &len, APR_NONBLOCK_READ);
190 if (rv != APR_SUCCESS)
194 while (c < str + len) {
195 if (*c == APR_ASCII_LF)
197 else if (*c == APR_ASCII_CR && *(c + 1) == APR_ASCII_LF)
203 /* If we reach here, we were a bucket just full of CRLFs, so
204 * just toss the bucket. */
205 /* FIXME: Is this the right thing to do in the core? */
206 apr_bucket_delete(e);
211 /* If mode is EXHAUSTIVE, we want to just read everything until the end
212 * of the brigade, which in this case means the end of the socket.
213 * To do this, we attach the brigade that has currently been setaside to
214 * the brigade that was passed down, and send that brigade back.
216 * NOTE: This is VERY dangerous to use, and should only be done with
217 * extreme caution. However, the Perchild MPM needs this feature
218 * if it is ever going to work correctly again. With this, the Perchild
219 * MPM can easily request the socket and all data that has been read,
220 * which means that it can pass it to the correct child process.
222 if (mode == AP_MODE_EXHAUSTIVE) {
225 /* Tack on any buckets that were set aside. */
226 APR_BRIGADE_CONCAT(b, ctx->b);
228 /* Since we've just added all potential buckets (which will most
229 * likely simply be the socket bucket) we know this is the end,
230 * so tack on an EOS too. */
231 /* We have read until the brigade was empty, so we know that we
233 e = apr_bucket_eos_create(f->c->bucket_alloc);
234 APR_BRIGADE_INSERT_TAIL(b, e);
238 /* read up to the amount they specified. */
239 if (mode == AP_MODE_READBYTES || mode == AP_MODE_SPECULATIVE) {
242 AP_DEBUG_ASSERT(readbytes > 0);
244 e = APR_BRIGADE_FIRST(ctx->b);
245 rv = apr_bucket_read(e, &str, &len, block);
247 if (APR_STATUS_IS_EAGAIN(rv)) {
250 else if (rv != APR_SUCCESS) {
253 else if (block == APR_BLOCK_READ && len == 0) {
254 /* We wanted to read some bytes in blocking mode. We read
255 * 0 bytes. Hence, we now assume we are EOS.
257 * When we are in normal mode, return an EOS bucket to the
259 * When we are in speculative mode, leave ctx->b empty, so
260 * that the next call returns an EOS bucket.
262 apr_bucket_delete(e);
264 if (mode == AP_MODE_READBYTES) {
265 e = apr_bucket_eos_create(f->c->bucket_alloc);
266 APR_BRIGADE_INSERT_TAIL(b, e);
271 /* We can only return at most what we read. */
272 if (len < readbytes) {
276 rv = apr_brigade_partition(ctx->b, readbytes, &e);
277 if (rv != APR_SUCCESS) {
281 /* Must do move before CONCAT */
282 brigade_move(ctx->b, ctx->tmpbb, e);
284 if (mode == AP_MODE_READBYTES) {
285 APR_BRIGADE_CONCAT(b, ctx->b);
287 else if (mode == AP_MODE_SPECULATIVE) {
288 apr_bucket *copy_bucket;
290 for (e = APR_BRIGADE_FIRST(ctx->b);
291 e != APR_BRIGADE_SENTINEL(ctx->b);
292 e = APR_BUCKET_NEXT(e))
/* NOTE(review): "©_bucket" on the next line looks like a mis-encoded
 * "&copy_bucket" (the address of the copy_bucket declared above) -
 * confirm against the upstream file and repair. */
294 rv = apr_bucket_copy(e, ©_bucket);
295 if (rv != APR_SUCCESS) {
298 APR_BRIGADE_INSERT_TAIL(b, copy_bucket);
302 /* Take what was originally there and place it back on ctx->b */
303 APR_BRIGADE_CONCAT(ctx->b, ctx->tmpbb);
/* Forward declarations of the static output helpers that are defined
 * further down in this file and used by ap_core_output_filter(). */
308 static void setaside_remaining_output(ap_filter_t *f,
309 core_output_filter_ctx_t *ctx,
310 apr_bucket_brigade *bb,
311 int make_a_copy, conn_rec *c);
313 static apr_status_t send_brigade_nonblocking(apr_socket_t *s,
314 apr_bucket_brigade *bb,
315 apr_size_t *bytes_written,
318 static void remove_empty_buckets(apr_bucket_brigade *bb);
320 static apr_status_t send_brigade_blocking(apr_socket_t *s,
321 apr_bucket_brigade *bb,
322 apr_size_t *bytes_written,
325 static apr_status_t writev_nonblocking(apr_socket_t *s,
326 struct iovec *vec, apr_size_t nvec,
327 apr_bucket_brigade *bb,
328 apr_size_t *cumulative_bytes_written,
332 static apr_status_t sendfile_nonblocking(apr_socket_t *s,
334 apr_size_t *cumulative_bytes_written,
/* Write thresholds consulted by ap_core_output_filter(): a brigade holding
 * at least THRESHOLD_MIN_WRITE bytes triggers a nonblocking write, and a
 * brigade holding at least THRESHOLD_MAX_BUFFER bytes in non-file buckets
 * triggers a blocking write. */
338 #define THRESHOLD_MIN_WRITE 4096
339 #define THRESHOLD_MAX_BUFFER 65536
/* Core network output filter and the mod_logio hook it reports to.
 *
 * ap_core_output_filter(f, new_bb) works on net->out_ctx (net is f->ctx):
 *   - on the abort fast path it cleans up new_bb (when non-NULL) and
 *     returns APR_ECONNABORTED;
 *   - the visible setup code allocates the context from c->pool and sets
 *     APR_SO_NONBLOCK on net->client_socket;
 *   - lengths of the buckets in new_bb are added to ctx->bytes_in;
 *   - output saved earlier in ctx->buffered_bb is taken first and new_bb is
 *     concatenated after it;
 *   - the brigade is then written according to the numbered rules in the
 *     comment inside the function, using send_brigade_blocking() or
 *     send_brigade_nonblocking(), and what is left over is passed to
 *     setaside_remaining_output().
 * new_bb may be NULL. */
341 /* Optional function coming from mod_logio, used for logging of output
344 extern APR_OPTIONAL_FN_TYPE(ap_logio_add_bytes_out) *logio_add_bytes_out;
346 apr_status_t ap_core_output_filter(ap_filter_t *f, apr_bucket_brigade *new_bb)
349 core_net_rec *net = f->ctx;
350 core_output_filter_ctx_t *ctx = net->out_ctx;
351 apr_bucket_brigade *bb;
352 apr_bucket *bucket, *next;
353 apr_size_t bytes_in_brigade, non_file_bytes_in_brigade;
355 /* Fail quickly if the connection has already been aborted. */
357 if (new_bb != NULL) {
358 apr_brigade_cleanup(new_bb);
360 return APR_ECONNABORTED;
/* Create the output context and put the client socket into nonblocking
 * mode; all later writes rely on that. */
365 ctx = apr_pcalloc(c->pool, sizeof(*ctx));
366 net->out_ctx = (core_output_filter_ctx_t *)ctx;
367 rv = apr_socket_opt_set(net->client_socket, APR_SO_NONBLOCK, 1);
368 if (rv != APR_SUCCESS) {
/* Account for the incoming data: add the length of every bucket in new_bb
 * whose length is greater than zero to ctx->bytes_in. */
373 if (new_bb != NULL) {
374 for (bucket = APR_BRIGADE_FIRST(new_bb); bucket != APR_BRIGADE_SENTINEL(new_bb); bucket = APR_BUCKET_NEXT(bucket)) {
375 if (bucket->length > 0) {
376 ctx->bytes_in += bucket->length;
/* Previously buffered output goes out first; new_bb is appended to it. */
381 if ((ctx->buffered_bb != NULL) &&
382 !APR_BRIGADE_EMPTY(ctx->buffered_bb)) {
383 bb = ctx->buffered_bb;
384 ctx->buffered_bb = NULL;
385 if (new_bb != NULL) {
386 APR_BRIGADE_CONCAT(bb, new_bb);
388 c->data_in_output_filters = 0;
390 else if (new_bb != NULL) {
397 /* Scan through the brigade and decide whether to attempt a write,
398 * based on the following rules:
400 * 1) The new_bb is null: Do a nonblocking write of as much data as
401 * possible,
402 * then save the rest in ctx->buffered_bb. (If new_bb == NULL,
403 * it probably means that the MPM is doing asynchronous write
404 * completion and has just determined that this connection
407 * 2) The brigade contains a flush bucket: Do a blocking write
408 * of everything up to that point.
410 * 3) The request is in CONN_STATE_HANDLER state, and the brigade
411 * contains at least THRESHOLD_MAX_BUFFER bytes in non-file
412 * buckets: Do blocking writes until the amount of data in the
413 * buffer is less than THRESHOLD_MAX_BUFFER. (The point of this
414 * rule is to provide flow control, in case a handler is
415 * streaming out lots of data faster than the data can be
416 * sent to the client.)
418 * 4) The brigade contains at least THRESHOLD_MIN_WRITE
419 * bytes: Do a nonblocking write of as much data as possible,
420 * then save the rest in ctx->buffered_bb.
423 if (new_bb == NULL) {
424 apr_status_t rv = send_brigade_nonblocking(net->client_socket, bb,
425 &(ctx->bytes_written), c);
426 if (APR_STATUS_IS_EAGAIN(rv)) {
429 else if (rv != APR_SUCCESS) {
430 /* The client has aborted the connection */
433 setaside_remaining_output(f, ctx, bb, 0, c);
437 bytes_in_brigade = 0;
438 non_file_bytes_in_brigade = 0;
439 for (bucket = APR_BRIGADE_FIRST(bb); bucket != APR_BRIGADE_SENTINEL(bb);
441 next = APR_BUCKET_NEXT(bucket);
442 if (APR_BUCKET_IS_FLUSH(bucket)) {
443 apr_bucket_brigade *remainder = apr_brigade_split(bb, next);
444 apr_status_t rv = send_brigade_blocking(net->client_socket, bb,
445 &(ctx->bytes_written), c);
446 if (rv != APR_SUCCESS) {
447 /* The client has aborted the connection */
452 next = APR_BRIGADE_FIRST(bb);
453 bytes_in_brigade = 0;
454 non_file_bytes_in_brigade = 0;
456 else if (!APR_BUCKET_IS_METADATA(bucket)) {
/* NOTE(review): apr_bucket's length field is an apr_size_t in APR, which is
 * unsigned, so the "< 0" test below would never be true; buckets of unknown
 * length are normally marked with (apr_size_t)-1. Confirm against the APR
 * headers in use and compare with that value instead if so. */
457 if (bucket->length < 0) {
460 /* XXX support nonblocking read here? */
462 apr_bucket_read(bucket, &data, &length, APR_BLOCK_READ);
463 if (rv != APR_SUCCESS) {
466 /* reading may have split the bucket, so recompute next: */
467 next = APR_BUCKET_NEXT(bucket);
469 bytes_in_brigade += bucket->length;
470 if (!APR_BUCKET_IS_FILE(bucket)) {
471 non_file_bytes_in_brigade += bucket->length;
476 if (non_file_bytes_in_brigade >= THRESHOLD_MAX_BUFFER) {
477 /* ### Writing the entire brigade may be excessive; we really just
478 * ### need to send enough data to be under THRESHOLD_MAX_BUFFER.
480 apr_status_t rv = send_brigade_blocking(net->client_socket, bb,
481 &(ctx->bytes_written), c);
482 if (rv != APR_SUCCESS) {
483 /* The client has aborted the connection */
488 else if (bytes_in_brigade >= THRESHOLD_MIN_WRITE) {
489 apr_status_t rv = send_brigade_nonblocking(net->client_socket, bb,
490 &(ctx->bytes_written), c);
491 if ((rv != APR_SUCCESS) && (!APR_STATUS_IS_EAGAIN(rv))) {
492 /* The client has aborted the connection */
498 setaside_remaining_output(f, ctx, bb, 1, c);
/* setaside_remaining_output: park unsent output on the filter context.
 *
 * After remove_empty_buckets(bb), a non-empty bb sets
 * c->data_in_output_filters to 1 and ends up in ctx->buffered_bb, either
 * through ap_save_brigade() into c->pool (followed by destroying bb) or by
 * storing the bb pointer directly; presumably make_a_copy selects between
 * the two - confirm against the full source. The remaining path simply
 * destroys bb. */
502 static void setaside_remaining_output(ap_filter_t *f,
503 core_output_filter_ctx_t *ctx,
504 apr_bucket_brigade *bb,
505 int make_a_copy, conn_rec *c)
/* Trim leading metadata and zero-length buckets before deciding whether
 * anything is left to save. */
510 remove_empty_buckets(bb);
511 if (!APR_BRIGADE_EMPTY(bb)) {
512 c->data_in_output_filters = 1;
514 /* XXX should this use a separate deferred write pool, like
515 * the original ap_core_output_filter?
517 ap_save_brigade(f, &(ctx->buffered_bb), &bb, c->pool);
518 apr_brigade_destroy(bb);
521 ctx->buffered_bb = bb;
525 apr_brigade_destroy(bb);
/* MAX_IOVEC_TO_WRITE sizes the iovec array in send_brigade_nonblocking().
 * It is 16 when APR_MAX_IOVEC_SIZE is undefined or greater than 16, and
 * APR_MAX_IOVEC_SIZE in the remaining branch of the conditional. */
529 #ifndef APR_MAX_IOVEC_SIZE
530 #define MAX_IOVEC_TO_WRITE 16
532 #if APR_MAX_IOVEC_SIZE > 16
533 #define MAX_IOVEC_TO_WRITE 16
535 #define MAX_IOVEC_TO_WRITE APR_MAX_IOVEC_SIZE
/* send_brigade_nonblocking: write as much of bb to socket s as possible.
 *
 * Walks bb from the front after remove_empty_buckets(). File buckets whose
 * descriptor has APR_SENDFILE_ENABLED set and whose length is at least
 * AP_MIN_SENDFILE_BYTES go through sendfile_nonblocking(), with iovecs
 * gathered so far written first and APR_TCP_NOPUSH set around the pair of
 * calls. Other non-metadata buckets are read with apr_bucket_read() and
 * queued in a local iovec array of MAX_IOVEC_TO_WRITE entries, which is
 * handed to writev_nonblocking() whenever it fills up and once more at the
 * end. bytes_written is passed through to the helpers, which add to it. */
539 static apr_status_t send_brigade_nonblocking(apr_socket_t *s,
540 apr_bucket_brigade *bb,
541 apr_size_t *bytes_written,
544 apr_bucket *bucket, *next;
546 struct iovec vec[MAX_IOVEC_TO_WRITE];
549 remove_empty_buckets(bb);
551 for (bucket = APR_BRIGADE_FIRST(bb);
552 bucket != APR_BRIGADE_SENTINEL(bb);
554 int did_sendfile = 0;
555 next = APR_BUCKET_NEXT(bucket);
557 if (APR_BUCKET_IS_FILE(bucket)) {
558 apr_bucket_file *file_bucket = (apr_bucket_file *)(bucket->data);
559 apr_file_t *fd = file_bucket->fd;
560 /* Use sendfile to send this file unless:
561 * - the platform doesn't support sendfile,
562 * - the file is too small for sendfile to be useful, or
563 * - sendfile is disabled in the httpd config via "EnableSendfile off"
566 if ((apr_file_flags_get(fd) & APR_SENDFILE_ENABLED) &&
567 (bucket->length >= AP_MIN_SENDFILE_BYTES)) {
570 (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 1);
571 rv = writev_nonblocking(s, vec, nvec, bb, bytes_written, c);
573 if (rv != APR_SUCCESS) {
574 (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 0);
578 rv = sendfile_nonblocking(s, bucket, bytes_written, c);
580 (void)apr_socket_opt_set(s, APR_TCP_NOPUSH, 0);
582 if (rv != APR_SUCCESS) {
588 #endif /* APR_HAS_SENDFILE */
589 if (!did_sendfile && !APR_BUCKET_IS_METADATA(bucket)) {
592 rv = apr_bucket_read(bucket, &data, &length, APR_BLOCK_READ);
593 if (rv != APR_SUCCESS) {
596 /* reading may have split the bucket, so recompute next: */
597 next = APR_BUCKET_NEXT(bucket);
598 vec[nvec].iov_base = (char *)data;
599 vec[nvec].iov_len = length;
/* The iovec array is full: write it out before queueing more buckets. */
601 if (nvec == MAX_IOVEC_TO_WRITE) {
602 rv = writev_nonblocking(s, vec, nvec, bb, bytes_written, c);
604 if (rv != APR_SUCCESS) {
/* Final write of the iovecs gathered so far. */
613 rv = writev_nonblocking(s, vec, nvec, bb, bytes_written, c);
614 if (rv != APR_SUCCESS) {
619 remove_empty_buckets(bb);
/* remove_empty_buckets: strip the front of bb. While the first bucket is
 * either a metadata bucket or has length 0, it is removed from the brigade
 * and destroyed. Trimming stops at the first bucket that carries data, so
 * empty or metadata buckets further back are left in place. */
624 static void remove_empty_buckets(apr_bucket_brigade *bb)
627 while (((bucket = APR_BRIGADE_FIRST(bb)) != APR_BRIGADE_SENTINEL(bb)) &&
628 (APR_BUCKET_IS_METADATA(bucket) || (bucket->length == 0))) {
629 APR_BUCKET_REMOVE(bucket);
630 apr_bucket_destroy(bucket);
/* send_brigade_blocking: keep writing until bb is empty.
 *
 * Calls send_brigade_nonblocking() in a loop while bb still has buckets.
 * When that call reports EAGAIN, the socket is polled for APR_POLLOUT with
 * the timeout currently configured on s before the loop continues; the
 * result of the poll and of other failures is checked by the visible
 * `rv != APR_SUCCESS` tests. bytes_written is forwarded unchanged. */
634 static apr_status_t send_brigade_blocking(apr_socket_t *s,
635 apr_bucket_brigade *bb,
636 apr_size_t *bytes_written,
642 while (!APR_BRIGADE_EMPTY(bb)) {
643 rv = send_brigade_nonblocking(s, bb, bytes_written, c);
644 if (rv != APR_SUCCESS) {
645 if (APR_STATUS_IS_EAGAIN(rv)) {
646 /* Wait until we can send more data */
648 apr_interval_time_t timeout;
649 apr_pollfd_t pollset;
652 pollset.desc_type = APR_POLL_SOCKET;
653 pollset.reqevents = APR_POLLOUT;
/* Wait for writability no longer than the timeout set on the socket. */
655 apr_socket_timeout_get(s, &timeout);
656 rv = apr_poll(&pollset, 1, &nsds, timeout);
657 if (rv != APR_SUCCESS) {
/* writev_nonblocking: send an iovec array and retire the matching buckets.
 *
 * vec/nvec describe data that belongs to the buckets at the front of bb.
 * The socket timeout is saved and set to 0 for the duration of the call,
 * then restored. apr_socket_sendv() is called until all bytes in vec have
 * been written. After each send, buckets at the head of bb are removed and
 * destroyed: metadata buckets unconditionally, data buckets once their
 * whole iovec was covered, and a partially sent bucket is split so that
 * only the sent part is dropped, with the iovec base advanced to match.
 * The byte count is reported to logio_add_bytes_out (when set and non-zero)
 * and added to *cumulative_bytes_written. */
669 static apr_status_t writev_nonblocking(apr_socket_t *s,
670 struct iovec *vec, apr_size_t nvec,
671 apr_bucket_brigade *bb,
672 apr_size_t *cumulative_bytes_written,
675 apr_status_t rv = APR_SUCCESS, arv;
676 apr_size_t bytes_written = 0, bytes_to_write = 0;
677 apr_size_t i, offset;
678 apr_interval_time_t old_timeout;
/* Remember the caller's socket timeout, then switch to timeout 0. */
680 arv = apr_socket_timeout_get(s, &old_timeout);
681 if (arv != APR_SUCCESS) {
684 arv = apr_socket_timeout_set(s, 0);
685 if (arv != APR_SUCCESS) {
/* Total number of bytes described by the iovec array. */
689 for (i = 0; i < nvec; i++) {
690 bytes_to_write += vec[i].iov_len;
693 while (bytes_written < bytes_to_write) {
695 rv = apr_socket_sendv(s, vec + offset, nvec - offset, &n);
/* Drop the buckets at the head of bb that this send accounted for. */
698 for (i = offset; i < nvec; ) {
699 apr_bucket *bucket = APR_BRIGADE_FIRST(bb);
700 if (APR_BUCKET_IS_METADATA(bucket)) {
701 APR_BUCKET_REMOVE(bucket);
702 apr_bucket_destroy(bucket);
704 else if (n >= vec[i].iov_len) {
705 APR_BUCKET_REMOVE(bucket);
706 apr_bucket_destroy(bucket);
708 n -= vec[i++].iov_len;
711 apr_bucket_split(bucket, n);
712 APR_BUCKET_REMOVE(bucket);
713 apr_bucket_destroy(bucket);
715 vec[i].iov_base = (char *) vec[i].iov_base + n;
720 if (rv != APR_SUCCESS) {
724 if ((logio_add_bytes_out != NULL) && (bytes_written > 0)) {
725 logio_add_bytes_out(c, bytes_written);
727 *cumulative_bytes_written += bytes_written;
/* Restore the saved timeout; a failure here only matters when the write
 * itself succeeded. */
729 arv = apr_socket_timeout_set(s, old_timeout);
730 if ((arv != APR_SUCCESS) && (rv == APR_SUCCESS)) {
/* sendfile_nonblocking: send one file bucket with apr_socket_sendfile().
 *
 * A bucket that is not a file bucket is reported through ap_log_error()
 * (rv is still APR_SUCCESS at that point, so the logged status is
 * success). Otherwise the bucket's descriptor, length and start offset are
 * used for a single sendfile call issued with the socket timeout set to 0;
 * the previous timeout is restored afterwards. The byte count is reported
 * to logio_add_bytes_out (when set and non-zero) and added to
 * *cumulative_bytes_written. A partially sent bucket is split at
 * bytes_written and the sent part removed and destroyed; a fully sent
 * bucket is removed and destroyed. */
740 static apr_status_t sendfile_nonblocking(apr_socket_t *s,
742 apr_size_t *cumulative_bytes_written,
745 apr_status_t rv = APR_SUCCESS;
746 apr_bucket_file *file_bucket;
748 apr_size_t file_length;
749 apr_off_t file_offset;
750 apr_size_t bytes_written = 0;
752 if (!APR_BUCKET_IS_FILE(bucket)) {
753 ap_log_error(APLOG_MARK, APLOG_ERR, rv, c->base_server,
754 "core_filter: sendfile_nonblocking: "
755 "this should never happen");
758 file_bucket = (apr_bucket_file *)(bucket->data);
759 fd = file_bucket->fd;
760 file_length = bucket->length;
761 file_offset = bucket->start;
763 if (bytes_written < file_length) {
764 apr_size_t n = file_length - bytes_written;
766 apr_interval_time_t old_timeout;
/* Remember the caller's socket timeout, then switch to timeout 0. */
768 arv = apr_socket_timeout_get(s, &old_timeout);
769 if (arv != APR_SUCCESS) {
772 arv = apr_socket_timeout_set(s, 0);
773 if (arv != APR_SUCCESS) {
776 rv = apr_socket_sendfile(s, fd, NULL, &file_offset, &n, 0);
777 if (rv == APR_SUCCESS) {
781 arv = apr_socket_timeout_set(s, old_timeout);
782 if ((arv != APR_SUCCESS) && (rv == APR_SUCCESS)) {
786 if ((logio_add_bytes_out != NULL) && (bytes_written > 0)) {
787 logio_add_bytes_out(c, bytes_written);
789 *cumulative_bytes_written += bytes_written;
/* Retire whatever part of the bucket was actually sent. */
790 if ((bytes_written < file_length) && (bytes_written > 0)) {
791 apr_bucket_split(bucket, bytes_written);
792 APR_BUCKET_REMOVE(bucket);
793 apr_bucket_destroy(bucket);
795 else if (bytes_written == file_length) {
796 APR_BUCKET_REMOVE(bucket);
797 apr_bucket_destroy(bucket);