From 08eb7609da3a8fa149113a1068cb5b3d676614a5 Mon Sep 17 00:00:00 2001 From: Justin Erenkrantz Date: Sat, 29 Sep 2001 08:17:11 +0000 Subject: [PATCH] Input filtering rewrite. Consolidate how we handle HTTP input parsing by rearranging and rethinking some things. The net result is that the HTTP filter is now a request filter and is now only responsible for HTTP things. The core input filter is now responsible for handling all of the dirty work. Highlights: - Removes the dechunk filter and merges it with ap_http_filter (aka HTTP_IN). The dechunk filter was incorrectly handling certain cases (trailers). - Moves ap_http_filter from a connection filter to a request filter to support the consolidation above (it needs header info). - Change support code to allow the http_filter to be a request filter (how the request is setup initially). - Move most of the logic from HTTP_IN to CORE_IN (core_input_filter). HTTP_IN is now only concerned about HTTP things. The core filter is now responsible for returning data. It is impossible to consolidate dechunk and http without this because HTTP_IN previously buffered data. As Greg has suggested, it may make sense to write some brigade functions that handle input (getline). It should be fairly trivial to add these. Some of the calls in ap_http_filter could be switched as well. This is the original patch as submitted to dev@httpd on Monday, Sep. 24th. Additional comments and some minor tweaks done after that submission are coming up next. This should allow people who reviewed the original patch to see what has changed and review them piecemeal. This test passes all current tests in httpd-test. Please perform chicken sacrifices to verify that this hasn't blown up your favorite input. Reviewed by: Greg Stein, Ryan Bloom, and Cliff Woolley (buckets) git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@91189 13f79535-47bb-0310-9956-ffa450edef68 --- modules/http/http_core.c | 12 +- modules/http/http_protocol.c | 315 +++++++++-------------------------- server/core.c | 138 ++++++++++++++- server/protocol.c | 8 +- 4 files changed, 227 insertions(+), 246 deletions(-) diff --git a/modules/http/http_core.c b/modules/http/http_core.c index 9a86031e5a..fc51a1558c 100644 --- a/modules/http/http_core.c +++ b/modules/http/http_core.c @@ -262,7 +262,6 @@ static apr_port_t http_port(const request_rec *r) static int ap_pre_http_connection(conn_rec *c) { - ap_add_input_filter("HTTP_IN", NULL, NULL, c); ap_add_input_filter("CORE_IN", NULL, NULL, c); ap_add_output_filter("CORE", NULL, NULL, c); return OK; @@ -302,6 +301,15 @@ static int ap_process_http_connection(conn_rec *c) return OK; } +static int ap_http_create_req(request_rec *r) +{ + if (!r->main) + { + ap_add_input_filter("HTTP_IN", NULL, r, r->connection); + } + return OK; +} + static void ap_http_insert_filter(request_rec *r) { if (!r->main) { @@ -320,10 +328,10 @@ static void register_hooks(apr_pool_t *p) ap_hook_map_to_storage(ap_send_http_trace,NULL,NULL,APR_HOOK_MIDDLE); ap_hook_http_method(http_method,NULL,NULL,APR_HOOK_REALLY_LAST); ap_hook_default_port(http_port,NULL,NULL,APR_HOOK_REALLY_LAST); + ap_hook_create_request(ap_http_create_req, NULL, NULL, APR_HOOK_MIDDLE); ap_hook_insert_filter(ap_http_insert_filter, NULL, NULL, APR_HOOK_REALLY_LAST); ap_register_input_filter("HTTP_IN", ap_http_filter, AP_FTYPE_CONNECTION); - ap_register_input_filter("DECHUNK", ap_dechunk_filter, AP_FTYPE_TRANSCODE); ap_register_output_filter("HTTP_HEADER", ap_http_header_filter, AP_FTYPE_HTTP_HEADER); ap_register_output_filter("CHUNK", chunk_filter, AP_FTYPE_TRANSCODE); diff --git a/modules/http/http_protocol.c b/modules/http/http_protocol.c index e34b65891b..83c9714fc1 100644 --- a/modules/http/http_protocol.c +++ b/modules/http/http_protocol.c @@ -481,265 +481,117 @@ AP_DECLARE(const char *) ap_method_name_of(int methnum) return AP_HTTP_METHODS[methnum]; } -struct dechunk_ctx { - apr_size_t chunk_size; - apr_size_t bytes_delivered; - enum { - WANT_HDR /* must have value zero */, - WANT_BODY, - WANT_TRL - } state; -}; - static long get_chunk_size(char *); -apr_status_t ap_dechunk_filter(ap_filter_t *f, apr_bucket_brigade *bb, - ap_input_mode_t mode, apr_off_t *readbytes) -{ - apr_status_t rv; - struct dechunk_ctx *ctx = f->ctx; - apr_bucket *b; - const char *buf; - apr_size_t len; - - if (!ctx) { - f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(struct dechunk_ctx)); - } - - do { - if (ctx->chunk_size == ctx->bytes_delivered) { - /* Time to read another chunk header or trailer... ap_http_filter() is - * the next filter in line and it knows how to return a brigade with - * one line. - */ - char line[30]; - - if ((rv = ap_getline(line, sizeof(line), f->r, - 0 /* readline */)) < 0) { - return rv; - } - switch(ctx->state) { - case WANT_HDR: - ctx->chunk_size = get_chunk_size(line); - ctx->bytes_delivered = 0; - if (ctx->chunk_size == 0) { - ctx->state = WANT_TRL; - } - else { - ctx->state = WANT_BODY; - } - break; - case WANT_TRL: - /* XXX sanity check end chunk here */ - if (strlen(line)) { - /* bad trailer */ - } - if (ctx->chunk_size == 0) { /* we just finished the last chunk? */ - /* ### woah... ap_http_filter() is doing this, too */ - /* append eos bucket and get out */ - b = apr_bucket_eos_create(); - APR_BRIGADE_INSERT_TAIL(bb, b); - return APR_SUCCESS; - } - ctx->state = WANT_HDR; - break; - default: - ap_assert(ctx->state == WANT_HDR || ctx->state == WANT_TRL); - } - } - } while (ctx->state != WANT_BODY); - - if (ctx->state == WANT_BODY) { - /* Tell ap_http_filter() how many bytes to deliver. */ - apr_off_t chunk_bytes = ctx->chunk_size - ctx->bytes_delivered; - - if ((rv = ap_get_brigade(f->next, bb, mode, - &chunk_bytes)) != APR_SUCCESS) { - return rv; - } - - /* Walk through the body, accounting for bytes, and removing an eos - * bucket if ap_http_filter() delivered the entire chunk. - * - * ### this shouldn't be necessary. 1) ap_http_filter shouldn't be - * ### adding EOS buckets. 2) it shouldn't return more bytes than - * ### we requested, therefore the total len can be found with a - * ### simple call to apr_brigade_length(). no further munging - * ### would be needed. - */ - b = APR_BRIGADE_FIRST(bb); - while (b != APR_BRIGADE_SENTINEL(bb) && !APR_BUCKET_IS_EOS(b)) { - apr_bucket_read(b, &buf, &len, mode); - AP_DEBUG_ASSERT(len <= ctx->chunk_size - ctx->bytes_delivered); - ctx->bytes_delivered += len; - b = APR_BUCKET_NEXT(b); - } - if (ctx->bytes_delivered == ctx->chunk_size) { - AP_DEBUG_ASSERT(APR_BUCKET_IS_EOS(b)); - apr_bucket_delete(b); - ctx->state = WANT_TRL; - } - } - - return APR_SUCCESS; -} - typedef struct http_filter_ctx { - apr_bucket_brigade *b; + int status; + apr_size_t remaining; + enum { + BODY_NONE /* must have value of zero */, + BODY_LENGTH, + BODY_CHUNK + } state; } http_ctx_t; +/* Hi, I'm the main input filter for HTTP requests. + * I handle chunked and content-length bodies. */ apr_status_t ap_http_filter(ap_filter_t *f, apr_bucket_brigade *b, ap_input_mode_t mode, apr_off_t *readbytes) { apr_bucket *e; - char *buff; - apr_size_t len; - char *pos; http_ctx_t *ctx = f->ctx; apr_status_t rv; if (!ctx) { - f->ctx = ctx = apr_pcalloc(f->c->pool, sizeof(*ctx)); - ctx->b = apr_brigade_create(f->c->pool); + f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx)); + ctx->status = f->r->status; } - if (mode == AP_MODE_PEEK) { - apr_bucket *e; - const char *str; - apr_size_t length; - - /* The purpose of this loop is to ignore any CRLF (or LF) at the end - * of a request. Many browsers send extra lines at the end of POST - * requests. We use the PEEK method to determine if there is more - * data on the socket, so that we know if we should delay sending the - * end of one request until we have served the second request in a - * pipelined situation. We don't want to actually delay sending a - * response if the server finds a CRLF (or LF), becuause that doesn't - * mean that there is another request, just a blank line. - */ - while (1) { - if (APR_BRIGADE_EMPTY(ctx->b)) { - e = NULL; - } - else { - e = APR_BRIGADE_FIRST(ctx->b); - } - if (!e || apr_bucket_read(e, &str, &length, APR_NONBLOCK_READ) != APR_SUCCESS) { - return APR_EOF; + /* Basically, we have to stay out of the way until server/protocol.c + * says it is okay - which it does by setting r->status to OK. */ + if (f->r->status != ctx->status) + { + int old_status; + /* Allow us to be reentrant! */ + old_status = ctx->status; + ctx->status = f->r->status; + + if (old_status == HTTP_REQUEST_TIME_OUT && f->r->status == HTTP_OK) + { + const char *tenc, *lenp; + tenc = apr_table_get(f->r->headers_in, "Transfer-Encoding"); + lenp = apr_table_get(f->r->headers_in, "Content-Length"); + + if (tenc) { + if (!strcasecmp(tenc, "chunked")) { + char line[30]; + + if ((rv = ap_getline(line, sizeof(line), f->r, 0)) < 0) { + return rv; + } + ctx->state = BODY_CHUNK; + ctx->remaining = get_chunk_size(line); + } } - else { - const char *c = str; - while (c < str + length) { - if (*c == APR_ASCII_LF) - c++; - else if (*c == APR_ASCII_CR && *(c + 1) == APR_ASCII_LF) - c += 2; - else return APR_SUCCESS; + else if (lenp) { + const char *pos = lenp; + + while (apr_isdigit(*pos) || apr_isspace(*pos)) + ++pos; + if (*pos == '\0') { + ctx->state = BODY_LENGTH; + ctx->remaining = atol(lenp); } - apr_bucket_delete(e); } } } - if (APR_BRIGADE_EMPTY(ctx->b)) { - if ((rv = ap_get_brigade(f->next, ctx->b, mode, readbytes)) != APR_SUCCESS) { - return rv; - } - } + if (!ctx->remaining) + { + switch (ctx->state) + { + case BODY_NONE: + break; + case BODY_LENGTH: + e = apr_bucket_eos_create(); + APR_BRIGADE_INSERT_TAIL(b, e); + return APR_SUCCESS; + case BODY_CHUNK: + { + char line[30]; + + ctx->state = BODY_NONE; - /* If readbytes is -1, we want to just read everything until the end - * of the brigade, which in this case means the end of the socket. To - * do this, we loop through the entire brigade, until the socket is - * exhausted, at which point, it will automagically remove itself from - * the brigade. - */ - if (*readbytes == -1) { - apr_bucket *e; - apr_off_t total; - APR_BRIGADE_FOREACH(e, ctx->b) { - const char *str; - apr_size_t len; - apr_bucket_read(e, &str, &len, APR_BLOCK_READ); - } - APR_BRIGADE_CONCAT(b, ctx->b); - apr_brigade_length(b, 1, &total); - *readbytes = total; - e = apr_bucket_eos_create(); - APR_BRIGADE_INSERT_TAIL(b, e); - return APR_SUCCESS; - } - /* readbytes == 0 is "read a single line". otherwise, read a block. */ - if (*readbytes) { - apr_off_t total; + /* We need to read the CRLF after the chunk. */ + if ((rv = ap_getline(line, sizeof(line), f->r, 0)) < 0) { + return rv; + } - /* ### the code below, which moves bytes from one brigade to the - ### other is probably bogus. presuming the next filter down was - ### working properly, it should not have returned more than - ### READBYTES bytes, and we wouldn't have to do any work. - */ + /* Read the real chunk line. */ + if ((rv = ap_getline(line, sizeof(line), f->r, 0)) < 0) { + return rv; + } + ctx->state = BODY_CHUNK; + ctx->remaining = get_chunk_size(line); - APR_BRIGADE_NORMALIZE(ctx->b); - if (APR_BRIGADE_EMPTY(ctx->b)) { - if ((rv = ap_get_brigade(f->next, ctx->b, mode, readbytes)) != APR_SUCCESS) { - return rv; - } - } - - apr_brigade_partition(ctx->b, *readbytes, &e); - APR_BRIGADE_CONCAT(b, ctx->b); - if (e != APR_BRIGADE_SENTINEL(ctx->b)) { - apr_bucket_brigade *temp; - - temp = apr_brigade_split(b, e); - - /* ### darn. gotta ensure the split brigade is in the proper pool. - ### this is a band-aid solution; we shouldn't even be doing - ### all of this brigade munging (per the comment above). - ### until then, this will get the right lifetimes. */ - APR_BRIGADE_CONCAT(ctx->b, temp); - } - else { - if (!APR_BRIGADE_EMPTY(ctx->b)) { - ctx->b = NULL; /*XXX*/ + if (!ctx->remaining) + { + e = apr_bucket_eos_create(); + APR_BRIGADE_INSERT_TAIL(b, e); + return APR_SUCCESS; + } } + break; } - apr_brigade_length(b, 1, &total); - *readbytes -= total; - - /* ### this is a hack. it is saying, "if we have read everything - ### that was requested, then we are at the end of the request." - ### it presumes that the next filter up will *only* call us - ### with readbytes set to the Content-Length of the request. - ### that may not always be true, and this code is *definitely* - ### too presumptive of the caller's intent. the point is: this - ### filter is not the guy that is parsing the headers or the - ### chunks to determine where the end of the request is, so we - ### shouldn't be monkeying with EOS buckets. - */ - if (*readbytes == 0) { - apr_bucket *eos = apr_bucket_eos_create(); - - APR_BRIGADE_INSERT_TAIL(b, eos); - } - return APR_SUCCESS; } - /* we are reading a single line, e.g. the HTTP headers */ - while (!APR_BRIGADE_EMPTY(ctx->b)) { - e = APR_BRIGADE_FIRST(ctx->b); - if ((rv = apr_bucket_read(e, (const char **)&buff, &len, mode)) != APR_SUCCESS) { - return rv; - } + rv = ap_get_brigade(f->next, b, mode, readbytes); + + if (rv != APR_SUCCESS) + return rv; + + if (ctx->state != BODY_NONE) + ctx->remaining -= *readbytes; - pos = memchr(buff, APR_ASCII_LF, len); - if (pos != NULL) { - apr_bucket_split(e, pos - buff + 1); - APR_BUCKET_REMOVE(e); - APR_BRIGADE_INSERT_TAIL(b, e); - return APR_SUCCESS; - } - APR_BUCKET_REMOVE(e); - APR_BRIGADE_INSERT_TAIL(b, e); - } return APR_SUCCESS; } @@ -1406,7 +1258,6 @@ AP_DECLARE(int) ap_setup_client_block(request_rec *r, int read_policy) } r->read_chunked = 1; - ap_add_input_filter("DECHUNK", NULL, r, r->connection); } else if (lenp) { const char *pos = lenp; diff --git a/server/core.c b/server/core.c index 16e66422d0..58d90c2d3d 100644 --- a/server/core.c +++ b/server/core.c @@ -2747,23 +2747,145 @@ static int default_handler(request_rec *r) return ap_pass_brigade(r->output_filters, bb); } +typedef struct core_filter_ctx { + apr_bucket_brigade *b; +} core_ctx_t; + static int core_input_filter(ap_filter_t *f, apr_bucket_brigade *b, ap_input_mode_t mode, apr_off_t *readbytes) { apr_bucket *e; + apr_status_t rv; + core_ctx_t *ctx = f->ctx; + char *buff, *pos; + apr_size_t len; - if (!f->ctx) { /* If we haven't passed up the socket yet... */ - f->ctx = (void *)1; + if (!ctx) + { + f->ctx = ctx = apr_pcalloc(f->c->pool, sizeof(*ctx)); + ctx->b = apr_brigade_create(f->c->pool); + + /* seed the brigade with the client socket. */ e = apr_bucket_socket_create(f->c->client_socket); + APR_BRIGADE_INSERT_TAIL(ctx->b, e); + } + + if (mode == AP_MODE_PEEK) { + apr_bucket *e; + const char *str, *c; + apr_size_t length; + + /* The purpose of this loop is to ignore any CRLF (or LF) at the end + * of a request. Many browsers send extra lines at the end of POST + * requests. We use the PEEK method to determine if there is more + * data on the socket, so that we know if we should delay sending the + * end of one request until we have served the second request in a + * pipelined situation. We don't want to actually delay sending a + * response if the server finds a CRLF (or LF), becuause that doesn't + * mean that there is another request, just a blank line. + */ + while (1) { + + if (APR_BRIGADE_EMPTY(ctx->b)) + return APR_EOF; + + e = APR_BRIGADE_FIRST(ctx->b); + + rv = apr_bucket_read(e, &str, &length, APR_NONBLOCK_READ); + + if (rv != APR_SUCCESS) + return rv; + + c = str; + while (c < str + length) { + if (*c == APR_ASCII_LF) + c++; + else if (*c == APR_ASCII_CR && *(c + 1) == APR_ASCII_LF) + c += 2; + else + return APR_SUCCESS; + } + /* If we reach here, we were a bucket just full of CRLFs, so + * just toss the bucket. */ + /* FIXME: Is this the right thing to do in the core? */ + apr_bucket_delete(e); + } + } + + /* If readbytes is -1, we want to just read everything until the end + * of the brigade, which in this case means the end of the socket. To + * do this, we loop through the entire brigade, until the socket is + * exhausted, at which point, it will automagically remove itself from + * the brigade. + */ + if (*readbytes == -1) { + apr_bucket *e; + apr_off_t total; + const char *str; + apr_size_t len; + APR_BRIGADE_FOREACH(e, ctx->b) { + /* We don't care about these values. We just want to force the + * lower level to just read it. */ + apr_bucket_read(e, &str, &len, APR_BLOCK_READ); + } + APR_BRIGADE_CONCAT(b, ctx->b); + + /* Force a recompute of the length */ + apr_brigade_length(b, 1, &total); + *readbytes = total; + /* We have read until the brigade was empty, so we know that we + * must be EOS. */ + e = apr_bucket_eos_create(); APR_BRIGADE_INSERT_TAIL(b, e); return APR_SUCCESS; } - else { - /* Either some code lost track of the socket - * bucket or we already found out that the - * client closed. - */ - return APR_EOF; + /* readbytes == 0 is "read a single line". otherwise, read a block. */ + if (*readbytes) { + apr_off_t total; + apr_bucket *e; + apr_bucket_brigade *newbb; + + newbb = NULL; + + apr_brigade_partition(ctx->b, *readbytes, &e); + /* Must do split before CONCAT */ + if (e != APR_BRIGADE_SENTINEL(ctx->b)) { + newbb = apr_brigade_split(ctx->b, e); + } + APR_BRIGADE_CONCAT(b, ctx->b); + + /* FIXME: Is this really needed? Due to pointer use in sentinels, + * I think so. */ + if (newbb) + APR_BRIGADE_CONCAT(ctx->b, newbb); + + apr_brigade_length(b, 1, &total); + *readbytes = total; + + return APR_SUCCESS; } + + /* we are reading a single LF line, e.g. the HTTP headers */ + while (!APR_BRIGADE_EMPTY(ctx->b)) { + e = APR_BRIGADE_FIRST(ctx->b); + if ((rv = apr_bucket_read(e, (const char **)&buff, &len, + mode)) != APR_SUCCESS) { + return rv; + } + + pos = memchr(buff, APR_ASCII_LF, len); + /* We found a match. */ + if (pos != NULL) { + apr_bucket_split(e, pos - buff + 1); + APR_BUCKET_REMOVE(e); + APR_BRIGADE_INSERT_TAIL(b, e); + return APR_SUCCESS; + } + APR_BUCKET_REMOVE(e); + APR_BRIGADE_INSERT_TAIL(b, e); + *readbytes += len; + } + + return APR_SUCCESS; } /* Default filter. This filter should almost always be used. Its only job diff --git a/server/protocol.c b/server/protocol.c index 7cb370a89e..c6d18235d1 100644 --- a/server/protocol.c +++ b/server/protocol.c @@ -205,7 +205,6 @@ AP_CORE_DECLARE(int) ap_getline(char *s, int n, request_rec *r, int fold) int total = 0; int looking_ahead = 0; apr_size_t length; - conn_rec *c = r->connection; core_request_config *req_cfg; apr_bucket_brigade *b; apr_bucket *e; @@ -219,7 +218,7 @@ AP_CORE_DECLARE(int) ap_getline(char *s, int n, request_rec *r, int fold) while (1) { if (APR_BRIGADE_EMPTY(b)) { apr_off_t zero = 0; - if ((retval = ap_get_brigade(c->input_filters, b, + if ((retval = ap_get_brigade(r->input_filters, b, AP_MODE_BLOCKING, &zero /* readline */)) != APR_SUCCESS || APR_BRIGADE_EMPTY(b)) { @@ -562,6 +561,9 @@ request_rec *ap_read_request(conn_rec *conn) r->notes = apr_table_make(r->pool, 5); r->request_config = ap_create_request_config(r->pool); + /* Must be set before we run create request hook */ + r->output_filters = conn->output_filters; + r->input_filters = conn->input_filters; ap_run_create_request(r); r->per_dir_config = r->server->lookup_defaults; @@ -572,8 +574,6 @@ request_rec *ap_read_request(conn_rec *conn) r->status = HTTP_REQUEST_TIME_OUT; /* Until we get a request */ r->the_request = NULL; - r->output_filters = conn->output_filters; - r->input_filters = conn->input_filters; apr_setsocketopt(conn->client_socket, APR_SO_TIMEOUT, (int)(keptalive -- 2.50.1