From: Yann Ylavic Date: Sat, 13 Jan 2018 15:57:11 +0000 (+0000) Subject: Merge r1599012, r1601559, r1818624, r1819969, r1819970 from trunk: X-Git-Tag: 2.4.30~145 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bf9f2a71aebfdc54d77f53c381da58af066bdf3e;p=apache Merge r1599012, r1601559, r1818624, r1819969, r1819970 from trunk: mod_proxy_html: skip documents < 4 bytes PR 56286 Micha Lenk follow up r1599012: C99 fix Fix some style mod_proxy_html: follow up to r1599012. To determine whether or not HTML data are lower than 4 bytes, use a retain buffer rather than assuming that all should be contained in a single bucket with the next one being EOS (if any). mod_proxy_html: don't depend on NUL terminated bucket data. ap_regexec() wants NUL terminated strings, so use ap_regexec_len() instead. Submitted by: niq, takashi, jailletc36, ylavic, ylavic Reviewed by: jim, ylavic, icing git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.4.x@1821073 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/CHANGES b/CHANGES index 23ab298032..75fbdf34df 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,9 @@ -*- coding: utf-8 -*- Changes with Apache 2.4.30 + *) mod_proxy_html: skip documents shorter than 4 bytes + PR 56286 [Micha Lenk ] + *) core, mpm_event: Avoid a small memory leak of the scoreboard handle, for the lifetime of the connection, each time it is processed by MPM event. [Yann Ylavic] diff --git a/modules/filters/mod_proxy_html.c b/modules/filters/mod_proxy_html.c index 1ea005bf28..78ebdab34e 100644 --- a/modules/filters/mod_proxy_html.c +++ b/modules/filters/mod_proxy_html.c @@ -108,6 +108,9 @@ typedef struct { size_t avail; const char *encoding; urlmap *map; + char rbuf[4]; + apr_size_t rlen; + apr_size_t rmin; } saxctxt; @@ -638,7 +641,7 @@ static void pstartElement(void *ctxt, const xmlChar *uname, } } -static meta *metafix(request_rec *r, const char *buf) +static meta *metafix(request_rec *r, const char *buf, apr_size_t len) { meta *ret = NULL; size_t offs = 0; @@ -649,7 +652,8 @@ static meta *metafix(request_rec *r, const char *buf) ap_regmatch_t pmatch[2]; char delim; - while (!ap_regexec(seek_meta, buf+offs, 2, pmatch, 0)) { + while (offs < len && + !ap_regexec_len(seek_meta, buf + offs, len - offs, 2, pmatch, 0)) { header = NULL; content = NULL; p = buf+offs+pmatch[1].rm_eo; @@ -844,6 +848,17 @@ static saxctxt *check_filter_init (ap_filter_t *f) return f->ctx; } +static void prepend_rbuf(saxctxt *ctxt, apr_bucket_brigade *bb) +{ + if (ctxt->rlen) { + apr_bucket *b = apr_bucket_transient_create(ctxt->rbuf, + ctxt->rlen, + bb->bucket_alloc); + APR_BRIGADE_INSERT_HEAD(bb, b); + ctxt->rlen = 0; + } +} + static apr_status_t proxy_html_filter(ap_filter_t *f, apr_bucket_brigade *bb) { apr_bucket* b; @@ -865,11 +880,15 @@ static apr_status_t proxy_html_filter(ap_filter_t *f, apr_bucket_brigade *bb) if (APR_BUCKET_IS_METADATA(b)) { if (APR_BUCKET_IS_EOS(b)) { if (ctxt->parser != NULL) { - consume_buffer(ctxt, buf, 0, 1); + consume_buffer(ctxt, "", 0, 1); + } + else { + prepend_rbuf(ctxt, ctxt->bb); } APR_BRIGADE_INSERT_TAIL(ctxt->bb, - apr_bucket_eos_create(ctxt->bb->bucket_alloc)); + apr_bucket_eos_create(ctxt->bb->bucket_alloc)); ap_pass_brigade(ctxt->f->next, ctxt->bb); + apr_brigade_cleanup(ctxt->bb); } else if (APR_BUCKET_IS_FLUSH(b)) { /* pass on flush, except at start where it would cause @@ -884,11 +903,30 @@ static apr_status_t proxy_html_filter(ap_filter_t *f, apr_bucket_brigade *bb) == APR_SUCCESS) { if (ctxt->parser == NULL) { const char *cenc; + + /* For documents smaller than four bytes, there is no reason to do + * HTML rewriting. The URL schema (i.e. 'http') needs four bytes alone. + * And the HTML parser needs at least four bytes to initialise correctly. + */ + ctxt->rmin += bytes; + if (ctxt->rmin < sizeof(ctxt->rbuf)) { + memcpy(ctxt->rbuf + ctxt->rlen, buf, bytes); + ctxt->rlen += bytes; + continue; + } + if (ctxt->rlen && ctxt->rlen < sizeof(ctxt->rbuf)) { + apr_size_t rem = sizeof(ctxt->rbuf) - ctxt->rlen; + memcpy(ctxt->rbuf + ctxt->rlen, buf, rem); + ctxt->rlen += rem; + buf += rem; + bytes -= rem; + } + if (!xml2enc_charset || (xml2enc_charset(f->r, &enc, &cenc) != APR_SUCCESS)) { if (!xml2enc_charset) ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, APLOGNO(01422) - "No i18n support found. Install mod_xml2enc if required"); + "No i18n support found. Install mod_xml2enc if required"); enc = XML_CHAR_ENCODING_NONE; ap_set_content_type(f->r, "text/html;charset=utf-8"); } @@ -910,15 +948,25 @@ static apr_status_t proxy_html_filter(ap_filter_t *f, apr_bucket_brigade *bb) } ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype); - ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, buf, - 4, 0, enc); - buf += 4; - bytes -= 4; + + if (ctxt->rlen) { + ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, + ctxt->rbuf, + ctxt->rlen, + NULL, enc); + } + else { + ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, buf, 4, + NULL, enc); + buf += 4; + bytes -= 4; + } if (ctxt->parser == NULL) { - apr_status_t rv = ap_pass_brigade(f->next, bb); + prepend_rbuf(ctxt, bb); ap_remove_output_filter(f); - return rv; + return ap_pass_brigade(f->next, bb); } + ctxt->rlen = 0; apr_pool_cleanup_register(f->r->pool, ctxt->parser, (int(*)(void*))htmlFreeParserCtxt, apr_pool_cleanup_null); @@ -928,7 +976,7 @@ static apr_status_t proxy_html_filter(ap_filter_t *f, apr_bucket_brigade *bb) "Unsupported parser opts %x", xmlopts); #endif if (ctxt->cfg->metafix) - m = metafix(f->r, buf); + m = metafix(f->r, buf, bytes); if (m) { consume_buffer(ctxt, buf, m->start, 0); consume_buffer(ctxt, buf+m->end, bytes-m->end, 0);