1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * mod_substitute.c: Perform content rewriting on the fly
22 #include "http_config.h"
23 #include "http_core.h"
25 #include "apr_general.h"
26 #include "apr_strings.h"
27 #include "apr_strmatch.h"
29 #include "util_filter.h"
30 #include "util_varbuf.h"
31 #include "apr_buckets.h"
32 #include "http_request.h"
33 #define APR_WANT_STRFUNC
37 * We want to limit the memory usage in a way that is predictable.
38 * Therefore we limit the resulting length of the line.
39 * This is the default value.
/* Default line length limit in bytes (1 MiB); create_substitute_dcfg()
 * installs it as the initial max_line_length. */
41 #define AP_SUBST_MAX_LINE_LENGTH (1024*1024)
/* Name under which register_hooks() registers the output filter. */
43 static const char substitute_filter_name[] = "SUBSTITUTE";
/* Declaration of the module record; the AP_DECLARE_MODULE(substitute) table
 * near the end of the file presumably provides the definition. */
45 module AP_MODULE_DECLARE_DATA substitute_module;
/*
 * Data structures of the module.
 *
 * NOTE(review): this view of the file omits several short lines, so three
 * typedefs appear fused below: the pattern entry (subst_pattern_t), the
 * per-directory configuration (subst_dir_conf) and the filter context
 * (substitute_module_ctx). Members that other functions use but whose
 * declarations are not visible here include replen, patlen and flatten
 * (pattern entry), inherit_before (configuration) and tpool (context).
 */
47 typedef struct subst_pattern_t {
/* Precompiled fixed-string matcher; set_pattern() initialises it to NULL and
 * fills it from apr_strmatch_precompile(). */
48 const apr_strmatch_pattern *pattern;
/* Compiled regular expression; do_pattmatch() consults it only when
 * `pattern` is NULL. */
49 const ap_regex_t *regexp;
/* Replacement text; for regex entries do_pattmatch() expands it through
 * ap_varbuf_regsub() or ap_pregsub_ex(). */
50 const char *replacement;
/* --- members of subst_dir_conf (struct header not visible) --- */
/* Array of subst_pattern_t entries, walked in order by do_pattmatch(). */
57 apr_array_header_t *patterns;
/* Line length limit in bytes. */
58 apr_size_t max_line_length;
/* Set to 1 by set_max_line_length(); lets the merge tell an explicitly
 * configured limit from the default. */
59 int max_line_length_set;
/* --- members of substitute_module_ctx (struct header not visible) --- */
/* Data received so far that does not yet contain a newline. */
64 apr_bucket_brigade *linebb;
/* Spare brigade handed to ap_save_brigade() and then swapped with linebb. */
65 apr_bucket_brigade *linesbb;
/* Everything that is ready to be passed to the next filter. */
66 apr_bucket_brigade *passbb;
/* Working brigade that do_pattmatch() fills for one line. */
67 apr_bucket_brigade *pattbb;
69 } substitute_module_ctx;
/*
 * Per-directory configuration creator.
 *
 * p - pool the configuration and its pattern array are allocated from.
 * d - not used in the visible lines.
 *
 * The new subst_dir_conf starts with an empty pattern array (initial room
 * for 10 entries), the default line length limit marked as "not explicitly
 * set", and inherit_before = -1, which merge_substitute_dcfg() treats as
 * "not configured".
 *
 * NOTE(review): the return statement is not among the visible lines;
 * presumably dcfg is returned.
 */
71 static void *create_substitute_dcfg(apr_pool_t *p, char *d)
73 subst_dir_conf *dcfg =
74 (subst_dir_conf *) apr_palloc(p, sizeof(subst_dir_conf));
76 dcfg->patterns = apr_array_make(p, 10, sizeof(subst_pattern_t));
77 dcfg->max_line_length = AP_SUBST_MAX_LINE_LENGTH;
78 dcfg->max_line_length_set = 0;
79 dcfg->inherit_before = -1;
/*
 * Per-directory configuration merger.
 *
 * p     - pool the merged configuration is allocated from.
 * basev - inherited subst_dir_conf.
 * overv - subst_dir_conf of the current scope.
 *
 * inherit_before: taken from `over` unless it is still -1, in which case the
 *   inherited value is used.
 * patterns: the two pattern arrays are joined with apr_array_append(). When
 *   inherit_before is 1 the call starts with base->patterns, otherwise with
 *   over->patterns. NOTE(review): the second argument of each call is on a
 *   line not visible here; presumably it is the other array.
 * max_line_length: the value from `over` if it was set there explicitly,
 *   else the inherited one; the "set" flag is the OR of both flags.
 *
 * NOTE(review): the declaration of `a` and the return statement are only
 * partly visible.
 */
83 static void *merge_substitute_dcfg(apr_pool_t *p, void *basev, void *overv)
86 (subst_dir_conf *) apr_palloc(p, sizeof(subst_dir_conf));
87 subst_dir_conf *base = (subst_dir_conf *) basev;
88 subst_dir_conf *over = (subst_dir_conf *) overv;
/* -1 means "not configured in this scope": fall back to the inherited value. */
90 a->inherit_before = (over->inherit_before != -1)
91 ? over->inherit_before
92 : base->inherit_before;
93 /* SubstituteInheritBefore wasn't the default behavior until 2.5.x,
94 * and may be re-disabled as desired; the original default behavior
95 * was to apply inherited subst patterns after locally scoped patterns.
96 * In later 2.2 and 2.4 versions, SubstituteInheritBefore may be toggled
97 * 'on' to follow the corrected/expected behavior, without violating POLS.
99 if (a->inherit_before == 1) {
100 a->patterns = apr_array_append(p, base->patterns,
104 a->patterns = apr_array_append(p, over->patterns,
107 a->max_line_length = over->max_line_length_set ?
108 over->max_line_length : base->max_line_length;
109 a->max_line_length_set = over->max_line_length_set
110 | base->max_line_length_set;
/* Bucket count above which substitute_filter() appends a FLUSH bucket and
 * passes ctx->passbb down the chain early. */
114 #define AP_MAX_BUCKETS 1000
/*
 * SEDRMPATBCKT(b, offset, tmp_b, patlen): cut a matched span out of bucket b.
 *
 *   1. split b at `offset`, so b keeps the text before the match;
 *   2. tmp_b becomes the following bucket, which starts at the match;
 *   3. split tmp_b at `patlen`, isolating the matched text;
 *   4. b is advanced to the bucket after the match;
 *   5. the bucket holding the match is deleted.
 *
 * b and tmp_b are assigned to, so both must be lvalues; after the macro
 * tmp_b refers to the deleted bucket and must be reassigned before use.
 * NOTE(review): the closing line of the do-block is not visible here.
 */
116 #define SEDRMPATBCKT(b, offset, tmp_b, patlen) do { \
117 apr_bucket_split(b, offset); \
118 tmp_b = APR_BUCKET_NEXT(b); \
119 apr_bucket_split(tmp_b, patlen); \
120 b = APR_BUCKET_NEXT(tmp_b); \
121 apr_bucket_delete(tmp_b); \
/*
 * Apply every configured substitution to one line of data.
 *
 * f    - the filter; its request supplies the per-directory configuration
 *        and the bucket allocator.
 * inb  - bucket holding the line; it is appended to mybb first.
 * mybb - working brigade; the callers concatenate it onto ctx->passbb after
 *        a successful call.
 *
 * The body also uses a `pool` for the varbuf and for the flattened copies.
 * Both callers pass ctx->tpool as a fourth argument, but the declaration of
 * that parameter is not among the visible lines.
 *
 * For each pattern, every bucket currently in mybb is read and searched,
 * with apr_strmatch() for fixed strings or ap_regexec_len() for regexes.
 * A match is replaced in one of two ways:
 *   - flatten mode (script->flatten && !force_quick): the text before the
 *     match and the replacement are accumulated in the varbuf, and the
 *     bucket is finally swapped for a pool bucket holding the copy;
 *   - quick mode: the bucket is split around the match with SEDRMPATBCKT
 *     and a transient bucket carrying the replacement is inserted.
 * Both modes compare the growing result against cfg->max_line_length.
 *
 * NOTE(review): the statements run when a limit check fails, the assignment
 * of force_quick, the step to the next pattern and the return statements
 * are on lines omitted from this view. substitute_filter() logs
 * "Line too long" for APR_ENOMEM, so presumably that is the status returned
 * on overflow -- confirm against the full file.
 */
124 static apr_status_t do_pattmatch(ap_filter_t *f, apr_bucket *inb,
125 apr_bucket_brigade *mybb,
130 ap_regmatch_t regm[AP_MAX_REG_MATCH];
138 subst_dir_conf *cfg =
139 (subst_dir_conf *) ap_get_module_config(f->r->per_dir_config,
141 subst_pattern_t *script;
/* Start the working brigade with the incoming line. */
143 APR_BRIGADE_INSERT_TAIL(mybb, inb);
144 ap_varbuf_init(pool, &vb, 0);
146 script = (subst_pattern_t *) cfg->patterns->elts;
148 * Simple optimization. If we only have one pattern, then
149 * we can safely avoid the overhead of flattening
151 if (cfg->patterns->nelts == 1) {
/* Outer loop: patterns. Inner loop: the buckets currently in mybb. */
154 for (i = 0; i < cfg->patterns->nelts; i++) {
155 for (b = APR_BRIGADE_FIRST(mybb);
156 b != APR_BRIGADE_SENTINEL(mybb);
157 b = APR_BUCKET_NEXT(b)) {
158 if (APR_BUCKET_IS_METADATA(b)) {
160 * we should NEVER see this, because we should never
161 * be passed any, but "handle" it just in case.
165 if (apr_bucket_read(b, &buff, &bytes, APR_BLOCK_READ)
/* Fixed-string pattern. */
169 if (script->pattern) {
172 * space_left counts how many bytes we have left until the
173 * line length reaches max_line_length.
175 apr_size_t space_left = cfg->max_line_length;
176 apr_size_t repl_len = strlen(script->replacement);
177 while ((repl = apr_strmatch(script->pattern, buff, bytes)))
180 /* get offset into buff for pattern */
181 len = (apr_size_t) (repl - buff);
182 if (script->flatten && !force_quick) {
184 * We are flattening the buckets here, meaning
185 * that we don't do the fast bucket splits.
186 * Instead we copy over what the buckets would
187 * contain and use them. This is slow, since we
188 * are constanting allocing space and copying
191 if (vb.strlen + len + repl_len > cfg->max_line_length)
193 ap_varbuf_strmemcat(&vb, buff, len);
194 ap_varbuf_strmemcat(&vb, script->replacement, repl_len);
198 * The string before the match but after the
199 * previous match (if any) has length 'len'.
200 * Check if we still have space for this string and
201 * the replacement string.
203 if (space_left < len + repl_len)
205 space_left -= len + repl_len;
207 * We now split off the string before the match
208 * as its own bucket, then isolate the matched
209 * string and delete it.
211 SEDRMPATBCKT(b, len, tmp_b, script->patlen);
213 * Finally, we create a bucket that contains the
216 tmp_b = apr_bucket_transient_create(script->replacement,
218 f->r->connection->bucket_alloc);
219 /* ... and insert it */
220 APR_BUCKET_INSERT_BEFORE(b, tmp_b);
222 /* now we need to adjust buff for all these changes */
223 len += script->patlen;
228 if (script->flatten && !force_quick) {
229 /* XXX: we should check for AP_MAX_BUCKETS here and
230 * XXX: call ap_pass_brigade accordingly
232 char *copy = ap_varbuf_pdup(pool, &vb, NULL, 0,
234 tmp_b = apr_bucket_pool_create(copy, len, pool,
235 f->r->connection->bucket_alloc);
236 APR_BUCKET_INSERT_BEFORE(b, tmp_b);
237 apr_bucket_delete(b);
242 * We want the behaviour to be predictable.
243 * Therefore we try to always error out if the
244 * line length is larger than the limit,
245 * regardless of the content of the line. So,
246 * let's check if the remaining non-matching
247 * string does not exceed the limit.
249 if (space_left < b->length)
254 else if (script->regexp) {
256 const char *pos = buff;
258 apr_size_t space_left = cfg->max_line_length;
259 while (!ap_regexec_len(script->regexp, pos, left,
260 AP_MAX_REG_MATCH, regm, 0)) {
263 if (script->flatten && !force_quick) {
264 /* check remaining buffer size */
265 /* Note that the last param in ap_varbuf_regsub below
266 * must stay positive. If it gets 0, it would mean
267 * unlimited space available. */
268 if (vb.strlen + regm[0].rm_so >= cfg->max_line_length)
270 /* copy bytes before the match */
271 if (regm[0].rm_so > 0)
272 ap_varbuf_strmemcat(&vb, pos, regm[0].rm_so);
273 /* add replacement string, last argument is unsigned! */
274 rv = ap_varbuf_regsub(&vb, script->replacement, pos,
275 AP_MAX_REG_MATCH, regm,
276 cfg->max_line_length - vb.strlen);
277 if (rv != APR_SUCCESS)
282 /* account for string before the match */
283 if (space_left <= regm[0].rm_so)
285 space_left -= regm[0].rm_so;
286 rv = ap_pregsub_ex(pool, &repl,
287 script->replacement, pos,
288 AP_MAX_REG_MATCH, regm,
290 if (rv != APR_SUCCESS)
292 repl_len = strlen(repl);
293 space_left -= repl_len;
294 len = (apr_size_t) (regm[0].rm_eo - regm[0].rm_so);
/* Cut the matched span out of b and insert the expanded replacement. */
295 SEDRMPATBCKT(b, regm[0].rm_so, tmp_b, len);
296 tmp_b = apr_bucket_transient_create(repl, repl_len,
297 f->r->connection->bucket_alloc);
298 APR_BUCKET_INSERT_BEFORE(b, tmp_b);
301 * reset to past what we just did. pos now maps to b
304 pos += regm[0].rm_eo;
305 left -= regm[0].rm_eo;
307 if (have_match && script->flatten && !force_quick) {
309 /* Copy result plus the part after the last match into
312 copy = ap_varbuf_pdup(pool, &vb, NULL, 0, pos, left,
314 tmp_b = apr_bucket_pool_create(copy, len, pool,
315 f->r->connection->bucket_alloc);
316 APR_BUCKET_INSERT_BEFORE(b, tmp_b);
317 apr_bucket_delete(b);
/*
 * Output filter: splits the response body into lines, runs do_pattmatch()
 * on each complete line and passes the results down the filter chain.
 *
 * f  - this filter; f->ctx carries the substitute_module_ctx between calls.
 * bb - incoming brigade; buckets are taken from its head as they are
 *      consumed.
 *
 * Per bucket:
 *   - EOS: a pending partial line in ctx->linebb is flattened, checked
 *     against cfg->max_line_length, processed and appended to ctx->passbb;
 *     then the EOS bucket itself is moved to ctx->passbb.
 *   - other metadata: moved to ctx->passbb unchanged.
 *   - data: read (a failed or empty read deletes the bucket), then scanned
 *     for APR_ASCII_LF. Each line is split off just after its newline; if
 *     ctx->linebb holds an earlier fragment, the two are flattened into one
 *     transient bucket first. Data without a newline is moved to
 *     ctx->linebb.
 * When more than AP_MAX_BUCKETS buckets have accumulated, a FLUSH bucket is
 * appended and ctx->passbb is passed on early. After the loop ctx->passbb is
 * passed on, and a remaining partial line is set aside with
 * ap_save_brigade() for the next invocation.
 *
 * The context, its four brigades and a temporary sub-pool are created from
 * f->r->pool, and the Content-Length response header is removed; per the
 * comment this happens on first use (the guarding condition itself is not
 * visible). A status of APR_ENOMEM is logged as "Line too long".
 *
 * NOTE(review): local declarations, the error-path jumps and the return
 * statements are on lines omitted from this view.
 */
333 static apr_status_t substitute_filter(ap_filter_t *f, apr_bucket_brigade *bb)
339 const char *nl = NULL;
343 apr_bucket_brigade *tmp_bb = NULL;
345 subst_dir_conf *cfg =
346 (subst_dir_conf *) ap_get_module_config(f->r->per_dir_config,
349 substitute_module_ctx *ctx = f->ctx;
352 * First time around? Create the saved bb that we used for each pass
353 * through. Note that we can also get here when we explicitly clear ctx,
357 f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx));
359 * Create all the temporary brigades we need and reuse them to avoid
360 * creating them over and over again from r->pool which would cost a
361 * lot of memory in some cases.
363 ctx->linebb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
364 ctx->linesbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
365 ctx->pattbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
367 * Everything to be passed to the next filter goes in
368 * here, our pass brigade.
370 ctx->passbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
371 /* Create our temporary pool only once */
372 apr_pool_create(&(ctx->tpool), f->r->pool);
/* Drop the Content-Length response header. */
373 apr_table_unset(f->r->headers_out, "Content-Length");
377 * Shortcircuit processing
379 if (APR_BRIGADE_EMPTY(bb))
383 * Here's the concept:
384 * Read in the data and look for newlines. Once we
385 * find a full "line", add it to our working brigade.
386 * If we've finished reading the brigade and we have
387 * any left over data (not a "full" line), store that
390 * Note: anything stored in ctx->linebb for sure does not have
391 * a newline char, so we don't concat that bb with the
392 * new bb, since we would spending time searching for the newline
393 * in data we know it doesn't exist. So instead, we simply scan
394 * our current bb and, if we see a newline, prepend ctx->linebb
395 * to the front of it. This makes the code much less straight-
396 * forward (otherwise we could APR_BRIGADE_CONCAT(ctx->linebb, bb)
397 * and just scan for newlines and not bother with needing to know
398 * when ctx->linebb needs to be reset) but also faster. We'll take
401 * Note: apr_brigade_split_line would be nice here, but we
402 * really can't use it since we need more control and we want
403 * to re-use already read bucket data.
405 * See mod_include if still confused :)
/* Consume bb from the head until it is empty. */
408 while ((b = APR_BRIGADE_FIRST(bb)) && (b != APR_BRIGADE_SENTINEL(bb))) {
409 if (APR_BUCKET_IS_EOS(b)) {
411 * if we see the EOS, then we need to pass along everything we
412 * have. But if the ctx->linebb isn't empty, then we need to add
413 * that to the end of what we'll be passing.
415 if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
416 rv = apr_brigade_pflatten(ctx->linebb, &bflat,
417 &fbytes, ctx->tpool);
418 if (rv != APR_SUCCESS)
420 if (fbytes > cfg->max_line_length) {
424 tmp_b = apr_bucket_transient_create(bflat, fbytes,
425 f->r->connection->bucket_alloc);
426 rv = do_pattmatch(f, tmp_b, ctx->pattbb, ctx->tpool);
427 if (rv != APR_SUCCESS)
429 APR_BRIGADE_CONCAT(ctx->passbb, ctx->pattbb);
430 apr_brigade_cleanup(ctx->linebb);
/* Move the EOS bucket itself onto the pass brigade. */
432 APR_BUCKET_REMOVE(b);
433 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
436 * No need to handle FLUSH buckets separately as we call
437 * ap_pass_brigade anyway at the end of the loop.
439 else if (APR_BUCKET_IS_METADATA(b)) {
440 APR_BUCKET_REMOVE(b);
441 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
445 * We have actual "data" so read in as much as we can and start
446 * scanning and splitting from our read buffer
448 rv = apr_bucket_read(b, &buff, &bytes, APR_BLOCK_READ);
/* A failed or empty read: the bucket is deleted. */
449 if (rv != APR_SUCCESS || bytes == 0) {
450 apr_bucket_delete(b);
/* Find the first newline in the data just read. */
455 nl = memchr(buff, APR_ASCII_LF, bytes);
457 len = (apr_size_t) (nl - buff) + 1;
458 /* split *after* the newline */
459 apr_bucket_split(b, len);
461 * We've likely read more data, so bypass rereading
462 * bucket data and continue scanning through this
468 * we need b to be updated for future potential
471 tmp_b = APR_BUCKET_NEXT(b);
472 APR_BUCKET_REMOVE(b);
474 * Hey, we found a newline! Don't forget the old
475 * stuff that needs to be added to the front. So we
476 * add the split bucket to the end, flatten the whole
477 * bb, morph the whole shebang into a bucket which is
478 * then added to the tail of the newline bb.
480 if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
481 APR_BRIGADE_INSERT_TAIL(ctx->linebb, b);
482 rv = apr_brigade_pflatten(ctx->linebb, &bflat,
483 &fbytes, ctx->tpool);
484 if (rv != APR_SUCCESS)
486 if (fbytes > cfg->max_line_length) {
487 /* Avoid pflattening further lines, we will
488 * abort later on anyway.
493 b = apr_bucket_transient_create(bflat, fbytes,
494 f->r->connection->bucket_alloc);
495 apr_brigade_cleanup(ctx->linebb);
497 rv = do_pattmatch(f, b, ctx->pattbb, ctx->tpool);
498 if (rv != APR_SUCCESS)
501 * Count how many buckets we have in ctx->passbb
502 * so far. Yes, this is correct we count ctx->passbb
503 * and not ctx->pattbb as we do not reset num on every
506 for (b = APR_BRIGADE_FIRST(ctx->pattbb);
507 b != APR_BRIGADE_SENTINEL(ctx->pattbb);
508 b = APR_BUCKET_NEXT(b)) {
511 APR_BRIGADE_CONCAT(ctx->passbb, ctx->pattbb);
513 * If the number of buckets in ctx->passbb reaches an
514 * "insane" level, we consume much memory for all the
515 * buckets as such. So lets flush them down the chain
516 * in this case and thus clear ctx->passbb. This frees
517 * the buckets memory for further processing.
518 * Usually this condition should not become true, but
519 * it is a safety measure for edge cases.
521 if (num > AP_MAX_BUCKETS) {
522 b = apr_bucket_flush_create(
523 f->r->connection->bucket_alloc);
524 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
525 rv = ap_pass_brigade(f->next, ctx->passbb);
526 apr_brigade_cleanup(ctx->passbb);
528 apr_pool_clear(ctx->tpool);
529 if (rv != APR_SUCCESS)
536 * no newline in whatever is left of this buffer so
537 * tuck data away and get next bucket
539 APR_BUCKET_REMOVE(b);
540 APR_BRIGADE_INSERT_TAIL(ctx->linebb, b);
546 if (!APR_BRIGADE_EMPTY(ctx->passbb)) {
547 rv = ap_pass_brigade(f->next, ctx->passbb);
548 apr_brigade_cleanup(ctx->passbb);
549 if (rv != APR_SUCCESS)
552 apr_pool_clear(ctx->tpool);
555 /* Anything left we want to save/setaside for the next go-around */
556 if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
558 * Provide ap_save_brigade with an existing empty brigade
559 * (ctx->linesbb) to avoid creating a new one.
561 ap_save_brigade(f, &(ctx->linesbb), &(ctx->linebb), f->r->pool);
/* Swap the two brigades so that linebb is again the one holding the
 * pending data and linesbb is the spare. */
562 tmp_bb = ctx->linebb;
563 ctx->linebb = ctx->linesbb;
564 ctx->linesbb = tmp_bb;
/* APR_ENOMEM gets a dedicated "Line too long" log entry; presumably it is
 * the status used for lines over the limit -- confirm in the full file. */
569 if (rv == APR_ENOMEM)
570 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, APLOGNO(01328) "Line too long, URI %s",
572 apr_pool_clear(ctx->tpool);
/*
 * Handler for the Substitute directive.
 *
 * cmd  - command parameters; cmd->pool backs the copied argument and the
 *        compiled pattern.
 * cfg  - the subst_dir_conf whose `patterns` array receives the new entry.
 * line - directive argument of the form s<delim>from<delim>to<delim>[flags].
 *
 * The argument must begin with 's' or 'S'. It is duplicated into cmd->pool
 * and scanned twice for the delimiter to locate `from` and `to`; a missing
 * delimiter, a missing or empty `from`, or a missing `to` is rejected.
 * The flag characters are lower-cased one at a time; the error text names
 * i, n, f and q as the supported set. Where a regular expression is
 * required it is compiled with AP_REG_EXTENDED (plus AP_REG_ICASE when
 * ignore_case is set); the fixed-string branch precompiles `from` with
 * apr_strmatch_precompile() and records its length in patlen. The entry
 * finally stores the replacement, its length and the flatten choice.
 *
 * Returns one of the error strings below on failure.
 * NOTE(review): the statements that set ignore_case / the pattern kind /
 * flatten for each flag, the condition selecting regex vs. fixed string and
 * the success return are on lines omitted from this view; presumably NULL
 * is returned on success.
 */
576 static const char *set_pattern(cmd_parms *cmd, void *cfg, const char *line)
583 subst_pattern_t *nscript;
587 ap_regex_t *r = NULL;
589 if (apr_tolower(*line) != 's') {
590 return "Bad Substitute format, must be an s/// pattern";
/* Work on a private copy taken from cmd->pool. */
592 ourline = apr_pstrdup(cmd->pool, line);
/* Advance to the next delimiter (or the end of the string). */
597 if (*ourline != delim) {
598 while (*++ourline && *ourline != delim);
/* Same scan for the second part. */
606 if (*ourline != delim) {
607 while (*++ourline && *ourline != delim);
615 if (!delim || !from || !*from || !to) {
616 return "Bad Substitute format, must be a complete s/// pattern";
621 delim = apr_tolower(*flags); /* re-use */
624 else if (delim == 'n')
626 else if (delim == 'f')
628 else if (delim == 'q')
631 return "Bad Substitute flag, only s///[infq] are supported";
636 /* first see if we can compile the regex */
638 r = ap_pregcomp(cmd->pool, from, AP_REG_EXTENDED |
639 (ignore_case ? AP_REG_ICASE : 0));
641 return "Substitute could not compile regex";
/* Append a fresh entry to this scope's pattern array. */
643 nscript = apr_array_push(((subst_dir_conf *) cfg)->patterns);
644 /* init the new entries */
645 nscript->pattern = NULL;
646 nscript->regexp = NULL;
647 nscript->replacement = NULL;
651 nscript->patlen = strlen(from);
652 nscript->pattern = apr_strmatch_precompile(cmd->pool, from,
659 nscript->replacement = to;
660 nscript->replen = strlen(to);
661 nscript->flatten = flatten;
/* Byte multipliers for the 'm' and 'g' suffixes (2^20 and 2^30).
 * NOTE(review): the multiplier for 'k' is not among the visible lines. */
667 #define MBYTE 1048576
668 #define GBYTE 1073741824
/*
 * Handler for the SubstituteMaxLineLength directive.
 *
 * cmd - command parameters (not used in the visible lines).
 * cfg - the subst_dir_conf to update.
 * arg - decimal number, optionally followed by exactly one suffix character
 *       out of b, k, m, g (either case).
 *
 * The number is parsed with apr_strtoff() in base 10 and the character left
 * at `end` selects the unit. A parse failure or a negative value yields the
 * error string below. Otherwise the value is stored in max_line_length (cast
 * to apr_size_t) and max_line_length_set is raised.
 *
 * NOTE(review): the statements that apply the multipliers, the handling of
 * an unrecognised suffix and the success return are on lines omitted from
 * this view.
 */
670 static const char *set_max_line_length(cmd_parms *cmd, void *cfg, const char *arg)
672 subst_dir_conf *dcfg = (subst_dir_conf *)cfg;
/* `end` is left pointing at the first character after the number. */
677 rv = apr_strtoff(&max, arg, &end, 10);
678 if (rv == APR_SUCCESS) {
/* Each suffix must be the last character of the argument (!end[1]). */
679 if ((*end == 'K' || *end == 'k') && !end[1]) {
682 else if ((*end == 'M' || *end == 'm') && !end[1]) {
685 else if ((*end == 'G' || *end == 'g') && !end[1]) {
688 else if (*end && /* neither empty nor [Bb] */
689 ((*end != 'B' && *end != 'b') || end[1])) {
694 if (rv != APR_SUCCESS || max < 0)
696 return "SubstituteMaxLineLength must be a non-negative integer optionally "
697 "suffixed with 'b', 'k', 'm' or 'g'.";
699 dcfg->max_line_length = (apr_size_t)max;
700 dcfg->max_line_length_set = 1;
/*
 * Combination of the two AP_FILTER_PROTO_CHANGE* flags. The replacement list
 * is parenthesised so that the OR-expression stays one operand wherever the
 * macro is expanded. The visible registration call does not reference it.
 */
704 #define PROTO_FLAGS (AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH)
/*
 * Hook registration: registers substitute_filter() as an output filter under
 * substitute_filter_name, with no init function (NULL) and filter type
 * AP_FTYPE_RESOURCE.
 *
 * pool - not used in the visible lines.
 */
705 static void register_hooks(apr_pool_t *pool)
707 ap_register_output_filter(substitute_filter_name, substitute_filter,
708 NULL, AP_FTYPE_RESOURCE);
/*
 * Directive table of the module. All three directives use OR_FILEINFO.
 *   Substitute              - one s/// rule, parsed by set_pattern(); the
 *                             help text lists the same flag set (i, n, f, q)
 *                             that set_pattern() names in its error message.
 *   SubstituteMaxLineLength - line length limit, parsed by
 *                             set_max_line_length().
 *   SubstituteInheritBefore - on/off flag stored by ap_set_flag_slot in
 *                             subst_dir_conf.inherit_before.
 * NOTE(review): the table terminator is not among the visible lines.
 */
711 static const command_rec substitute_cmds[] = {
712 AP_INIT_TAKE1("Substitute", set_pattern, NULL, OR_FILEINFO,
713 "Pattern to filter the response content (s/foo/bar/[infq])"),
714 AP_INIT_TAKE1("SubstituteMaxLineLength", set_max_line_length, NULL, OR_FILEINFO,
715 "Maximum line length"),
716 AP_INIT_FLAG("SubstituteInheritBefore", ap_set_flag_slot,
717 (void *)APR_OFFSETOF(subst_dir_conf, inherit_before), OR_FILEINFO,
718 "Apply inherited patterns before those of the current context"),
/*
 * Module record: connects the per-directory configuration create and merge
 * callbacks, the directive table and the hook registration function. Both
 * per-server configuration slots are NULL.
 */
722 AP_DECLARE_MODULE(substitute) = {
723 STANDARD20_MODULE_STUFF,
724 create_substitute_dcfg, /* dir config creator */
725 merge_substitute_dcfg, /* dir merger --- default is to override */
726 NULL, /* server config */
727 NULL, /* merge server config */
728 substitute_cmds, /* command table */
729 register_hooks /* register hooks */