1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * mod_substitute.c: Perform content rewriting on the fly
22 #include "http_config.h"
23 #include "http_core.h"
25 #include "apr_general.h"
26 #include "apr_strings.h"
27 #include "apr_strmatch.h"
29 #include "util_filter.h"
30 #include "util_varbuf.h"
31 #include "apr_buckets.h"
32 #include "http_request.h"
33 #define APR_WANT_STRFUNC
/*
 * Name under which the output filter is registered (see the
 * ap_register_output_filter() call in register_hooks), followed by the
 * forward declaration of this module's module record.
 */
36 static const char substitute_filter_name[] = "SUBSTITUTE";
38 module AP_MODULE_DECLARE_DATA substitute_module;
/*
 * Data structures used by the module.
 *
 * subst_pattern_t describes one configured substitution: either a
 * precompiled fixed-string matcher (pattern) or a compiled regular
 * expression (regexp), plus the replacement text. Other code in this file
 * also reads patlen, replen and flatten members from it; their declarations
 * are on lines not visible in this excerpt.
 */
40 typedef struct subst_pattern_t {
41 const apr_strmatch_pattern *pattern;
42 const ap_regex_t *regexp;
43 const char *replacement;
/*
 * NOTE(review): the closing of subst_pattern_t and the opening of the next
 * struct are not visible here. The patterns array below is accessed
 * elsewhere through subst_dir_conf pointers (e.g. dcfg->patterns), so it
 * presumably belongs to subst_dir_conf - confirm against the full file.
 */
50 apr_array_header_t *patterns;
/*
 * Brigades kept in the per-request filter context and reused across
 * invocations of substitute_filter; the filter also uses a tpool member
 * whose declaration is not visible in this excerpt.
 */
54 apr_bucket_brigade *linebb;
55 apr_bucket_brigade *linesbb;
56 apr_bucket_brigade *passbb;
57 apr_bucket_brigade *pattbb;
59 } substitute_module_ctx;
/*
 * Per-directory configuration constructor.
 *
 * Allocates a zero-filled subst_dir_conf from pool p and attaches an empty
 * patterns array created with apr_array_make(p, 10, sizeof(subst_pattern_t)).
 * The d argument is not referenced in the lines visible here, and the return
 * statement is not visible either (presumably the new dcfg is returned).
 */
61 static void *create_substitute_dcfg(apr_pool_t *p, char *d)
63 subst_dir_conf *dcfg =
64 (subst_dir_conf *) apr_pcalloc(p, sizeof(subst_dir_conf));
66 dcfg->patterns = apr_array_make(p, 10, sizeof(subst_pattern_t));
/*
 * Per-directory configuration merger.
 *
 * Allocates a fresh zero-filled subst_dir_conf from pool p and fills its
 * patterns array with the result of apr_array_append(), whose first array
 * argument is the overriding configuration's patterns.
 *
 * NOTE(review): the declaration of the result variable, the second array
 * argument of apr_array_append() and the return statement are on lines not
 * visible in this excerpt; presumably the base patterns are appended after
 * the overriding ones - confirm against the full file.
 */
70 static void *merge_substitute_dcfg(apr_pool_t *p, void *basev, void *overv)
73 (subst_dir_conf *) apr_pcalloc(p, sizeof(subst_dir_conf));
74 subst_dir_conf *base = (subst_dir_conf *) basev;
75 subst_dir_conf *over = (subst_dir_conf *) overv;
77 a->patterns = apr_array_append(p, over->patterns,
/*
 * Limits used by the filter:
 *  AP_MAX_BUCKETS            - once the running bucket count in
 *                              substitute_filter exceeds this value, the
 *                              pass brigade is flushed down the filter chain.
 *  AP_SUBST_MAX_LINE_LENGTH  - upper bound, in bytes, checked against
 *                              flattened lines and against the output
 *                              produced while substituting.
 */
82 #define AP_MAX_BUCKETS 1000
83 #define AP_SUBST_MAX_LINE_LENGTH (128*MAX_STRING_LEN)
/*
 * SEDRMPATBCKT(b, offset, tmp_b, patlen)
 *
 * Cuts patlen bytes starting at offset out of bucket b:
 *   1. split b at offset, so the matched text starts the following bucket;
 *   2. split that following bucket (tmp_b) after patlen bytes;
 *   3. advance b to the bucket after the matched text;
 *   4. delete the bucket holding the matched text.
 * b and tmp_b are both assigned by the macro, so they must be lvalues.
 * The closing part of the do-block is on a line not visible in this excerpt.
 */
85 #define SEDRMPATBCKT(b, offset, tmp_b, patlen) do { \
86 apr_bucket_split(b, offset); \
87 tmp_b = APR_BUCKET_NEXT(b); \
88 apr_bucket_split(tmp_b, patlen); \
89 b = APR_BUCKET_NEXT(tmp_b); \
90 apr_bucket_delete(tmp_b); \
/*
 * Apply every configured substitution to one chunk of data.
 *
 * inb is appended to mybb; then, for each entry of the per-directory
 * patterns array, every bucket currently in mybb is read and searched:
 *   - fixed-string entries (script->pattern) are located with apr_strmatch;
 *   - regex entries (script->regexp) are located with ap_regexec_len.
 *
 * Two replacement strategies are visible:
 *   - flatten mode (script->flatten set and force_quick not set): text before
 *     each match and the replacement are accumulated in the varbuf vb, and
 *     the source bucket is finally replaced by one pool bucket;
 *   - split mode: the matched bytes are removed with SEDRMPATBCKT and a
 *     transient bucket holding the replacement is inserted in their place.
 * In both modes the amount of produced data is checked against
 * AP_SUBST_MAX_LINE_LENGTH.
 *
 * NOTE(review): the remaining parameter(s), several local declarations, the
 * assignment of force_quick and all error/return statements are on lines not
 * visible in this excerpt. Callers in this file pass ctx->tpool as the
 * fourth argument and compare the result against APR_SUCCESS.
 */
93 static apr_status_t do_pattmatch(ap_filter_t *f, apr_bucket *inb,
94 apr_bucket_brigade *mybb,
99 ap_regmatch_t regm[AP_MAX_REG_MATCH];
107 subst_dir_conf *cfg =
108 (subst_dir_conf *) ap_get_module_config(f->r->per_dir_config,
110 subst_pattern_t *script;
112 APR_BRIGADE_INSERT_TAIL(mybb, inb);
113 ap_varbuf_init(pool, &vb, 0);
115 script = (subst_pattern_t *) cfg->patterns->elts;
117 * Simple optimization. If we only have one pattern, then
118 * we can safely avoid the overhead of flattening
120 if (cfg->patterns->nelts == 1) {
123 for (i = 0; i < cfg->patterns->nelts; i++) {
124 for (b = APR_BRIGADE_FIRST(mybb);
125 b != APR_BRIGADE_SENTINEL(mybb);
126 b = APR_BUCKET_NEXT(b)) {
127 if (APR_BUCKET_IS_METADATA(b)) {
129 * we should NEVER see this, because we should never
130 * be passed any, but "handle" it just in case.
134 if (apr_bucket_read(b, &buff, &bytes, APR_BLOCK_READ)
138 if (script->pattern) {
140 apr_size_t space_left = AP_SUBST_MAX_LINE_LENGTH;
141 apr_size_t repl_len = strlen(script->replacement);
142 while ((repl = apr_strmatch(script->pattern, buff, bytes)))
145 /* get offset into buff for pattern */
146 len = (apr_size_t) (repl - buff);
147 if (script->flatten && !force_quick) {
149 * We are flattening the buckets here, meaning
150 * that we don't do the fast bucket splits.
151 * Instead we copy over what the buckets would
152 * contain and use them. This is slow, since we
153 * are constanting allocing space and copying
156 if (vb.strlen + len + repl_len > AP_SUBST_MAX_LINE_LENGTH)
158 ap_varbuf_strmemcat(&vb, buff, len);
159 ap_varbuf_strmemcat(&vb, script->replacement, repl_len);
163 * We now split off the stuff before the regex
164 * as its own bucket, then isolate the pattern
167 if (space_left < len + repl_len)
169 space_left -= len + repl_len;
170 SEDRMPATBCKT(b, len, tmp_b, script->patlen);
172 * Finally, we create a bucket that contains the
175 tmp_b = apr_bucket_transient_create(script->replacement,
177 f->r->connection->bucket_alloc);
178 /* ... and insert it */
179 APR_BUCKET_INSERT_BEFORE(b, tmp_b);
181 /* now we need to adjust buff for all these changes */
182 len += script->patlen;
186 if (have_match && script->flatten && !force_quick) {
187 /* XXX: we should check for AP_MAX_BUCKETS here and
188 * XXX: call ap_pass_brigade accordingly
190 char *copy = ap_varbuf_pdup(pool, &vb, NULL, 0,
192 tmp_b = apr_bucket_pool_create(copy, len, pool,
193 f->r->connection->bucket_alloc);
194 APR_BUCKET_INSERT_BEFORE(b, tmp_b);
195 apr_bucket_delete(b);
199 else if (script->regexp) {
201 const char *pos = buff;
203 apr_size_t space_left = AP_SUBST_MAX_LINE_LENGTH;
204 while (!ap_regexec_len(script->regexp, pos, left,
205 AP_MAX_REG_MATCH, regm, 0)) {
208 if (script->flatten && !force_quick) {
209 /* copy bytes before the match */
210 if (regm[0].rm_so > 0)
211 ap_varbuf_strmemcat(&vb, pos, regm[0].rm_so);
212 /* add replacement string */
213 rv = ap_varbuf_regsub(&vb, script->replacement, pos,
214 AP_MAX_REG_MATCH, regm,
215 AP_SUBST_MAX_LINE_LENGTH - vb.strlen);
216 if (rv != APR_SUCCESS)
221 rv = ap_pregsub_ex(pool, &repl,
222 script->replacement, pos,
223 AP_MAX_REG_MATCH, regm,
225 if (rv != APR_SUCCESS)
227 len = (apr_size_t) (regm[0].rm_eo - regm[0].rm_so);
228 repl_len = strlen(repl);
229 space_left -= len + repl_len;
230 SEDRMPATBCKT(b, regm[0].rm_so, tmp_b, len);
231 tmp_b = apr_bucket_transient_create(repl, repl_len,
232 f->r->connection->bucket_alloc);
233 APR_BUCKET_INSERT_BEFORE(b, tmp_b);
236 * reset to past what we just did. pos now maps to b
239 pos += regm[0].rm_eo;
240 left -= regm[0].rm_eo;
242 if (have_match && script->flatten && !force_quick) {
244 /* Copy result plus the part after the last match into
247 copy = ap_varbuf_pdup(pool, &vb, NULL, 0, pos, left,
249 tmp_b = apr_bucket_pool_create(copy, len, pool,
250 f->r->connection->bucket_alloc);
251 APR_BUCKET_INSERT_BEFORE(b, tmp_b);
252 apr_bucket_delete(b);
/*
 * Output filter: rewrites the response body line by line.
 *
 * Context setup (described by the existing comment as the first time
 * around; the guarding condition itself is not visible in this excerpt):
 * allocates the context from the request pool, creates the reusable
 * brigades linebb, linesbb, pattbb and passbb, creates the sub-pool tpool
 * and removes the Content-Length response header.
 *
 * Main loop, one bucket of bb at a time:
 *   - EOS: any partial line held in linebb is flattened, run through
 *     do_pattmatch and appended to passbb; the EOS bucket is then moved to
 *     passbb.
 *   - other metadata: moved to passbb unchanged.
 *   - data: the bucket is read; a failed or zero-length read deletes it.
 *     The data is scanned for APR_ASCII_LF with memchr. When a newline is
 *     found the bucket is split just after it, any pending linebb content is
 *     prepended by flattening, the line is handed to do_pattmatch and the
 *     result is appended to passbb. When the bucket count exceeds
 *     AP_MAX_BUCKETS a FLUSH bucket is appended and passbb is sent down the
 *     chain. Data without a newline is moved to linebb.
 *
 * After the loop a non-empty passbb is passed to the next filter, tpool is
 * cleared, and leftover linebb content is set aside with ap_save_brigade
 * (linebb and linesbb are swapped so the saved data ends up in linebb).
 * A result of APR_ENOMEM is logged as "Line too long".
 *
 * NOTE(review): local declarations, several branch bodies, the goto/return
 * statements and the increment of num are on lines not visible here.
 */
268 static apr_status_t substitute_filter(ap_filter_t *f, apr_bucket_brigade *bb)
274 const char *nl = NULL;
278 apr_bucket_brigade *tmp_bb = NULL;
281 substitute_module_ctx *ctx = f->ctx;
284 * First time around? Create the saved bb that we used for each pass
285 * through. Note that we can also get here when we explicitly clear ctx,
289 f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx));
291 * Create all the temporary brigades we need and reuse them to avoid
292 * creating them over and over again from r->pool which would cost a
293 * lot of memory in some cases.
295 ctx->linebb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
296 ctx->linesbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
297 ctx->pattbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
299 * Everything to be passed to the next filter goes in
300 * here, our pass brigade.
302 ctx->passbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
303 /* Create our temporary pool only once */
304 apr_pool_create(&(ctx->tpool), f->r->pool);
305 apr_table_unset(f->r->headers_out, "Content-Length");
309 * Shortcircuit processing
311 if (APR_BRIGADE_EMPTY(bb))
315 * Here's the concept:
316 * Read in the data and look for newlines. Once we
317 * find a full "line", add it to our working brigade.
318 * If we've finished reading the brigade and we have
319 * any left over data (not a "full" line), store that
322 * Note: anything stored in ctx->linebb for sure does not have
323 * a newline char, so we don't concat that bb with the
324 * new bb, since we would spending time searching for the newline
325 * in data we know it doesn't exist. So instead, we simply scan
326 * our current bb and, if we see a newline, prepend ctx->linebb
327 * to the front of it. This makes the code much less straight-
328 * forward (otherwise we could APR_BRIGADE_CONCAT(ctx->linebb, bb)
329 * and just scan for newlines and not bother with needing to know
330 * when ctx->linebb needs to be reset) but also faster. We'll take
333 * Note: apr_brigade_split_line would be nice here, but we
334 * really can't use it since we need more control and we want
335 * to re-use already read bucket data.
337 * See mod_include if still confused :)
340 while ((b = APR_BRIGADE_FIRST(bb)) && (b != APR_BRIGADE_SENTINEL(bb))) {
341 if (APR_BUCKET_IS_EOS(b)) {
343 * if we see the EOS, then we need to pass along everything we
344 * have. But if the ctx->linebb isn't empty, then we need to add
345 * that to the end of what we'll be passing.
347 if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
348 rv = apr_brigade_pflatten(ctx->linebb, &bflat,
349 &fbytes, ctx->tpool);
350 if (rv != APR_SUCCESS)
352 if (fbytes > AP_SUBST_MAX_LINE_LENGTH) {
356 tmp_b = apr_bucket_transient_create(bflat, fbytes,
357 f->r->connection->bucket_alloc);
358 rv = do_pattmatch(f, tmp_b, ctx->pattbb, ctx->tpool);
359 if (rv != APR_SUCCESS)
361 APR_BRIGADE_CONCAT(ctx->passbb, ctx->pattbb);
363 apr_brigade_cleanup(ctx->linebb);
364 APR_BUCKET_REMOVE(b);
365 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
368 * No need to handle FLUSH buckets separately as we call
369 * ap_pass_brigade anyway at the end of the loop.
371 else if (APR_BUCKET_IS_METADATA(b)) {
372 APR_BUCKET_REMOVE(b);
373 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
377 * We have actual "data" so read in as much as we can and start
378 * scanning and splitting from our read buffer
380 rv = apr_bucket_read(b, &buff, &bytes, APR_BLOCK_READ);
381 if (rv != APR_SUCCESS || bytes == 0) {
382 apr_bucket_delete(b);
387 nl = memchr(buff, APR_ASCII_LF, bytes);
389 len = (apr_size_t) (nl - buff) + 1;
390 /* split *after* the newline */
391 apr_bucket_split(b, len);
393 * We've likely read more data, so bypass rereading
394 * bucket data and continue scanning through this
400 * we need b to be updated for future potential
403 tmp_b = APR_BUCKET_NEXT(b);
404 APR_BUCKET_REMOVE(b);
406 * Hey, we found a newline! Don't forget the old
407 * stuff that needs to be added to the front. So we
408 * add the split bucket to the end, flatten the whole
409 * bb, morph the whole shebang into a bucket which is
410 * then added to the tail of the newline bb.
412 if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
413 APR_BRIGADE_INSERT_TAIL(ctx->linebb, b);
414 rv = apr_brigade_pflatten(ctx->linebb, &bflat,
415 &fbytes, ctx->tpool);
416 if (rv != APR_SUCCESS)
418 if (fbytes > AP_SUBST_MAX_LINE_LENGTH) {
419 /* Avoid pflattening further lines, we will
420 * abort later on anyway.
425 b = apr_bucket_transient_create(bflat, fbytes,
426 f->r->connection->bucket_alloc);
427 apr_brigade_cleanup(ctx->linebb);
429 rv = do_pattmatch(f, b, ctx->pattbb, ctx->tpool);
430 if (rv != APR_SUCCESS)
433 * Count how many buckets we have in ctx->passbb
434 * so far. Yes, this is correct we count ctx->passbb
435 * and not ctx->pattbb as we do not reset num on every
438 for (b = APR_BRIGADE_FIRST(ctx->pattbb);
439 b != APR_BRIGADE_SENTINEL(ctx->pattbb);
440 b = APR_BUCKET_NEXT(b)) {
443 APR_BRIGADE_CONCAT(ctx->passbb, ctx->pattbb);
445 * If the number of buckets in ctx->passbb reaches an
446 * "insane" level, we consume much memory for all the
447 * buckets as such. So lets flush them down the chain
448 * in this case and thus clear ctx->passbb. This frees
449 * the buckets memory for further processing.
450 * Usually this condition should not become true, but
451 * it is a safety measure for edge cases.
453 if (num > AP_MAX_BUCKETS) {
454 b = apr_bucket_flush_create(
455 f->r->connection->bucket_alloc);
456 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
457 rv = ap_pass_brigade(f->next, ctx->passbb);
458 apr_brigade_cleanup(ctx->passbb);
460 apr_pool_clear(ctx->tpool);
461 if (rv != APR_SUCCESS)
468 * no newline in whatever is left of this buffer so
469 * tuck data away and get next bucket
471 APR_BUCKET_REMOVE(b);
472 APR_BRIGADE_INSERT_TAIL(ctx->linebb, b);
478 if (!APR_BRIGADE_EMPTY(ctx->passbb)) {
479 rv = ap_pass_brigade(f->next, ctx->passbb);
480 apr_brigade_cleanup(ctx->passbb);
481 if (rv != APR_SUCCESS)
484 apr_pool_clear(ctx->tpool);
487 /* Anything left we want to save/setaside for the next go-around */
488 if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
490 * Provide ap_save_brigade with an existing empty brigade
491 * (ctx->linesbb) to avoid creating a new one.
493 ap_save_brigade(f, &(ctx->linesbb), &(ctx->linebb), f->r->pool);
494 tmp_bb = ctx->linebb;
495 ctx->linebb = ctx->linesbb;
496 ctx->linesbb = tmp_bb;
501 if (rv == APR_ENOMEM)
502 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, "Line too long, URI %s",
504 apr_pool_clear(ctx->tpool);
/*
 * Configuration handler for the Substitute directive.
 *
 * Parses an expression of the form s<delim>from<delim>to<delim>[flags]:
 *   - the first character must be 's' or 'S', otherwise an error string is
 *     returned;
 *   - the line is duplicated into cmd->pool and scanned for the delimiter
 *     to isolate the from, to and flags parts;
 *   - a missing delimiter, a missing or empty from, or a missing to yields
 *     the "must be a complete s/// pattern" error;
 *   - each flag character is lower-cased and compared against 'n', 'f' and
 *     'q' (plus at least one earlier branch not visible here); anything else
 *     yields the "only s///[infq] are supported" error.
 *
 * When a regex is required it is compiled with ap_pregcomp using
 * AP_REG_EXTENDED, plus AP_REG_ICASE when ignore_case is set; a failed
 * compile returns an error string. A new subst_pattern_t is then pushed onto
 * the patterns array of cfg and filled in (pattern or regexp, replacement,
 * replen, flatten, and patlen for fixed-string patterns).
 *
 * NOTE(review): the local declarations, the statements executed for each
 * flag, the condition choosing between regex and fixed-string matching and
 * the final return are on lines not visible in this excerpt.
 */
508 static const char *set_pattern(cmd_parms *cmd, void *cfg, const char *line)
515 subst_pattern_t *nscript;
519 ap_regex_t *r = NULL;
521 if (apr_tolower(*line) != 's') {
522 return "Bad Substitute format, must be an s/// pattern";
524 ourline = apr_pstrdup(cmd->pool, line);
529 if (*ourline != delim) {
530 while (*++ourline && *ourline != delim);
538 if (*ourline != delim) {
539 while (*++ourline && *ourline != delim);
547 if (!delim || !from || !*from || !to) {
548 return "Bad Substitute format, must be a complete s/// pattern";
553 delim = apr_tolower(*flags); /* re-use */
556 else if (delim == 'n')
558 else if (delim == 'f')
560 else if (delim == 'q')
563 return "Bad Substitute flag, only s///[infq] are supported";
568 /* first see if we can compile the regex */
570 r = ap_pregcomp(cmd->pool, from, AP_REG_EXTENDED |
571 (ignore_case ? AP_REG_ICASE : 0));
573 return "Substitute could not compile regex";
575 nscript = apr_array_push(((subst_dir_conf *) cfg)->patterns);
576 /* init the new entries */
577 nscript->pattern = NULL;
578 nscript->regexp = NULL;
579 nscript->replacement = NULL;
583 nscript->patlen = strlen(from);
584 nscript->pattern = apr_strmatch_precompile(cmd->pool, from,
591 nscript->replacement = to;
592 nscript->replen = strlen(to);
593 nscript->flatten = flatten;
/*
 * Filter protocol flags: the filter changes the content and its length.
 * NOTE(review): no use of PROTO_FLAGS is visible in this excerpt; the
 * registration call shown in register_hooks does not pass it - confirm
 * whether it is still needed.
 */
598 #define PROTO_FLAGS AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH
/*
 * Hook registration callback: registers substitute_filter as an output
 * filter under the name held in substitute_filter_name, with no init
 * function (NULL) and filter type AP_FTYPE_RESOURCE.
 * The pool argument is not referenced in the lines visible here.
 */
599 static void register_hooks(apr_pool_t *pool)
601 ap_register_output_filter(substitute_filter_name, substitute_filter,
602 NULL, AP_FTYPE_RESOURCE);
/*
 * Configuration directives provided by this module: a single one-argument
 * directive, Substitute, handled by set_pattern and allowed under OR_ALL.
 * NOTE(review): the help text advertises the flags [inf], while set_pattern
 * reports the supported flags as [infq]; the help string looks out of date.
 * The table terminator is on a line not visible in this excerpt.
 */
605 static const command_rec substitute_cmds[] = {
606 AP_INIT_TAKE1("Substitute", set_pattern, NULL, OR_ALL,
607 "Pattern to filter the response content (s/foo/bar/[inf])"),
/*
 * Module record: wires the per-directory config constructor and merger,
 * the directive table and the hook registration function into httpd.
 * This module keeps no per-server configuration (both slots are NULL).
 */
611 AP_DECLARE_MODULE(substitute) = {
612 STANDARD20_MODULE_STUFF,
613 create_substitute_dcfg, /* dir config creator */
614 merge_substitute_dcfg, /* dir merger --- default is to override */
615 NULL, /* server config */
616 NULL, /* merge server config */
617 substitute_cmds, /* command table */
618 register_hooks /* register hooks */