1 /* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements. See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * mod_substitute.c: Perform content rewriting on the fly
22 #include "http_config.h"
23 #include "http_core.h"
24 #include "apr_general.h"
25 #include "apr_strings.h"
26 #include "apr_strmatch.h"
28 #include "util_filter.h"
29 #include "apr_buckets.h"
30 #include "http_request.h"
31 #define APR_WANT_STRFUNC
/* Name under which the output filter is registered in register_hooks(). */
34 static const char substitute_filter_name[] = "SUBSTITUTE";
/*
 * Declaration of this module's record; presumably defined by the
 * AP_DECLARE_MODULE(substitute) initializer at the end of the file.
 */
36 module AP_MODULE_DECLARE_DATA substitute_module;
/*
 * One substitution rule as built by set_pattern(). do_pattmatch() tests
 * `pattern` first and falls back to `regexp`.
 *
 * NOTE(review): further members used elsewhere in the file (patlen, replen,
 * flatten) and the closing line of this typedef are not visible in this
 * excerpt.
 */
38 typedef struct subst_pattern_t {
/* Precompiled fixed-string matcher (from apr_strmatch_precompile). */
39 const apr_strmatch_pattern *pattern;
/* Compiled regular expression, used when `pattern` is not set. */
40 const ap_regex_t *regexp;
/* Replacement text; set_pattern() stores the "to" part of the rule here. */
41 const char *replacement;
/*
 * Member of subst_dir_conf (the typedef header is not visible here):
 * array of subst_pattern_t entries, created in create_substitute_dcfg().
 */
48 apr_array_header_t *patterns;
/*
 * Per-request filter context used by substitute_filter(). The typedef
 * header is not visible here; a `tpool` member is also used by the filter.
 */
/* Pending data that does not contain a newline yet. */
52 apr_bucket_brigade *linebb;
/* Spare brigade handed to ap_save_brigade() and then swapped with linebb. */
53 apr_bucket_brigade *linesbb;
/* Everything that is to be passed to the next filter. */
54 apr_bucket_brigade *passbb;
/* Receives the output of do_pattmatch() before it is moved to passbb. */
55 apr_bucket_brigade *pattbb;
57 } substitute_module_ctx;
/*
 * Per-directory config creator (installed in the module record).
 *
 * p - pool the configuration is allocated from
 * d - not used in the visible lines
 *
 * Allocates a zeroed subst_dir_conf and gives it an empty `patterns` array
 * of subst_pattern_t with an initial size of 10 elements.
 * NOTE(review): the braces and the return statement are not visible in this
 * excerpt; presumably dcfg is returned.
 */
59 static void *create_substitute_dcfg(apr_pool_t *p, char *d)
61 subst_dir_conf *dcfg =
62 (subst_dir_conf *) apr_pcalloc(p, sizeof(subst_dir_conf));
64 dcfg->patterns = apr_array_make(p, 10, sizeof(subst_pattern_t));
/*
 * Per-directory config merger (installed in the module record).
 *
 * p     - pool the merged configuration is allocated from
 * basev - base configuration, a subst_dir_conf
 * overv - overriding configuration, a subst_dir_conf
 *
 * Allocates a fresh subst_dir_conf and builds its `patterns` array with
 * apr_array_append(), passing over->patterns as the first array.
 * NOTE(review): the declaration of `a`, the second array argument and the
 * return statement are not visible in this excerpt (presumably
 * base->patterns and `return a`).
 */
68 static void *merge_substitute_dcfg(apr_pool_t *p, void *basev, void *overv)
71 (subst_dir_conf *) apr_pcalloc(p, sizeof(subst_dir_conf));
72 subst_dir_conf *base = (subst_dir_conf *) basev;
73 subst_dir_conf *over = (subst_dir_conf *) overv;
75 a->patterns = apr_array_append(p, over->patterns,
/*
 * Bucket-count threshold used by substitute_filter(): once its counter
 * exceeds this value the pass brigade is flushed down the filter chain.
 */
80 #define AP_MAX_BUCKETS 1000
/*
 * Flattening helper: extends the accumulated string s1 with blen bytes of
 * buff followed by repl, allocating from pool. s2 is scratch storage.
 *
 * The visible lines show two ways of adding the buff chunk: s1 initialised
 * directly from it, or a copy in s2 concatenated onto the existing s1.
 * NOTE(review): the condition selecting between them is not visible in this
 * excerpt (presumably whether s1 is already set).
 */
82 #define SEDSCAT(s1, s2, pool, buff, blen, repl) do { \
84 s1 = apr_pstrmemdup(pool, buff, blen); \
87 s2 = apr_pstrmemdup(pool, buff, blen); \
88 s1 = apr_pstrcat(pool, s1, s2, NULL); \
90 s1 = apr_pstrcat(pool, s1, repl, NULL); \
/*
 * Cut a matched span out of bucket b.
 *
 * b is split at offset; the bucket after it (tmp_b) is split again at
 * patlen so that tmp_b holds exactly the match. b is then advanced to the
 * bucket following the match and tmp_b is deleted. On exit b refers to the
 * data after the removed span.
 */
93 #define SEDRMPATBCKT(b, offset, tmp_b, patlen) do { \
94 apr_bucket_split(b, offset); \
95 tmp_b = APR_BUCKET_NEXT(b); \
96 apr_bucket_split(tmp_b, patlen); \
97 b = APR_BUCKET_NEXT(tmp_b); \
98 apr_bucket_delete(tmp_b); \
/*
 * Run every configured substitution pattern over the data carried by inb.
 *
 * f        - the filter; the per-directory config comes from
 *            f->r->per_dir_config and new buckets are allocated from
 *            f->r->connection->bucket_alloc
 * inb      - bucket to process; appended to mybb before matching starts
 * mybb     - working brigade; holds inb and whatever buckets replace it
 * tmp_pool - pool for replacement and flattened strings; a sub-pool (tpool)
 *            is created from it for the regex scratch buffer and destroyed
 *            near the end of the function
 *
 * NOTE(review): this excerpt omits lines (braces, local declarations and
 * some statements), so the comments describe only the code that is visible.
 */
101 static void do_pattmatch(ap_filter_t *f, apr_bucket *inb,
102 apr_bucket_brigade *mybb,
103 apr_pool_t *tmp_pool)
/* Sub-match offsets filled in by ap_regexec(). */
107 ap_regmatch_t regm[AP_MAX_REG_MATCH];
121 subst_dir_conf *cfg =
122 (subst_dir_conf *) ap_get_module_config(f->r->per_dir_config,
124 subst_pattern_t *script;
/* Seed the working brigade with the incoming bucket. */
126 APR_BRIGADE_INSERT_TAIL(mybb, inb);
/* Start at the first configured pattern. */
128 script = (subst_pattern_t *) cfg->patterns->elts;
129 apr_pool_create(&tpool, tmp_pool);
133 * Simple optimization. If we only have one pattern, then
134 * we can safely avoid the overhead of flattening
136 if (cfg->patterns->nelts == 1) {
/* One pass over the whole brigade per configured pattern. */
139 for (i = 0; i < cfg->patterns->nelts; i++) {
140 for (b = APR_BRIGADE_FIRST(mybb);
141 b != APR_BRIGADE_SENTINEL(mybb);
142 b = APR_BUCKET_NEXT(b)) {
143 if (APR_BUCKET_IS_METADATA(b)) {
145 * we should NEVER see this, because we should never
146 * be passed any, but "handle" it just in case.
150 if (apr_bucket_read(b, &buff, &bytes, APR_BLOCK_READ)
/* Fixed-string rule: locate each occurrence with apr_strmatch(). */
153 if (script->pattern) {
154 while ((repl = apr_strmatch(script->pattern, buff, bytes)))
156 /* get offset into buff for pattern */
157 len = (apr_size_t) (repl - buff);
158 if (script->flatten && !force_quick) {
160 * We are flattening the buckets here, meaning
161 * that we don't do the fast bucket splits.
162 * Instead we copy over what the buckets would
163 * contain and use them. This is slow, since we
164 * are constanting allocing space and copying
167 SEDSCAT(s1, s2, tmp_pool, buff, len,
168 script->replacement);
172 * We now split off the stuff before the regex
173 * as its own bucket, then isolate the pattern
176 SEDRMPATBCKT(b, len, tmp_b, script->patlen);
178 * Finally, we create a bucket that contains the
181 tmp_b = apr_bucket_transient_create(script->replacement,
183 f->r->connection->bucket_alloc);
184 /* ... and insert it */
185 APR_BUCKET_INSERT_BEFORE(b, tmp_b);
187 /* now we need to adjust buff for all these changes */
188 len += script->patlen;
192 if (script->flatten && s1 && !force_quick) {
194 * we've finished looking at the bucket, so remove the
195 * old one and add in our new one
/*
 * NOTE(review): the flattened result is sized with strlen(s1), so it is
 * treated as a C string; data containing a NUL byte would be cut short.
 */
197 s2 = apr_pstrmemdup(tmp_pool, buff, bytes);
198 s1 = apr_pstrcat(tmp_pool, s1, s2, NULL);
199 tmp_b = apr_bucket_transient_create(s1, strlen(s1),
200 f->r->connection->bucket_alloc);
201 APR_BUCKET_INSERT_BEFORE(b, tmp_b);
202 apr_bucket_delete(b);
/* Regex rule: ap_regexec() works on a NUL-terminated copy of the data. */
207 else if (script->regexp) {
209 * we need a null terminated string here :(. To hopefully
210 * save time and memory, we don't alloc for each run
211 * through, but only if we need to have a larger chunk
212 * to save the string to. So we keep track of how much
213 * we've allocated and only re-alloc when we need it.
214 * NOTE: this screams for a macro.
/*
 * NOTE(review): scratch is allocated with fbytes bytes, yet reallocation is
 * skipped whenever bytes <= fbytes + 1, and the memcpy below copies `bytes`
 * bytes. If p points at scratch (the assignment is not visible here), a
 * chunk of fbytes + 1 bytes would overrun the buffer. Confirm the bound.
 */
216 if (!scratch || (bytes > (fbytes + 1))) {
218 scratch = apr_palloc(tpool, fbytes);
220 /* reset pointer to the scratch space */
222 memcpy(p, buff, bytes);
224 while (!ap_regexec(script->regexp, p,
225 AP_MAX_REG_MATCH, regm, 0)) {
226 /* first, grab the replacement string */
227 repl = ap_pregsub(tmp_pool, script->replacement, p,
228 AP_MAX_REG_MATCH, regm);
229 if (script->flatten && !force_quick) {
230 SEDSCAT(s1, s2, tmp_pool, p, regm[0].rm_so, repl);
/* Length of the whole match: end offset minus start offset. */
233 len = (apr_size_t) (regm[0].rm_eo - regm[0].rm_so);
234 SEDRMPATBCKT(b, regm[0].rm_so, tmp_b, len);
235 tmp_b = apr_bucket_transient_create(repl,
237 f->r->connection->bucket_alloc);
238 APR_BUCKET_INSERT_BEFORE(b, tmp_b);
241 * reset to past what we just did. buff now maps to b
/* Flatten mode: append the unmatched tail and replace the old bucket. */
246 if (script->flatten && s1 && !force_quick) {
247 s1 = apr_pstrcat(tmp_pool, s1, p, NULL);
248 tmp_b = apr_bucket_transient_create(s1, strlen(s1),
249 f->r->connection->bucket_alloc);
250 APR_BUCKET_INSERT_BEFORE(b, tmp_b);
251 apr_bucket_delete(b);
/* Release the sub-pool that held the regex scratch buffer. */
265 apr_pool_destroy(tpool);
/*
 * Output filter: break the incoming brigade into newline-terminated lines,
 * run do_pattmatch() on each complete line and hand the results to the next
 * filter.
 *
 * f  - filter instance; per-request state is kept in f->ctx
 * bb - brigade of buckets to process
 *
 * Data that does not yet end in a newline is parked in ctx->linebb and
 * saved across invocations; it is pushed through do_pattmatch() when EOS
 * arrives. rv carries the status of the bucket and brigade calls.
 *
 * NOTE(review): this excerpt omits lines (braces, local declarations,
 * return statements), so the comments describe only the visible code.
 */
270 static apr_status_t substitute_filter(ap_filter_t *f, apr_bucket_brigade *bb)
276 const char *nl = NULL;
280 apr_bucket_brigade *tmp_bb = NULL;
283 substitute_module_ctx *ctx = f->ctx;
286 * First time around? Create the saved bb that we used for each pass
287 * through. Note that we can also get here when we explicitly clear ctx,
/* Context setup: allocate the context from the request pool. */
291 f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx));
293 * Create all the temporary brigades we need and reuse them to avoid
294 * creating them over and over again from r->pool which would cost a
295 * lot of memory in some cases.
297 ctx->linebb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
298 ctx->linesbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
299 ctx->pattbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
301 * Everything to be passed to the next filter goes in
302 * here, our pass brigade.
304 ctx->passbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
305 /* Create our temporary pool only once */
306 apr_pool_create(&(ctx->tpool), f->r->pool);
/*
 * Drop Content-Length from the response headers, presumably because
 * substitutions can change the body length.
 */
307 apr_table_unset(f->r->headers_out, "Content-Length");
311 * Shortcircuit processing
313 if (APR_BRIGADE_EMPTY(bb))
317 * Here's the concept:
318 * Read in the data and look for newlines. Once we
319 * find a full "line", add it to our working brigade.
320 * If we've finished reading the brigade and we have
321 * any left over data (not a "full" line), store that
324 * Note: anything stored in ctx->linebb for sure does not have
325 * a newline char, so we don't concat that bb with the
326 * new bb, since we would spending time searching for the newline
327 * in data we know it doesn't exist. So instead, we simply scan
328 * our current bb and, if we see a newline, prepend ctx->linebb
329 * to the front of it. This makes the code much less straight-
330 * forward (otherwise we could APR_BRIGADE_CONCAT(ctx->linebb, bb)
331 * and just scan for newlines and not bother with needing to know
332 * when ctx->linebb needs to be reset) but also faster. We'll take
335 * Note: apr_brigade_split_line would be nice here, but we
336 * really can't use it since we need more control and we want
337 * to re-use already read bucket data.
339 * See mod_include if still confused :)
/* Consume bb from the front until it is empty. */
342 while ((b = APR_BRIGADE_FIRST(bb)) && (b != APR_BRIGADE_SENTINEL(bb))) {
343 if (APR_BUCKET_IS_EOS(b)) {
345 * if we see the EOS, then we need to pass along everything we
346 * have. But if the ctx->linebb isn't empty, then we need to add
347 * that to the end of what we'll be passing.
/*
 * NOTE(review): rv from apr_brigade_pflatten() is not examined in the
 * visible lines before bflat and fbytes are used.
 */
349 if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
350 rv = apr_brigade_pflatten(ctx->linebb, &bflat,
351 &fbytes, ctx->tpool);
352 tmp_b = apr_bucket_transient_create(bflat, fbytes,
353 f->r->connection->bucket_alloc);
354 do_pattmatch(f, tmp_b, ctx->pattbb, ctx->tpool);
355 APR_BRIGADE_CONCAT(ctx->passbb, ctx->pattbb);
357 apr_brigade_cleanup(ctx->linebb);
/* Move the EOS bucket itself to the end of the pass brigade. */
358 APR_BUCKET_REMOVE(b);
359 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
362 * No need to handle FLUSH buckets separately as we call
363 * ap_pass_brigade anyway at the end of the loop.
/* Other metadata buckets are forwarded unchanged. */
365 else if (APR_BUCKET_IS_METADATA(b)) {
366 APR_BUCKET_REMOVE(b);
367 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
371 * We have actual "data" so read in as much as we can and start
372 * scanning and splitting from our read buffer
374 rv = apr_bucket_read(b, &buff, &bytes, APR_BLOCK_READ);
/* Unreadable or empty data buckets are discarded. */
375 if (rv != APR_SUCCESS || bytes == 0) {
376 apr_bucket_delete(b);
/* Look for the first line feed in the data just read. */
381 nl = memchr(buff, APR_ASCII_LF, bytes);
383 len = (apr_size_t) (nl - buff) + 1;
384 /* split *after* the newline */
385 apr_bucket_split(b, len);
387 * We've likely read more data, so bypass rereading
388 * bucket data and continue scanning through this
394 * we need b to be updated for future potential
397 tmp_b = APR_BUCKET_NEXT(b);
398 APR_BUCKET_REMOVE(b);
400 * Hey, we found a newline! Don't forget the old
401 * stuff that needs to be added to the front. So we
402 * add the split bucket to the end, flatten the whole
403 * bb, morph the whole shebang into a bucket which is
404 * then added to the tail of the newline bb.
406 if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
407 APR_BRIGADE_INSERT_TAIL(ctx->linebb, b);
408 rv = apr_brigade_pflatten(ctx->linebb, &bflat,
409 &fbytes, ctx->tpool);
410 b = apr_bucket_transient_create(bflat, fbytes,
411 f->r->connection->bucket_alloc);
412 apr_brigade_cleanup(ctx->linebb);
/* Apply the substitutions to the completed line. */
414 do_pattmatch(f, b, ctx->pattbb, ctx->tpool);
416 * Count how many buckets we have in ctx->passbb
417 * so far. Yes, this is correct we count ctx->passbb
418 * and not ctx->pattbb as we do not reset num on every
421 for (b = APR_BRIGADE_FIRST(ctx->pattbb);
422 b != APR_BRIGADE_SENTINEL(ctx->pattbb);
423 b = APR_BUCKET_NEXT(b)) {
426 APR_BRIGADE_CONCAT(ctx->passbb, ctx->pattbb);
428 * If the number of buckets in ctx->passbb reaches an
429 * "insane" level, we consume much memory for all the
430 * buckets as such. So lets flush them down the chain
431 * in this case and thus clear ctx->passbb. This frees
432 * the buckets memory for further processing.
433 * Usually this condition should not become true, but
434 * it is a safety measure for edge cases.
436 if (num > AP_MAX_BUCKETS) {
437 b = apr_bucket_flush_create(
438 f->r->connection->bucket_alloc);
439 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
440 rv = ap_pass_brigade(f->next, ctx->passbb);
441 apr_brigade_cleanup(ctx->passbb);
/* Everything allocated from the temporary pool so far can be released. */
443 apr_pool_clear(ctx->tpool);
444 if (rv != APR_SUCCESS)
451 * no newline in whatever is left of this buffer so
452 * tuck data away and get next bucket
454 APR_BUCKET_REMOVE(b);
455 APR_BRIGADE_INSERT_TAIL(ctx->linebb, b);
/* Hand whatever has accumulated to the next filter. */
461 if (!APR_BRIGADE_EMPTY(ctx->passbb)) {
462 rv = ap_pass_brigade(f->next, ctx->passbb);
463 apr_brigade_cleanup(ctx->passbb);
464 if (rv != APR_SUCCESS) {
465 apr_pool_clear(ctx->tpool);
469 apr_pool_clear(ctx->tpool);
472 /* Anything left we want to save/setaside for the next go-around */
473 if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
475 * Provide ap_save_brigade with an existing empty brigade
476 * (ctx->linesbb) to avoid creating a new one.
478 ap_save_brigade(f, &(ctx->linesbb), &(ctx->linebb), f->r->pool);
/* Swap the two brigades so the saved data is in ctx->linebb again. */
479 tmp_bb = ctx->linebb;
480 ctx->linebb = ctx->linesbb;
481 ctx->linesbb = tmp_bb;
/*
 * Handler for the Substitute directive (see substitute_cmds).
 *
 * cmd  - directive context; cmd->pool supplies all allocations
 * cfg  - per-directory configuration, a subst_dir_conf
 * line - the directive argument, expected in s<delim>from<delim>to<delim>
 *        form, optionally followed by flag letters
 *
 * Works on a private copy of line, extracts the "from" and "to" parts,
 * reads the flags, and appends a new subst_pattern_t to cfg's patterns
 * array. Returns a static error string when the argument is malformed, a
 * flag is unknown, or the regex does not compile.
 *
 * NOTE(review): this excerpt omits lines (local declarations, parts of the
 * parsing, the action taken for each flag, the success return), so the
 * comments describe only the visible code.
 */
487 static const char *set_pattern(cmd_parms *cmd, void *cfg, const char *line)
494 subst_pattern_t *nscript;
498 ap_regex_t *r = NULL;
/* The expression must start with 's' or 'S'. */
500 if (apr_tolower(*line) != 's') {
501 return "Bad Substitute format, must be an s/// pattern";
/* Duplicate the argument so it can be scanned through a non-const pointer. */
503 ourline = apr_pstrdup(cmd->pool, line);
/* Advance to the next delimiter (end of the "from" part, presumably). */
508 if (*ourline != delim) {
509 while (*++ourline && *ourline != delim);
/* Same scan again (end of the "to" part, presumably). */
517 if (*ourline != delim) {
518 while (*++ourline && *ourline != delim);
/* An empty "from" part is rejected; "to" only has to be present. */
526 if (!delim || !from || !*from || !to) {
527 return "Bad Substitute format, must be a complete s/// pattern";
/* Flag letters are compared in lower case. */
532 delim = apr_tolower(*flags); /* re-use */
535 else if (delim == 'n')
537 else if (delim == 'f')
539 else if (delim == 'q')
542 return "Bad Substitute flag, only s///[infq] are supported";
547 /* first see if we can compile the regex */
549 r = ap_pregcomp(cmd->pool, from, AP_REG_EXTENDED |
550 (ignore_case ? AP_REG_ICASE : 0));
552 return "Substitute could not compile regex";
554 nscript = apr_array_push(((subst_dir_conf *) cfg)->patterns);
555 /* init the new entries */
556 nscript->pattern = NULL;
557 nscript->regexp = NULL;
558 nscript->replacement = NULL;
/* Fixed-string rule: remember its length and precompile the matcher. */
562 nscript->patlen = strlen(from);
563 nscript->pattern = apr_strmatch_precompile(cmd->pool, from,
570 nscript->replacement = to;
571 nscript->replen = strlen(to);
572 nscript->flatten = flatten;
/* NOTE(review): PROTO_FLAGS is not referenced in the visible lines of this file. */
577 #define PROTO_FLAGS AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH
/*
 * Hook registration: installs substitute_filter as an output filter named
 * by substitute_filter_name, with no init function (NULL), at the
 * AP_FTYPE_RESOURCE filter type. pool is not used in the visible lines.
 */
578 static void register_hooks(apr_pool_t *pool)
580 ap_register_output_filter(substitute_filter_name, substitute_filter,
581 NULL, AP_FTYPE_RESOURCE);
/*
 * Directive table: "Substitute" takes one argument, is handled by
 * set_pattern(), and is permitted wherever OR_ALL allows.
 * NOTE(review): the help text lists the flags as [inf], while the error
 * message in set_pattern() lists [infq]. The table terminator is not
 * visible in this excerpt.
 */
584 static const command_rec substitute_cmds[] = {
585 AP_INIT_TAKE1("Substitute", set_pattern, NULL, OR_ALL,
586 "Pattern to filter the response content (s/foo/bar/[inf])"),
/*
 * Module record: wires up the per-directory config creator and merger, the
 * directive table and the hook registration function. No per-server
 * configuration is used (both slots are NULL).
 */
590 AP_DECLARE_MODULE(substitute) = {
591 STANDARD20_MODULE_STUFF,
592 create_substitute_dcfg, /* dir config creator */
593 merge_substitute_dcfg, /* dir merger --- default is to override */
594 NULL, /* server config */
595 NULL, /* merge server config */
596 substitute_cmds, /* command table */
597 register_hooks /* register hooks */