From 1b4533436a3fa72058b1ec6a03dc67d3b0376ca3 Mon Sep 17 00:00:00 2001 From: Jim Jagielski Date: Tue, 30 May 2017 12:19:58 +0000 Subject: [PATCH] Merge r1584417, r1585157 from trunk: allow users to work around the over-aggressive backreference escaping by selecting the characters to escape. add BNP flag to give control to the user on whether a space ' ' in an escaped backreference is encoded as a + (default) or %20. Useful if your backreference isn't going into the query string. Submitted by: covener Reviewed by: jailletc36, covener, ylavic git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.4.x@1796850 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES | 8 ++++++ STATUS | 11 -------- docs/manual/rewrite/flags.xml | 20 ++++++++++++-- modules/mappers/mod_rewrite.c | 50 +++++++++++++++++++++++++++-------- 4 files changed, 65 insertions(+), 24 deletions(-) diff --git a/CHANGES b/CHANGES index cd47e34291..cd68295754 100644 --- a/CHANGES +++ b/CHANGES @@ -14,6 +14,14 @@ Changes with Apache 2.4.26 *) Evaluate nested If/ElseIf/Else configuration blocks. [Luca Toscano, Jacob Champion] + *) mod_rewrite: Add 'BNP' (backreferences-no-plus) flag to RewriteRule to + allow spaces in backreferences to be encoded as %20 instead of '+'. + [Eric Covener] + + *) mod_rewrite: Add the possibility to limit the escaping to specific + characters in backreferences by listing them in the B flag. + [Eric Covener] + *) mod_substitute: Fix spurious AH01328 (Line too long) errors on EBCDIC systems. [Eric Covener] diff --git a/STATUS b/STATUS index b1c98f8155..daad558602 100644 --- a/STATUS +++ b/STATUS @@ -120,17 +120,6 @@ RELEASE SHOWSTOPPERS: PATCHES ACCEPTED TO BACKPORT FROM TRUNK: [ start all new proposals below, under PATCHES PROPOSED. 
] - *) mod_rewrite: allow users to workaround the over-agressive backreference - escaping by selecting the characters to escape - mod_rewrite: add BNP flag (backrefnoplus) - trunk patch: http://svn.apache.org/r1584417 - http://svn.apache.org/r1585157 - 2.4.x patch: http://home.apache.org/~jailletc36/BNP.diff - (mod_rewrite.xml from r1584417 has already been eroneously merged - with r1728060 + tweak taken from r1701545 partly backported in r1703403. - I have also added an entry for r1584417) - +1: jailletc36, covener, ylavic - *) mod_rewrite: When a substitution is a fully qualified URL, and the scheme/host/port matches the current virtual host, stop interpreting the path component as a local path just because the first component of the diff --git a/docs/manual/rewrite/flags.xml b/docs/manual/rewrite/flags.xml index a7b6748e83..1ec7ce553a 100644 --- a/docs/manual/rewrite/flags.xml +++ b/docs/manual/rewrite/flags.xml @@ -69,8 +69,11 @@ of how you might use them.

B (escape backreferences)

The [B] flag instructs RewriteRule to escape non-alphanumeric -characters before applying the transformation. -

+characters before applying the transformation.

+

In 2.4.26 and later, you can limit the escaping to specific characters +in backreferences by listing them: [B=#?;]. Note: The space +character can be used in the list of characters to escape, but it cannot be +the last character in the list.

mod_rewrite has to unescape URLs before mapping them, so backreferences are unescaped at the time they are applied. @@ -103,6 +106,19 @@ returns a 404 if it sees one.

This escaping is particularly necessary in a proxy situation, when the backend may break if presented with an unescaped URL.

+

An alternative to this flag is to use a RewriteCond to capture against %{THE_REQUEST}, which will capture +strings in their encoded form.

+
+ +
BNP|backrefnoplus (don't escape space to +) +

The [BNP] flag instructs RewriteRule to escape the space character +in a backreference to %20 rather than '+'. This is useful when the backreference +will be used in the path component rather than the query string.

+ +

This flag is available in version 2.4.26 and later.

+
C|chain diff --git a/modules/mappers/mod_rewrite.c b/modules/mappers/mod_rewrite.c index dcf7988ed0..d54ad8f4a8 100644 --- a/modules/mappers/mod_rewrite.c +++ b/modules/mappers/mod_rewrite.c @@ -166,6 +166,7 @@ static const char* really_last_key = "rewrite_really_last"; #define RULEFLAG_DISCARDPATHINFO (1<<15) #define RULEFLAG_QSDISCARD (1<<16) #define RULEFLAG_END (1<<17) +#define RULEFLAG_ESCAPENOPLUS (1<<18) #define RULEFLAG_QSLAST (1<<19) /* return code of the rewrite rule @@ -317,6 +318,7 @@ typedef struct { data_item *cookie; /* added cookies */ int skip; /* number of next rules to skip */ int maxrounds; /* limit on number of loops with N flag */ + char *escapes; /* specific backref escapes */ } rewriterule_entry; typedef struct { @@ -417,7 +419,7 @@ static const char *rewritemap_mutex_type = "rewrite-map"; /* Optional functions imported from mod_ssl when loaded: */ static APR_OPTIONAL_FN_TYPE(ssl_var_lookup) *rewrite_ssl_lookup = NULL; static APR_OPTIONAL_FN_TYPE(ssl_is_https) *rewrite_is_https = NULL; -static char *escape_uri(apr_pool_t *p, const char *path); +static char *escape_backref(apr_pool_t *p, const char *path, const char *escapeme, int noplus); /* * +-------------------------------------------------------+ @@ -634,24 +636,44 @@ static APR_INLINE unsigned char *c2x(unsigned what, unsigned char prefix, } /* - * Escapes a uri in a similar way as php's urlencode does. + * Escapes a backreference in a similar way as php's urlencode does. 
* Based on ap_os_escape_path in server/util.c */ -static char *escape_uri(apr_pool_t *p, const char *path) { +static char *escape_backref(apr_pool_t *p, const char *path, const char *escapeme, int noplus) { char *copy = apr_palloc(p, 3 * strlen(path) + 3); const unsigned char *s = (const unsigned char *)path; unsigned char *d = (unsigned char *)copy; unsigned c; while ((c = *s)) { - if (apr_isalnum(c) || c == '_') { - *d++ = c; - } - else if (c == ' ') { - *d++ = '+'; + if (!escapeme) { + if (apr_isalnum(c) || c == '_') { + *d++ = c; + } + else if (c == ' ' && !noplus) { + *d++ = '+'; + } + else { + d = c2x(c, '%', d); + } } - else { - d = c2x(c, '%', d); + else { + const char *esc = escapeme; + while (*esc) { + if (c == *esc) { + if (c == ' ' && !noplus) { + *d++ = '+'; + } + else { + d = c2x(c, '%', d); + } + break; + } + ++esc; + } + if (!*esc) { + *d++ = c; + } } ++s; } @@ -2390,7 +2412,7 @@ static char *do_expand(char *input, rewrite_ctx *ctx, rewriterule_entry *entry) /* escape the backreference */ char *tmp2, *tmp; tmp = apr_pstrmemdup(pool, bri->source + bri->regmatch[n].rm_so, span); - tmp2 = escape_uri(pool, tmp); + tmp2 = escape_backref(pool, tmp, entry->escapes, entry->flags & RULEFLAG_ESCAPENOPLUS); rewritelog((ctx->r, 5, ctx->perdir, "escaping backreference '%s' to '%s'", tmp, tmp2)); @@ -3446,6 +3468,12 @@ static const char *cmd_rewriterule_setflag(apr_pool_t *p, void *_cfg, case 'B': if (!*key || !strcasecmp(key, "ackrefescaping")) { cfg->flags |= RULEFLAG_ESCAPEBACKREF; + if (val && *val) { + cfg->escapes = val; + } + } + else if (!strcasecmp(key, "NP") || !strcasecmp(key, "ackrefernoplus")) { + cfg->flags |= RULEFLAG_ESCAPENOPLUS; } else { ++error; -- 2.40.0