From 1b4533436a3fa72058b1ec6a03dc67d3b0376ca3 Mon Sep 17 00:00:00 2001
From: Jim Jagielski
Date: Tue, 30 May 2017 12:19:58 +0000
Subject: [PATCH] Merge r1584417, r1585157 from trunk:
allow users to work around the over-aggressive backreference
escaping by selecting the characters to escape.
add BNP flag to give control to the user on whether a space ' ' in
an escaped backreference is encoded as a + (default) or %20. Useful
if your backreference isn't going into the query string.
Submitted by: covener
Reviewed by: jailletc36, covener, ylavic
git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/branches/2.4.x@1796850 13f79535-47bb-0310-9956-ffa450edef68
---
CHANGES | 8 ++++++
STATUS | 11 --------
docs/manual/rewrite/flags.xml | 20 ++++++++++++--
modules/mappers/mod_rewrite.c | 50 +++++++++++++++++++++++++++--------
4 files changed, 65 insertions(+), 24 deletions(-)
diff --git a/CHANGES b/CHANGES
index cd47e34291..cd68295754 100644
--- a/CHANGES
+++ b/CHANGES
@@ -14,6 +14,14 @@ Changes with Apache 2.4.26
*) Evaluate nested If/ElseIf/Else configuration blocks.
[Luca Toscano, Jacob Champion]
+ *) mod_rewrite: Add 'BNP' (backreferences-no-plus) flag to RewriteRule to
+ allow spaces in backreferences to be encoded as %20 instead of '+'.
+ [Eric Covener]
+
+ *) mod_rewrite: Add the possibility to limit the escaping to specific
+ characters in backreferences by listing them in the B flag.
+ [Eric Covener]
+
*) mod_substitute: Fix spurious AH01328 (Line too long) errors on EBCDIC
systems. [Eric Covener]
diff --git a/STATUS b/STATUS
index b1c98f8155..daad558602 100644
--- a/STATUS
+++ b/STATUS
@@ -120,17 +120,6 @@ RELEASE SHOWSTOPPERS:
PATCHES ACCEPTED TO BACKPORT FROM TRUNK:
[ start all new proposals below, under PATCHES PROPOSED. ]
- *) mod_rewrite: allow users to workaround the over-agressive backreference
- escaping by selecting the characters to escape
- mod_rewrite: add BNP flag (backrefnoplus)
- trunk patch: http://svn.apache.org/r1584417
- http://svn.apache.org/r1585157
- 2.4.x patch: http://home.apache.org/~jailletc36/BNP.diff
- (mod_rewrite.xml from r1584417 has already been eroneously merged
- with r1728060 + tweak taken from r1701545 partly backported in r1703403.
- I have also added an entry for r1584417)
- +1: jailletc36, covener, ylavic
-
*) mod_rewrite: When a substitution is a fully qualified URL, and the
scheme/host/port matches the current virtual host, stop interpreting the
path component as a local path just because the first component of the
diff --git a/docs/manual/rewrite/flags.xml b/docs/manual/rewrite/flags.xml
index a7b6748e83..1ec7ce553a 100644
--- a/docs/manual/rewrite/flags.xml
+++ b/docs/manual/rewrite/flags.xml
@@ -69,8 +69,11 @@ of how you might use them.
B (escape backreferences)
The [B] flag instructs RewriteRule to escape non-alphanumeric
-characters before applying the transformation.
-
+characters before applying the transformation.
+In 2.4.26 and later, you can limit the escaping to specific characters
+in backreferences by listing them: [B=#?;]. Note: The space
+character can be used in the list of characters to escape, but it cannot be
+the last character in the list.
mod_rewrite
has to unescape URLs before mapping them,
so backreferences are unescaped at the time they are applied.
@@ -103,6 +106,19 @@ returns a 404 if it sees one.
This escaping is particularly necessary in a proxy situation,
when the backend may break if presented with an unescaped URL.
+An alternative to this flag is using a RewriteCond to capture against %{THE_REQUEST} which will capture
+strings in the encoded form.
+
+
+BNP|backrefnoplus (don't escape space to +)
+The [BNP] flag instructs RewriteRule to escape the space character
+in a backreference to %20 rather than '+'. Useful when the backreference
+will be used in the path component rather than the query string.
+
+This flag is available in version 2.4.26 and later.
+
C|chain
diff --git a/modules/mappers/mod_rewrite.c b/modules/mappers/mod_rewrite.c
index dcf7988ed0..d54ad8f4a8 100644
--- a/modules/mappers/mod_rewrite.c
+++ b/modules/mappers/mod_rewrite.c
@@ -166,6 +166,7 @@ static const char* really_last_key = "rewrite_really_last";
#define RULEFLAG_DISCARDPATHINFO (1<<15)
#define RULEFLAG_QSDISCARD (1<<16)
#define RULEFLAG_END (1<<17)
+#define RULEFLAG_ESCAPENOPLUS (1<<18)
#define RULEFLAG_QSLAST (1<<19)
/* return code of the rewrite rule
@@ -317,6 +318,7 @@ typedef struct {
data_item *cookie; /* added cookies */
int skip; /* number of next rules to skip */
int maxrounds; /* limit on number of loops with N flag */
+ char *escapes; /* specific backref escapes */
} rewriterule_entry;
typedef struct {
@@ -417,7 +419,7 @@ static const char *rewritemap_mutex_type = "rewrite-map";
/* Optional functions imported from mod_ssl when loaded: */
static APR_OPTIONAL_FN_TYPE(ssl_var_lookup) *rewrite_ssl_lookup = NULL;
static APR_OPTIONAL_FN_TYPE(ssl_is_https) *rewrite_is_https = NULL;
-static char *escape_uri(apr_pool_t *p, const char *path);
+static char *escape_backref(apr_pool_t *p, const char *path, const char *escapeme, int noplus);
/*
* +-------------------------------------------------------+
@@ -634,24 +636,44 @@ static APR_INLINE unsigned char *c2x(unsigned what, unsigned char prefix,
}
/*
- * Escapes a uri in a similar way as php's urlencode does.
+ * Escapes a backreference in a similar way as php's urlencode does.
* Based on ap_os_escape_path in server/util.c
*/
-static char *escape_uri(apr_pool_t *p, const char *path) {
+static char *escape_backref(apr_pool_t *p, const char *path, const char *escapeme, int noplus) {
char *copy = apr_palloc(p, 3 * strlen(path) + 3);
const unsigned char *s = (const unsigned char *)path;
unsigned char *d = (unsigned char *)copy;
unsigned c;
while ((c = *s)) {
- if (apr_isalnum(c) || c == '_') {
- *d++ = c;
- }
- else if (c == ' ') {
- *d++ = '+';
+ if (!escapeme) {
+ if (apr_isalnum(c) || c == '_') {
+ *d++ = c;
+ }
+ else if (c == ' ' && !noplus) {
+ *d++ = '+';
+ }
+ else {
+ d = c2x(c, '%', d);
+ }
}
- else {
- d = c2x(c, '%', d);
+ else {
+ const char *esc = escapeme;
+ while (*esc) {
+ if (c == *esc) {
+ if (c == ' ' && !noplus) {
+ *d++ = '+';
+ }
+ else {
+ d = c2x(c, '%', d);
+ }
+ break;
+ }
+ ++esc;
+ }
+ if (!*esc) {
+ *d++ = c;
+ }
}
++s;
}
@@ -2390,7 +2412,7 @@ static char *do_expand(char *input, rewrite_ctx *ctx, rewriterule_entry *entry)
/* escape the backreference */
char *tmp2, *tmp;
tmp = apr_pstrmemdup(pool, bri->source + bri->regmatch[n].rm_so, span);
- tmp2 = escape_uri(pool, tmp);
+ tmp2 = escape_backref(pool, tmp, entry->escapes, entry->flags & RULEFLAG_ESCAPENOPLUS);
rewritelog((ctx->r, 5, ctx->perdir, "escaping backreference '%s' to '%s'",
tmp, tmp2));
@@ -3446,6 +3468,12 @@ static const char *cmd_rewriterule_setflag(apr_pool_t *p, void *_cfg,
case 'B':
if (!*key || !strcasecmp(key, "ackrefescaping")) {
cfg->flags |= RULEFLAG_ESCAPEBACKREF;
+ if (val && *val) {
+ cfg->escapes = val;
+ }
+ }
+ else if (!strcasecmp(key, "NP") || !strcasecmp(key, "ackrefernoplus")) {
+ cfg->flags |= RULEFLAG_ESCAPENOPLUS;
}
else {
++error;
--
2.40.0