From: Nick Kew <niq@apache.org>
Date: Sat, 22 Sep 2007 00:10:10 +0000 (+0000)
Subject: * Rationalise the two ap_unescape_url versions
X-Git-Tag: 2.3.0~1403
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4c672a8d724058b7480626e0366f5601a7eac3bc;p=apache

* Rationalise the two ap_unescape_url versions
* Fix ap_unescape_url_keep2f to leave %2F encoded instead of decoding it,
  bringing it into line with its documentation as well as common sense.
* Suggest an extension to the API (compiled out under NEW_APIS for now)
The most relevant open PR is 41798


git-svn-id: https://svn.apache.org/repos/asf/httpd/httpd/trunk@578332 13f79535-47bb-0310-9956-ffa450edef68
---

diff --git a/server/util.c b/server/util.c
index e1be3d1831..a7e74710fd 100644
--- a/server/util.c
+++ b/server/util.c
@@ -1542,54 +1542,15 @@ static char x2c(const char *what)
 }
 
 /*
- * Unescapes a URL.
+ * Unescapes a URL, leaving reserved characters intact.
  * Returns 0 on success, non-zero on error
  * Failure is due to
  *   bad % escape       returns HTTP_BAD_REQUEST
  *
- *   decoding %00 -> \0  (the null character)
- *   decoding %2f -> /   (a special character)
- *                      returns HTTP_NOT_FOUND
+ *   decoding %00 or a forbidden character returns HTTP_NOT_FOUND
  */
-AP_DECLARE(int) ap_unescape_url(char *url)
-{
-    register int badesc, badpath;
-    char *x, *y;
 
-    badesc = 0;
-    badpath = 0;
-    /* Initial scan for first '%'. Don't bother writing values before
-     * seeing a '%' */
-    y = strchr(url, '%');
-    if (y == NULL) {
-        return OK;
-    }
-    for (x = y; *y; ++x, ++y) {
-        if (*y != '%')
-            *x = *y;
-        else {
-            if (!apr_isxdigit(*(y + 1)) || !apr_isxdigit(*(y + 2))) {
-                badesc = 1;
-                *x = '%';
-            }
-            else {
-                *x = x2c(y + 1);
-                y += 2;
-                if (IS_SLASH(*x) || *x == '\0')
-                    badpath = 1;
-            }
-        }
-    }
-    *x = '\0';
-    if (badesc)
-        return HTTP_BAD_REQUEST;
-    else if (badpath)
-        return HTTP_NOT_FOUND;
-    else
-        return OK;
-}
-
-AP_DECLARE(int) ap_unescape_url_keep2f(char *url)
+static int unescape_url(char *url, const char *forbid, const char *reserved)
 {
     register int badesc, badpath;
     char *x, *y;
@@ -1614,9 +1575,15 @@ AP_DECLARE(int) ap_unescape_url_keep2f(char *url)
             else {
                 char decoded;
                 decoded = x2c(y + 1);
-                if (decoded == '\0') {
+                if ((decoded == '\0')
+                    || (forbid && ap_strchr_c(forbid, decoded))) {
                     badpath = 1;
                 }
+                else if (reserved && ap_strchr_c(reserved, decoded)) {
+                    *x++ = *y++;
+                    *x++ = *y++;
+                    *x = *y;
+                }
                 else {
                     *x = decoded;
                     y += 2;
@@ -1635,6 +1602,32 @@ AP_DECLARE(int) ap_unescape_url_keep2f(char *url)
         return OK;
     }
 }
+AP_DECLARE(int) ap_unescape_url(char *url)
+{
+    /* Traditional */
+    return unescape_url(url, "/", NULL);
+}
+AP_DECLARE(int) ap_unescape_url_keep2f(char *url)
+{
+    /* AllowEncodedSlashes (corrected) */
+    return unescape_url(url, NULL, "/");
+}
+#ifdef NEW_APIS
+/* IFDEF these out until they've been thought through.
+ * Just a germ of an API extension for now
+ */
+AP_DECLARE(int) ap_unescape_url_proxy(char *url)
+{
+    /* Leave RFC1738 reserved characters intact, so proxied URLs
+     * don't get mangled.  Where does that leave encoded '&'?
+     */
+    return unescape_url(url, NULL, "/;?");
+}
+AP_DECLARE(int) ap_unescape_url_reserved(char *url, const char *reserved)
+{
+    return unescape_url(url, NULL, reserved);
+}
+#endif
 
 AP_DECLARE(char *) ap_construct_server(apr_pool_t *p, const char *hostname,
                                        apr_port_t port, const request_rec *r)