From a8148cedcd4cbff429cbe60e31ee084d236ccc43 Mon Sep 17 00:00:00 2001
From: Nick Mathewson <nickm@torproject.org>
Date: Fri, 8 Oct 2010 13:05:13 -0400
Subject: [PATCH] New evhttp_uri(encode|decode) functions to handle + and NUL
 characters right

The old evhttp_decode_uri() function would act as tough it was doing
an (illegal, undefined) decode operation on a whole URL at once, and
treat + characters following a ? as different from + characters
preceding one.  But that's not useful: If you are decoding a URI
before splitting off query parameters, you are begging to fail as soon
as somebody gives you a value with an encoded & in it.

The new evhttp_uridecode() function takes an argument that says
whether to decode + signs.  Both uridecode and uriencode also now
support encoding or decoding to strings with internal 0-valued
characters.
---
 http.c                |  75 ++++++++++++++++++++++++-------
 include/event2/http.h |  44 ++++++++++++++++--
 test/regress_http.c   | 101 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 202 insertions(+), 18 deletions(-)

diff --git a/http.c b/http.c
index a472c5be..03d67127 100644
--- a/http.c
+++ b/http.c
@@ -178,7 +178,7 @@ static void evhttp_read_cb(struct bufferevent *, void *);
 static void evhttp_write_cb(struct bufferevent *, void *);
 static void evhttp_error_cb(struct bufferevent *bufev, short what, void *arg);
 static int evhttp_decode_uri_internal(const char *uri, size_t length,
-    char *ret, int always_decode_plus);
+    char *ret, int decode_plus);
 
 #ifndef _EVENT_HAVE_STRSEP
 /* strsep replacement for platforms that lack it.  Only works if
@@ -2322,45 +2322,66 @@ static const char uri_chars[256] = {
  * The returned string must be freed by the caller.
  */
 char *
-evhttp_encode_uri(const char *uri)
+evhttp_uriencode(const char *uri, ev_ssize_t len, int space_as_plus)
 {
 	struct evbuffer *buf = evbuffer_new();
-	char *p;
+	const char *p, *end;
+	char *result;
 
 	if (buf == NULL)
 		return (NULL);
 
-	for (p = (char *)uri; *p != '\0'; p++) {
+	if (len >= 0)
+		end = uri+len;
+	else
+		end = uri+strlen(uri);
+
+	for (p = uri; p < end; p++) {
 		if (uri_chars[(unsigned char)(*p)]) {
 			evbuffer_add(buf, p, 1);
+		} else if (*p == ' ' && space_as_plus) {
+			evbuffer_add(buf, "+", 1);
 		} else {
 			evbuffer_add_printf(buf, "%%%02X", (unsigned char)(*p));
 		}
 	}
-	evbuffer_add(buf, "", 1);
-	p = mm_strdup((char*)evbuffer_pullup(buf, -1));
+	evbuffer_add(buf, "", 1); /* NUL-terminator. */
+	result = mm_malloc(evbuffer_get_length(buf));
+	if (!result)
+		return NULL;
+	evbuffer_remove(buf, result, evbuffer_get_length(buf));
 	evbuffer_free(buf);
 
-	return (p);
+	return (result);
+}
+
+char *
+evhttp_encode_uri(const char *str)
+{
+	return evhttp_uriencode(str, -1, 0);
 }
 
 /*
- * @param always_decode_plus: when true we transform plus to space even
- *     if we have not seen a ?.
+ * @param decode_plus_ctl: if 1, we decode plus into space.  If 0, we don't.
+ *     If -1, when true we transform plus to space only after we've seen
+ *     a ?.  -1 is deprecated.
+ * @return the number of bytes written to 'ret'.
  */
 static int
 evhttp_decode_uri_internal(
-	const char *uri, size_t length, char *ret, int always_decode_plus)
+	const char *uri, size_t length, char *ret, int decode_plus_ctl)
 {
 	char c;
-	int j, in_query = always_decode_plus;
+	int j;
+	int decode_plus = (decode_plus_ctl == 1) ? 1: 0;
 	unsigned i;
 
 	for (i = j = 0; i < length; i++) {
 		c = uri[i];
 		if (c == '?') {
-			in_query = 1;
-		} else if (c == '+' && in_query) {
+			if (decode_plus_ctl < 0)
+				decode_plus = 1;
+		} else if (c == '+' && decode_plus) {
 			c = ' ';
 		} else if (c == '%' && EVUTIL_ISXDIGIT(uri[i+1]) &&
 		    EVUTIL_ISXDIGIT(uri[i+2])) {
@@ -2378,6 +2399,7 @@ evhttp_decode_uri_internal(
 	return (j);
 }
 
+/* deprecated */
 char *
 evhttp_decode_uri(const char *uri)
 {
@@ -2390,7 +2412,30 @@ evhttp_decode_uri(const char *uri)
 	}
 
 	evhttp_decode_uri_internal(uri, strlen(uri),
-	    ret, 0 /*always_decode_plus*/);
+	    ret, -1 /*always_decode_plus*/);
+
+	return (ret);
+}
+
+char *
+evhttp_uridecode(const char *uri, int decode_plus, size_t *size_out)
+{
+	char *ret;
+	int n;
+
+	if ((ret = mm_malloc(strlen(uri) + 1)) == NULL) {
+		event_warn("%s: malloc(%lu)", __func__,
+			  (unsigned long)(strlen(uri) + 1));
+		return (NULL);
+	}
+
+	n = evhttp_decode_uri_internal(uri, strlen(uri),
+	    ret, !!decode_plus/*always_decode_plus*/);
+
+	if (size_out) {
+		EVUTIL_ASSERT(n >= 0);
+		*size_out = (size_t)n;
+	}
 
 	return (ret);
 }
@@ -2485,7 +2530,7 @@ evhttp_dispatch_callback(struct httpcbq *callbacks, struct evhttp_request *req)
 	if ((translated = mm_malloc(offset + 1)) == NULL)
 		return (NULL);
 	offset = evhttp_decode_uri_internal(req->uri, offset,
-	    translated, 0 /* always_decode_plus */);
+	    translated, 0 /* decode_plus */);
 
 	TAILQ_FOREACH(cb, callbacks, next) {
 		int res = 0;
diff --git a/include/event2/http.h b/include/event2/http.h
index a8775aa1..3f84cfa5 100644
--- a/include/event2/http.h
+++ b/include/event2/http.h
@@ -507,7 +507,7 @@ void evhttp_clear_headers(struct evkeyvalq *headers);
 
 /**
    Helper function to encode a string for inclusion in a URI.  All
-   characters are replaced by their hex-escaped (%00) equivalents,
+   characters are replaced by their hex-escaped (%22) equivalents,
    except for characters explicitly unreserved by RFC3986 -- that is,
    ASCII alphanumeric characters, hyphen, dot, underscore, and tilde.
 
@@ -518,17 +518,55 @@ void evhttp_clear_headers(struct evkeyvalq *headers);
  */
 char *evhttp_encode_uri(const char *str);
 
+/**
+   As evhttp_encode_uri, but if 'size' is nonnegative, treat the string
+   as being 'size' bytes long.  This allows you to encode strings that
+   may contain 0-valued bytes.
+
+   The returned string must be freed by the caller.
+
+   @param str an unencoded string
+   @param size the length of the string to encode, or -1 if the string
+      is NUL-terminated
+   @param space_to_plus if true, space characters in 'str' are encoded
+      as +, not %20.
+   @return a newly allocate URI-encoded string, or NULL on failure.
+ */
+char *evhttp_uriencode(const char *str, ev_ssize_t size, int space_to_plus);
 
 /**
-  Helper function to decode a URI.
+  Helper function to sort of decode a URI-encoded string.  Unlike
+  evhttp_get_decoded_uri, it decodes all plus characters that appear
+  _after_ the first question mark character, but no plusses that occur
+  before.  This is not a good way to decode URIs in whole or in part.
 
-  The returned string must be freed by the caller.
+  The returned string must be freed by the caller
+
+  @deprecated  This function is deprecated; you probably want to use
+     evhttp_get_decoded_uri instead.
 
   @param uri an encoded URI
   @return a newly allocated unencoded URI or NULL on failure
  */
 char *evhttp_decode_uri(const char *uri);
 
+/**
+  Helper function to decode a URI-escaped string or HTTP parameter.
+
+  If 'decode_plus' is 1, then we decode the string as an HTTP parameter
+  value, and convert all plus ('+') characters to spaces.  If
+  'decode_plus' is 0, we leave all plus characters unchanged.
+
+  The returned string must be freed by the caller.
+
+  @param uri a URI-encode encoded URI
+  @param decode_plus determines whether we convert '+' to sapce.
+  @param size_out if size_out is not NULL, *size_out is set to the size of the
+     returned string
+  @return a newly allocated unencoded URI or NULL on failure
+ */
+char *evhttp_uridecode(const char *uri, int decode_plus,
+    size_t *size_out);
 
 /**
    Helper function to parse out arguments in a query.
diff --git a/test/regress_http.c b/test/regress_http.c
index 2554b7dd..e94e8229 100644
--- a/test/regress_http.c
+++ b/test/regress_http.c
@@ -1707,6 +1707,106 @@ end:
 	evhttp_clear_headers(&headers);
 }
 
+static void
+http_uriencode_test(void *ptr)
+{
+	char *s=NULL, *s2=NULL;
+	size_t sz;
+
+#define ENC(from,want,plus) do {				\
+		s = evhttp_uriencode((from), -1, (plus));	\
+		tt_assert(s);					\
+		tt_str_op(s,==,(want));				\
+		sz = -1;					\
+		s2 = evhttp_uridecode((s), (plus), &sz);	\
+		tt_assert(s2);					\
+		tt_str_op(s2,==,(from));			\
+		tt_int_op(sz,==,strlen(from));			\
+		free(s);					\
+		free(s2);					\
+		s = s2 = NULL;					\
+	} while (0)
+
+#define DEC(from,want,dp) do {					\
+		s = evhttp_uridecode((from),(dp),&sz);		\
+		tt_assert(s);					\
+		tt_str_op(s,==,(want));				\
+		tt_int_op(sz,==,strlen(want));			\
+		free(s);					\
+		s = NULL;					\
+	} while (0)
+
+#define OLD_DEC(from,want)  do {				\
+		s = evhttp_decode_uri((from));			\
+		tt_assert(s);					\
+		tt_str_op(s,==,(want));				\
+		free(s);					\
+		s = NULL;					\
+	} while (0)
+
+
+      	ENC("Hello", "Hello",0);
+	ENC("99", "99",0);
+	ENC("", "",0);
+	ENC(
+	 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789-.~_",
+	 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789-.~_",0);
+	ENC(" ", "%20",0);
+	ENC(" ", "+",1);
+	ENC("\xff\xf0\xe0", "%FF%F0%E0",0);
+	ENC("\x01\x19", "%01%19",1);
+	ENC("http://www.ietf.org/rfc/rfc3986.txt",
+	    "http%3A%2F%2Fwww.ietf.org%2Frfc%2Frfc3986.txt",1);
+
+	ENC("1+2=3", "1%2B2%3D3",1);
+	ENC("1+2=3", "1%2B2%3D3",0);
+
+	/* Now try encoding with internal NULs. */
+	s = evhttp_uriencode("hello\0world", 11, 0);
+	tt_assert(s);
+	tt_str_op(s,==,"hello%00world");
+	free(s);
+	s = NULL;
+
+	/* Now try out some decoding cases that we don't generate with
+	 * encode_uri: Make sure that malformed stuff doesn't crash... */
+	DEC("%%xhello th+ere \xff",
+	    "%%xhello th+ere \xff", 0);
+	/* Make sure plus decoding works */
+	DEC("plus+should%20work+", "plus should work ",1);
+	/* Try some lowercase hex */
+	DEC("%f0%a0%b0", "\xf0\xa0\xb0",1);
+
+	/* Try an internal NUL. */
+	sz = 0;
+	s = evhttp_uridecode("%00%00x%00%00", 1, &sz);
+	tt_int_op(sz,==,5);
+	tt_assert(!memcmp(s, "\0\0x\0\0", 5));
+	free(s);
+	s = NULL;
+
+	/* Try with size == NULL */
+	sz = 0;
+	s = evhttp_uridecode("%00%00x%00%00", 1, NULL);
+	tt_assert(!memcmp(s, "\0\0x\0\0", 5));
+	free(s);
+	s = NULL;
+
+	/* Test out the crazy old behavior of the deprecated
+	 * evhttp_decode_uri */
+	OLD_DEC("http://example.com/normal+path/?key=val+with+spaces",
+	        "http://example.com/normal+path/?key=val with spaces");
+
+end:
+	if (s)
+		free(s);
+	if (s2)
+		free(s2);
+#undef ENC
+#undef DEC
+#undef OLD_DEC
+}
+
 static void
 http_base_test(void *ptr)
 {
@@ -2701,6 +2801,7 @@ struct testcase_t http_testcases[] = {
 	{ "base", http_base_test, TT_FORK|TT_NEED_BASE, NULL, NULL },
 	{ "bad_headers", http_bad_header_test, 0, NULL, NULL },
 	{ "parse_query", http_parse_query_test, 0, NULL, NULL },
+	{ "uriencode", http_uriencode_test, 0, NULL, NULL },
 	HTTP_LEGACY(basic),
 	HTTP_LEGACY(cancel),
 	HTTP_LEGACY(virtual_host),
-- 
2.40.0