From: Nick Mathewson Date: Sun, 13 Feb 2011 05:41:22 +0000 (-0500) Subject: Make URI parser able to tolerate nonconformant URIs. X-Git-Tag: release-2.0.11-stable~42^2 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=95060b54fe9dba5ec5e22bee3454a1e83b888d48;p=libevent Make URI parser able to tolerate nonconformant URIs. If the EVHTTP_URI_NONCONFORMANT flag is passed in (which it is when parsing URIs we get over the wire), then we relax our checks a lot. Specifically, we do nothing to check for correct characters in the path, query, and fragment parts of such a URI. We could do much more here: we could relax our hostname requirements, deal with spaces differently/better, trap some errors but not others, etc. But this should solve the worst user-agent compatibility issues for now; the other issues can wait for a later release. --- diff --git a/http.c b/http.c index 53b58b41..8fb6010a 100644 --- a/http.c +++ b/http.c @@ -1472,7 +1472,8 @@ evhttp_parse_request_line(struct evhttp_request *req, char *line) return (-1); } - if ((req->uri_elems = evhttp_uri_parse(req->uri)) == NULL) { + if ((req->uri_elems = evhttp_uri_parse_with_flags(req->uri, + EVHTTP_URI_NONCONFORMANT)) == NULL) { return -1; } @@ -3777,6 +3778,7 @@ bind_socket(const char *address, ev_uint16_t port, int reuse) } struct evhttp_uri { + unsigned flags; char *scheme; /* scheme; e.g http, ftp etc */ char *userinfo; /* userinfo (typically username:pass), or NULL */ char *host; /* hostname, IP address, or NULL */ @@ -3795,7 +3797,13 @@ evhttp_uri_new(void) return uri; } -/* Return true of the string starting at s and ending immediately before eos +void +evhttp_uri_set_flags(struct evhttp_uri *uri, unsigned flags) +{ + uri->flags = flags; +} + +/* Return true if the string starting at s and ending immediately before eos * is a valid URI scheme according to RFC3986 */ static int @@ -3987,13 +3995,41 @@ end_of_authority(char *cp) return cp; } +enum uri_part { + PART_PATH, + PART_QUERY, + 
PART_FRAGMENT +}; + /* Return the character after the longest prefix of 'cp' that matches... * *pchar / "/" if allow_qchars is false, or - * *(pchar / "/" / "?") if allow_chars is true. + * *(pchar / "/" / "?") if allow_qchars is true. */ static char * -end_of_path(char *cp, int allow_qchars) +end_of_path(char *cp, enum uri_part part, unsigned flags) { + if (flags & EVHTTP_URI_NONCONFORMANT) { + /* If NONCONFORMANT: + * Path is everything up to a # or ? or nul. + * Query is everything up to a # or nul + * Fragment is everything up to a nul. + */ + switch (part) { + case PART_PATH: + while (*cp && *cp != '#' && *cp != '?') + ++cp; + break; + case PART_QUERY: + while (*cp && *cp != '#') + ++cp; + break; + case PART_FRAGMENT: + cp += strlen(cp); + break; + }; + return cp; + } + while (*cp) { if (CHAR_IS_UNRESERVED(*cp) || strchr(SUBDELIMS, *cp) || @@ -4002,7 +4038,7 @@ end_of_path(char *cp, int allow_qchars) else if (*cp == '%' && EVUTIL_ISXDIGIT(cp[1]) && EVUTIL_ISXDIGIT(cp[2])) cp += 3; - else if (*cp == '?' && allow_qchars) + else if (*cp == '?' && part != PART_PATH) ++cp; else return cp; @@ -4025,6 +4061,12 @@ path_matches_noscheme(const char *cp) struct evhttp_uri * evhttp_uri_parse(const char *source_uri) +{ + return evhttp_uri_parse_with_flags(source_uri, 0); +} + +struct evhttp_uri * +evhttp_uri_parse_with_flags(const char *source_uri, unsigned flags) { char *readbuf = NULL, *readp = NULL, *token = NULL, *query = NULL; char *path = NULL, *fragment = NULL; @@ -4036,6 +4078,7 @@ evhttp_uri_parse(const char *source_uri) goto err; } uri->port = -1; + uri->flags = flags; readbuf = mm_strdup(source_uri); if (readbuf == NULL) { @@ -4052,7 +4095,6 @@ evhttp_uri_parse(const char *source_uri) URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] relative-ref = relative-part [ "?" query ] [ "#" fragment ] - */ /* 1. scheme: */ @@ -4082,21 +4124,21 @@ evhttp_uri_parse(const char *source_uri) /* 3. 
Query: path-abempty, path-absolute, path-rootless, or path-empty */ path = readp; - readp = end_of_path(path, 0); + readp = end_of_path(path, PART_PATH, flags); /* Query */ if (*readp == '?') { *readp = '\0'; ++readp; query = readp; - readp = end_of_path(readp, 1); + readp = end_of_path(readp, PART_QUERY, flags); } /* fragment */ if (*readp == '#') { *readp = '\0'; ++readp; fragment = readp; - readp = end_of_path(readp, 1); + readp = end_of_path(readp, PART_FRAGMENT, flags); } if (*readp != '\0') { goto err; @@ -4324,12 +4366,13 @@ evhttp_uri_set_port(struct evhttp_uri *uri, int port) uri->port = port; return 0; } -#define end_of_cpath(cp,aq) ((const char*)(end_of_path(((char*)(cp)), (aq)))) +#define end_of_cpath(cp,p,f) \ + ((const char*)(end_of_path(((char*)(cp)), (p), (f)))) int evhttp_uri_set_path(struct evhttp_uri *uri, const char *path) { - if (path && end_of_cpath(path, 0) != path+strlen(path)) + if (path && end_of_cpath(path, PART_PATH, uri->flags) != path+strlen(path)) return -1; _URI_SET_STR(path); @@ -4338,7 +4381,7 @@ evhttp_uri_set_path(struct evhttp_uri *uri, const char *path) int evhttp_uri_set_query(struct evhttp_uri *uri, const char *query) { - if (query && end_of_cpath(query, 1) != query+strlen(query)) + if (query && end_of_cpath(query, PART_QUERY, uri->flags) != query+strlen(query)) return -1; _URI_SET_STR(query); return 0; @@ -4346,7 +4389,7 @@ evhttp_uri_set_query(struct evhttp_uri *uri, const char *query) int evhttp_uri_set_fragment(struct evhttp_uri *uri, const char *fragment) { - if (fragment && end_of_cpath(fragment, 1) != fragment+strlen(fragment)) + if (fragment && end_of_cpath(fragment, PART_FRAGMENT, uri->flags) != fragment+strlen(fragment)) return -1; _URI_SET_STR(fragment); return 0; diff --git a/include/event2/http.h b/include/event2/http.h index e05c3bff..84efd6b0 100644 --- a/include/event2/http.h +++ b/include/event2/http.h @@ -707,6 +707,12 @@ char *evhttp_htmlescape(const char *html); */ struct evhttp_uri *evhttp_uri_new(void); 
+/** + * Changes the flags set on a given URI. See EVHTTP_URI_* for + * a list of flags. + **/ +void evhttp_uri_set_flags(struct evhttp_uri *uri, unsigned flags); + /** Return the scheme of an evhttp_uri, or NULL if there is no scheme has * been set and the evhttp_uri contains a Relative-Ref. */ const char *evhttp_uri_get_scheme(const struct evhttp_uri *uri); @@ -792,9 +798,29 @@ int evhttp_uri_set_fragment(struct evhttp_uri *uri, const char *fragment); * accepts all of them as valid. * * @param source_uri the request URI + * @param flags Zero or more EVHTTP_URI_* flags to affect the behavior + * of the parser. * @return uri container to hold parsed data, or NULL if there is error * @see evhttp_uri_free() */ +struct evhttp_uri *evhttp_uri_parse_with_flags(const char *source_uri, + unsigned flags); + +/** Tolerate URIs that do not conform to RFC3986. + * + * Unfortunately, some HTTP clients generate URIs that, according to RFC3986, + * are not conformant URIs. If you need to support these URIs, you can + * do so by passing this flag to evhttp_uri_parse_with_flags. + * + * Currently, these changes are: + * <ul> + * <li> Nonconformant URIs are allowed to contain otherwise unreasonable + * characters in their path, query, and fragment components. + * </ul> + */ +#define EVHTTP_URI_NONCONFORMANT 0x01 + +/** Alias for evhttp_uri_parse_with_flags(source_uri, 0) */ struct evhttp_uri *evhttp_uri_parse(const char *source_uri); /** @@ -817,7 +843,7 @@ void evhttp_uri_free(struct evhttp_uri *uri); * @param buf destination buffer * @param limit destination buffer size * @return an joined uri as string or NULL on error - @see evhttp_uri_parse() + * @see evhttp_uri_parse() */ char *evhttp_uri_join(struct evhttp_uri *uri, char *buf, size_t limit); diff --git a/test/regress_http.c b/test/regress_http.c index 4b894acc..a7a14301 100644 --- a/test/regress_http.c +++ b/test/regress_http.c @@ -1990,8 +1990,13 @@ end: static void http_parse_uri_test(void *ptr) { + const int nonconform = (ptr != NULL); + const unsigned parse_flags = + nonconform ? 
EVHTTP_URI_NONCONFORMANT : 0; struct evhttp_uri *uri = NULL; char url_tmp[4096]; +#define URI_PARSE(uri) \ + evhttp_uri_parse_with_flags((uri), parse_flags) #define TT_URI(want) do { \ char *ret = evhttp_uri_join(uri, url_tmp, sizeof(url_tmp)); \ @@ -2007,21 +2012,37 @@ http_parse_uri_test(void *ptr) /* bad URIs: parsing */ #define BAD(s) do { \ - if (evhttp_uri_parse(s) != NULL) \ + if (URI_PARSE(s) != NULL) \ TT_FAIL(("Expected error parsing \"%s\"",s)); \ } while(0) - BAD("http://www.test.com/ why hello"); - BAD("http://www.test.com/why-hello\x01"); - BAD("http://www.test.com/why-hello?\x01"); - BAD("http://www.test.com/why-hello#\x01"); + /* Nonconformant URIs we can parse: parsing */ +#define NCF(s) do { \ + uri = URI_PARSE(s); \ + if (uri != NULL && !nonconform) { \ + TT_FAIL(("Expected error parsing \"%s\"",s)); \ + } else if (uri == NULL && nonconform) { \ + TT_FAIL(("Couldn't parse nonconformant URI \"%s\"", \ + s)); \ + } \ + if (uri) { \ + tt_want(evhttp_uri_join(uri, url_tmp, \ + sizeof(url_tmp))); \ + evhttp_uri_free(uri); \ + } \ + } while(0) + + NCF("http://www.test.com/ why hello"); + NCF("http://www.test.com/why-hello\x01"); + NCF("http://www.test.com/why-hello?\x01"); + NCF("http://www.test.com/why-hello#\x01"); BAD("http://www.\x01.test.com/why-hello"); BAD("http://www.%7test.com/why-hello"); - BAD("http://www.test.com/why-hell%7o"); + NCF("http://www.test.com/why-hell%7o"); BAD("h%3ttp://www.test.com/why-hello"); - BAD("http://www.test.com/why-hello%7"); - BAD("http://www.test.com/why-hell%7o"); - BAD("http://www.test.com/foo?ba%r"); - BAD("http://www.test.com/foo#ba%r"); + NCF("http://www.test.com/why-hello%7"); + NCF("http://www.test.com/why-hell%7o"); + NCF("http://www.test.com/foo?ba%r"); + NCF("http://www.test.com/foo#ba%r"); BAD("99:99/foo"); BAD("http://www.test.com:999x/"); BAD("http://www.test.com:x/"); @@ -2057,7 +2078,7 @@ http_parse_uri_test(void *ptr) tt_want(evhttp_uri_join(uri, NULL, sizeof(url_tmp))==NULL); 
tt_want(evhttp_uri_join(uri, url_tmp, 0)==NULL); evhttp_uri_free(uri); - uri = evhttp_uri_parse("mailto:foo@bar"); + uri = URI_PARSE("mailto:foo@bar"); tt_want(uri != NULL); tt_want(evhttp_uri_get_host(uri) == NULL); tt_want(evhttp_uri_get_userinfo(uri) == NULL); @@ -2109,7 +2130,7 @@ http_parse_uri_test(void *ptr) evhttp_uri_free(uri); /* Valid parsing */ - uri = evhttp_uri_parse("http://www.test.com/?q=t%33est"); + uri = URI_PARSE("http://www.test.com/?q=t%33est"); tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0); tt_want(strcmp(evhttp_uri_get_host(uri), "www.test.com") == 0); tt_want(strcmp(evhttp_uri_get_path(uri), "/") == 0); @@ -2120,7 +2141,7 @@ http_parse_uri_test(void *ptr) TT_URI("http://www.test.com/?q=t%33est"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("http://%77ww.test.com"); + uri = URI_PARSE("http://%77ww.test.com"); tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0); tt_want(strcmp(evhttp_uri_get_host(uri), "%77ww.test.com") == 0); tt_want(strcmp(evhttp_uri_get_path(uri), "") == 0); @@ -2131,7 +2152,7 @@ http_parse_uri_test(void *ptr) TT_URI("http://%77ww.test.com"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("http://www.test.com?q=test"); + uri = URI_PARSE("http://www.test.com?q=test"); tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0); tt_want(strcmp(evhttp_uri_get_host(uri), "www.test.com") == 0); tt_want(strcmp(evhttp_uri_get_path(uri), "") == 0); @@ -2142,7 +2163,7 @@ http_parse_uri_test(void *ptr) TT_URI("http://www.test.com?q=test"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("http://www.test.com#fragment"); + uri = URI_PARSE("http://www.test.com#fragment"); tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0); tt_want(strcmp(evhttp_uri_get_host(uri), "www.test.com") == 0); tt_want(strcmp(evhttp_uri_get_path(uri), "") == 0); @@ -2153,7 +2174,7 @@ http_parse_uri_test(void *ptr) TT_URI("http://www.test.com#fragment"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("http://8000/"); + uri = 
URI_PARSE("http://8000/"); tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0); tt_want(strcmp(evhttp_uri_get_host(uri), "8000") == 0); tt_want(strcmp(evhttp_uri_get_path(uri), "/") == 0); @@ -2164,7 +2185,7 @@ http_parse_uri_test(void *ptr) TT_URI("http://8000/"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("http://:8000/"); + uri = URI_PARSE("http://:8000/"); tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0); tt_want(strcmp(evhttp_uri_get_host(uri), "") == 0); tt_want(strcmp(evhttp_uri_get_path(uri), "/") == 0); @@ -2175,7 +2196,7 @@ http_parse_uri_test(void *ptr) TT_URI("http://:8000/"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("http://www.test.com:/"); /* empty port */ + uri = URI_PARSE("http://www.test.com:/"); /* empty port */ tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0); tt_want(strcmp(evhttp_uri_get_host(uri), "www.test.com") == 0); tt_want_str_op(evhttp_uri_get_path(uri), ==, "/"); @@ -2186,7 +2207,7 @@ http_parse_uri_test(void *ptr) TT_URI("http://www.test.com/"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("http://www.test.com:"); /* empty port 2 */ + uri = URI_PARSE("http://www.test.com:"); /* empty port 2 */ tt_want(strcmp(evhttp_uri_get_scheme(uri), "http") == 0); tt_want(strcmp(evhttp_uri_get_host(uri), "www.test.com") == 0); tt_want(strcmp(evhttp_uri_get_path(uri), "") == 0); @@ -2197,7 +2218,7 @@ http_parse_uri_test(void *ptr) TT_URI("http://www.test.com"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("ftp://www.test.com/?q=test"); + uri = URI_PARSE("ftp://www.test.com/?q=test"); tt_want(strcmp(evhttp_uri_get_scheme(uri), "ftp") == 0); tt_want(strcmp(evhttp_uri_get_host(uri), "www.test.com") == 0); tt_want(strcmp(evhttp_uri_get_path(uri), "/") == 0); @@ -2208,7 +2229,7 @@ http_parse_uri_test(void *ptr) TT_URI("ftp://www.test.com/?q=test"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("ftp://[::1]:999/?q=test"); + uri = URI_PARSE("ftp://[::1]:999/?q=test"); tt_want(strcmp(evhttp_uri_get_scheme(uri), "ftp") == 
0); tt_want(strcmp(evhttp_uri_get_host(uri), "[::1]") == 0); tt_want(strcmp(evhttp_uri_get_path(uri), "/") == 0); @@ -2219,7 +2240,7 @@ http_parse_uri_test(void *ptr) TT_URI("ftp://[::1]:999/?q=test"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("ftp://[ff00::127.0.0.1]/?q=test"); + uri = URI_PARSE("ftp://[ff00::127.0.0.1]/?q=test"); tt_want(strcmp(evhttp_uri_get_scheme(uri), "ftp") == 0); tt_want(strcmp(evhttp_uri_get_host(uri), "[ff00::127.0.0.1]") == 0); tt_want(strcmp(evhttp_uri_get_path(uri), "/") == 0); @@ -2230,7 +2251,7 @@ http_parse_uri_test(void *ptr) TT_URI("ftp://[ff00::127.0.0.1]/?q=test"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("ftp://[v99.not_(any:time)_soon]/?q=test"); + uri = URI_PARSE("ftp://[v99.not_(any:time)_soon]/?q=test"); tt_want(strcmp(evhttp_uri_get_scheme(uri), "ftp") == 0); tt_want(strcmp(evhttp_uri_get_host(uri), "[v99.not_(any:time)_soon]") == 0); tt_want(strcmp(evhttp_uri_get_path(uri), "/") == 0); @@ -2241,7 +2262,7 @@ http_parse_uri_test(void *ptr) TT_URI("ftp://[v99.not_(any:time)_soon]/?q=test"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("scheme://user:pass@foo.com:42/?q=test&s=some+thing#fragment"); + uri = URI_PARSE("scheme://user:pass@foo.com:42/?q=test&s=some+thing#fragment"); tt_want(strcmp(evhttp_uri_get_scheme(uri), "scheme") == 0); tt_want(strcmp(evhttp_uri_get_userinfo(uri), "user:pass") == 0); tt_want(strcmp(evhttp_uri_get_host(uri), "foo.com") == 0); @@ -2252,7 +2273,7 @@ http_parse_uri_test(void *ptr) TT_URI("scheme://user:pass@foo.com:42/?q=test&s=some+thing#fragment"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("scheme://user@foo.com/#fragment"); + uri = URI_PARSE("scheme://user@foo.com/#fragment"); tt_want(strcmp(evhttp_uri_get_scheme(uri), "scheme") == 0); tt_want(strcmp(evhttp_uri_get_userinfo(uri), "user") == 0); tt_want(strcmp(evhttp_uri_get_host(uri), "foo.com") == 0); @@ -2263,7 +2284,7 @@ http_parse_uri_test(void *ptr) TT_URI("scheme://user@foo.com/#fragment"); evhttp_uri_free(uri); - uri 
= evhttp_uri_parse("scheme://%75ser@foo.com/#frag@ment"); + uri = URI_PARSE("scheme://%75ser@foo.com/#frag@ment"); tt_want(strcmp(evhttp_uri_get_scheme(uri), "scheme") == 0); tt_want(strcmp(evhttp_uri_get_userinfo(uri), "%75ser") == 0); tt_want(strcmp(evhttp_uri_get_host(uri), "foo.com") == 0); @@ -2274,7 +2295,7 @@ http_parse_uri_test(void *ptr) TT_URI("scheme://%75ser@foo.com/#frag@ment"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("file:///some/path/to/the/file"); + uri = URI_PARSE("file:///some/path/to/the/file"); tt_want(strcmp(evhttp_uri_get_scheme(uri), "file") == 0); tt_want(evhttp_uri_get_userinfo(uri) == NULL); tt_want(strcmp(evhttp_uri_get_host(uri), "") == 0); @@ -2285,7 +2306,7 @@ http_parse_uri_test(void *ptr) TT_URI("file:///some/path/to/the/file"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("///some/path/to/the-file"); + uri = URI_PARSE("///some/path/to/the-file"); tt_want(uri != NULL); tt_want(evhttp_uri_get_scheme(uri) == NULL); tt_want(evhttp_uri_get_userinfo(uri) == NULL); @@ -2297,7 +2318,7 @@ http_parse_uri_test(void *ptr) TT_URI("///some/path/to/the-file"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("/s:ome/path/to/the-file?q=99#fred"); + uri = URI_PARSE("/s:ome/path/to/the-file?q=99#fred"); tt_want(uri != NULL); tt_want(evhttp_uri_get_scheme(uri) == NULL); tt_want(evhttp_uri_get_userinfo(uri) == NULL); @@ -2309,7 +2330,7 @@ http_parse_uri_test(void *ptr) TT_URI("/s:ome/path/to/the-file?q=99#fred"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("relative/path/with/co:lon"); + uri = URI_PARSE("relative/path/with/co:lon"); tt_want(uri != NULL); tt_want(evhttp_uri_get_scheme(uri) == NULL); tt_want(evhttp_uri_get_userinfo(uri) == NULL); @@ -2321,7 +2342,7 @@ http_parse_uri_test(void *ptr) TT_URI("relative/path/with/co:lon"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("bob?q=99&q2=q?33#fr?ed"); + uri = URI_PARSE("bob?q=99&q2=q?33#fr?ed"); tt_want(uri != NULL); tt_want(evhttp_uri_get_scheme(uri) == NULL); 
tt_want(evhttp_uri_get_userinfo(uri) == NULL); @@ -2333,7 +2354,7 @@ http_parse_uri_test(void *ptr) TT_URI("bob?q=99&q2=q?33#fr?ed"); evhttp_uri_free(uri); - uri = evhttp_uri_parse("#fr?ed"); + uri = URI_PARSE("#fr?ed"); tt_want(uri != NULL); tt_want(evhttp_uri_get_scheme(uri) == NULL); tt_want(evhttp_uri_get_userinfo(uri) == NULL); @@ -2344,6 +2365,9 @@ http_parse_uri_test(void *ptr) tt_want(strcmp(evhttp_uri_get_fragment(uri), "fr?ed") == 0); TT_URI("#fr?ed"); evhttp_uri_free(uri); +#undef URI_PARSE +#undef TT_URI +#undef BAD } static void @@ -3489,6 +3513,7 @@ struct testcase_t http_testcases[] = { { "bad_headers", http_bad_header_test, 0, NULL, NULL }, { "parse_query", http_parse_query_test, 0, NULL, NULL }, { "parse_uri", http_parse_uri_test, 0, NULL, NULL }, + { "parse_uri_nc", http_parse_uri_test, 0, &basic_setup, (void*)"nc" }, { "uriencode", http_uriencode_test, 0, NULL, NULL }, HTTP(basic), HTTP(cancel),