From 74c76c8f06c60698a23289abc48d23b9b4ea6c27 Mon Sep 17 00:00:00 2001 From: Martin Panter Date: Sat, 3 Oct 2015 05:55:46 +0000 Subject: [PATCH] Issue #24657: Prevent CGIRequestHandler from collapsing the URL query Initial patch from Xiang Zhang. Also fix out-of-date _url_collapse_path() doc string. --- Lib/CGIHTTPServer.py | 13 +++++++++---- Lib/test/test_httpservers.py | 7 +++++++ Misc/NEWS | 3 +++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/Lib/CGIHTTPServer.py b/Lib/CGIHTTPServer.py index 7a4593eeb6..5620083c69 100644 --- a/Lib/CGIHTTPServer.py +++ b/Lib/CGIHTTPServer.py @@ -84,7 +84,7 @@ class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): path begins with one of the strings in self.cgi_directories (and the next character is a '/' or the end of the string). """ - collapsed_path = _url_collapse_path(urllib.unquote(self.path)) + collapsed_path = _url_collapse_path(self.path) dir_sep = collapsed_path.find('/', 1) head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:] if head in self.cgi_directories: @@ -304,13 +304,15 @@ def _url_collapse_path(path): The utility of this function is limited to is_cgi method and helps preventing some security attacks. - Returns: A tuple of (head, tail) where tail is everything after the final / - and head is everything before it. Head will always start with a '/' and, - if it contains anything else, never have a trailing '/'. + Returns: The reconstituted URL, which will always start with a '/'. Raises: IndexError if too many '..' occur within the path. """ + # Query component should not be involved. + path, _, query = path.partition('?') + path = urllib.unquote(path) + # Similar to os.path.split(os.path.normpath(path)) but specific to URL # path semantics rather than local operating system semantics. path_parts = path.split('/') @@ -331,6 +333,9 @@ def _url_collapse_path(path): else: tail_part = '' + if query: + tail_part = '?'.join((tail_part, query)) + splitpath = ('/' + '/'.join(head_parts), tail_part) collapsed_path = "/".join(splitpath) diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 023180e624..c84f48fea4 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -558,6 +558,13 @@ class CGIHTTPServerTestCase(BaseTestCase): (b'a=b?c=d\n', 'text/html', 200), (res.read(), res.getheader('Content-type'), res.status)) + def test_query_with_continuous_slashes(self): + res = self.request('/cgi-bin/file4.py?k=aa%2F%2Fbb&//q//p//=//a//b//') + self.assertEqual( + (b'k=aa%2F%2Fbb&//q//p//=//a//b//\n', + 'text/html', 200), + (res.read(), res.getheader('Content-type'), res.status)) + class SimpleHTTPRequestHandlerTestCase(unittest.TestCase): """ Test url parsing """ diff --git a/Misc/NEWS b/Misc/NEWS index f6e29b6ff2..c6904a8459 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -46,6 +46,9 @@ Library - Issue #25232: Fix CGIRequestHandler to split the query from the URL at the first question mark (?) rather than the last. Patch from Xiang Zhang. +- Issue #24657: Prevent CGIRequestHandler from collapsing slashes in the + query part of the URL as if it were a path. Patch from Xiang Zhang. + - Issue #22958: Constructor and update method of weakref.WeakValueDictionary now accept the self keyword argument. -- 2.50.1