From 74c76c8f06c60698a23289abc48d23b9b4ea6c27 Mon Sep 17 00:00:00 2001
From: Martin Panter <vadmium+py@gmail.com>
Date: Sat, 3 Oct 2015 05:55:46 +0000
Subject: [PATCH] Issue #24657: Prevent CGIHTTPRequestHandler from collapsing
 the URL query

Initial patch from Xiang Zhang. Also fix the out-of-date _url_collapse_path()
docstring.
---
 Lib/CGIHTTPServer.py         | 13 +++++++++----
 Lib/test/test_httpservers.py |  7 +++++++
 Misc/NEWS                    |  3 +++
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/Lib/CGIHTTPServer.py b/Lib/CGIHTTPServer.py
index 7a4593eeb6..5620083c69 100644
--- a/Lib/CGIHTTPServer.py
+++ b/Lib/CGIHTTPServer.py
@@ -84,7 +84,7 @@ class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
         path begins with one of the strings in self.cgi_directories
         (and the next character is a '/' or the end of the string).
         """
-        collapsed_path = _url_collapse_path(urllib.unquote(self.path))
+        collapsed_path = _url_collapse_path(self.path)
         dir_sep = collapsed_path.find('/', 1)
         head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]
         if head in self.cgi_directories:
@@ -304,13 +304,15 @@ def _url_collapse_path(path):
     The utility of this function is limited to is_cgi method and helps
     preventing some security attacks.
 
-    Returns: A tuple of (head, tail) where tail is everything after the final /
-    and head is everything before it.  Head will always start with a '/' and,
-    if it contains anything else, never have a trailing '/'.
+    Returns: The reconstituted URL, which will always start with a '/'.
 
     Raises: IndexError if too many '..' occur within the path.
 
     """
+    # Only the path is unquoted and collapsed; the query is kept verbatim.
+    path, _, query = path.partition('?')
+    path = urllib.unquote(path)
+
     # Similar to os.path.split(os.path.normpath(path)) but specific to URL
     # path semantics rather than local operating system semantics.
     path_parts = path.split('/')
@@ -331,6 +333,9 @@ def _url_collapse_path(path):
     else:
         tail_part = ''
 
+    if query:
+        tail_part = '?'.join((tail_part, query))
+
     splitpath = ('/' + '/'.join(head_parts), tail_part)
     collapsed_path = "/".join(splitpath)
 
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
index 023180e624..c84f48fea4 100644
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -558,6 +558,13 @@ class CGIHTTPServerTestCase(BaseTestCase):
             (b'a=b?c=d\n', 'text/html', 200),
             (res.read(), res.getheader('Content-type'), res.status))
 
+    def test_query_with_continuous_slashes(self):
+        res = self.request('/cgi-bin/file4.py?k=aa%2F%2Fbb&//q//p//=//a//b//')
+        self.assertEqual(
+            (b'k=aa%2F%2Fbb&//q//p//=//a//b//\n',
+             'text/html', 200),
+            (res.read(), res.getheader('Content-type'), res.status))
+
 
 class SimpleHTTPRequestHandlerTestCase(unittest.TestCase):
     """ Test url parsing """
diff --git a/Misc/NEWS b/Misc/NEWS
index f6e29b6ff2..c6904a8459 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -46,6 +46,9 @@ Library
 - Issue #25232: Fix CGIRequestHandler to split the query from the URL at the
   first question mark (?) rather than the last. Patch from Xiang Zhang.
 
+- Issue #24657: Prevent CGIHTTPRequestHandler from collapsing slashes in the
+  query part of the URL as if it were a path. Patch from Xiang Zhang.
+
 - Issue #22958: Constructor and update method of weakref.WeakValueDictionary
   now accept the self keyword argument.
 
-- 
2.50.1