From: Senthil Kumaran Date: Mon, 22 Nov 2010 05:04:33 +0000 (+0000) Subject: Merged revisions 86676 via svnmerge from X-Git-Tag: v2.7.1~41 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=0b7cac11bfd3061ac78c25e5ae118faf16eb9924;p=python Merged revisions 86676 via svnmerge from svn+ssh://pythondev@svn.python.org/python/branches/py3k ........ r86676 | senthil.kumaran | 2010-11-22 12:48:26 +0800 (Mon, 22 Nov 2010) | 4 lines Fix Issue4493 - urllib2 adds '/' to the path component of url, when it does not start with one. This behavior is exhibited by browsers and other clients. ........ --- diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 46db6b4257..b74320d2ac 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -838,6 +838,25 @@ class HandlerTests(unittest.TestCase): p_ds_req = h.do_request_(ds_req) self.assertEqual(p_ds_req.unredirected_hdrs["Host"],"example.com") + def test_fixpath_in_weirdurls(self): + # Issue4493: urllib2 should supply '/' for urls where the path does not + # start with '/' + + h = urllib2.AbstractHTTPHandler() + o = h.parent = MockOpener() + + weird_url = 'http://www.python.org?getspam' + req = Request(weird_url) + newreq = h.do_request_(req) + self.assertEqual(newreq.get_host(),'www.python.org') + self.assertEqual(newreq.get_selector(),'/?getspam') + + url_without_path = 'http://www.python.org' + req = Request(url_without_path) + newreq = h.do_request_(req) + self.assertEqual(newreq.get_host(),'www.python.org') + self.assertEqual(newreq.get_selector(),'') + def test_errors(self): h = urllib2.HTTPErrorProcessor() o = h.parent = MockOpener() diff --git a/Lib/urllib.py b/Lib/urllib.py index d85dedbb64..1553f9dcb2 100644 --- a/Lib/urllib.py +++ b/Lib/urllib.py @@ -1052,7 +1052,12 @@ def splithost(url): _hostprog = re.compile('^//([^/?]*)(.*)$') match = _hostprog.match(url) - if match: return match.group(1, 2) + if match: + host_port = match.group(1) + path = match.group(2) + if path and not 
path.startswith('/'): + path = '/' + path + return host_port, path return None, url _userprog = None diff --git a/Misc/NEWS b/Misc/NEWS index 315b55694b..cb9e489716 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -13,6 +13,9 @@ Core and Builtins Library ------- +- Issue #4493: urllib2 adds '/' in front of path components which do not + start with '/'. This is common behavior exhibited by browsers and other clients. + - Issue #6378: idle.bat now runs with the appropriate Python version rather than the system default. Patch by Sridhar Ratnakumar.