granicus.if.org Git - python/commitdiff
Fix Issue4493 - urllib2 adds '/' to the path component of url, when it does not
author Senthil Kumaran <orsenthil@gmail.com>
Mon, 22 Nov 2010 04:48:26 +0000 (04:48 +0000)
committer Senthil Kumaran <orsenthil@gmail.com>
Mon, 22 Nov 2010 04:48:26 +0000 (04:48 +0000)
start with one. This behavior is exhibited by browsers and other clients.

Lib/test/test_urllib2.py
Lib/urllib/parse.py
Lib/urllib/request.py
Misc/NEWS

index e5e3c39adb5801bfac0b0604f8e8e5b8689cdf96..9cc96971fbb0e3d1d4a42e7711fdab410e126d47 100644 (file)
@@ -848,6 +848,25 @@ class HandlerTests(unittest.TestCase):
             p_ds_req = h.do_request_(ds_req)
             self.assertEqual(p_ds_req.unredirected_hdrs["Host"],"example.com")
 
+    def test_fixpath_in_weirdurls(self):
+        # Issue4493: urllib2 should supply a leading '/' for URLs whose path
+        # does not start with '/'.
+
+        h = urllib.request.AbstractHTTPHandler()
+        o = h.parent = MockOpener()
+
+        weird_url = 'http://www.python.org?getspam'
+        req = Request(weird_url)
+        newreq = h.do_request_(req)
+        self.assertEqual(newreq.host,'www.python.org')
+        self.assertEqual(newreq.selector,'/?getspam')
+
+        url_without_path = 'http://www.python.org'
+        req = Request(url_without_path)
+        newreq = h.do_request_(req)
+        self.assertEqual(newreq.host,'www.python.org')
+        self.assertEqual(newreq.selector,'')
+
 
     def test_errors(self):
         h = urllib.request.HTTPErrorProcessor()
index 2ddd28163a9802de9d5232aee1405868e0f5074a..78f30843b7683dcc1daf56dedacc4fc6e3807f16 100644 (file)
@@ -699,7 +699,12 @@ def splithost(url):
         _hostprog = re.compile('^//([^/?]*)(.*)$')
 
     match = _hostprog.match(url)
-    if match: return match.group(1, 2)
+    if match:
+        host_port = match.group(1)
+        path = match.group(2)
+        if path and not path.startswith('/'):
+            path = '/' + path
+        return host_port, path
     return None, url
 
 _userprog = None
index f3fb7bed747a33c619c2926a3b3fcfe21bf55f07..fe66a678eb455d9b10bdab7eab8a1214467e1462 100644 (file)
@@ -105,7 +105,7 @@ from urllib.response import addinfourl, addclosehook
 # check for SSL
 try:
     import ssl
-except:
+except ImportError:
     _have_ssl = False
 else:
     _have_ssl = True
index b51a644e0028331260234446f22b543be4d77ac3..e10c85631ea173b7d8a84b3ba8340d0826058d29 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -32,6 +32,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #4493: urllib2 adds '/' in front of path components which do not
+  start with '/'. Common behavior exhibited by browsers and other clients.
+
 - Issue #6378: idle.bat now runs with the appropriate Python version rather than
   the system default. Patch by Sridhar Ratnakumar.