From 26430419703f925a9b6206ec96780ae899b6dd06 Mon Sep 17 00:00:00 2001 From: Senthil Kumaran Date: Wed, 13 Apr 2011 07:01:19 +0800 Subject: [PATCH] Fix Issue #11703 - urllib2.geturl() does not return the correct URL when the original URL contains a #fragment. Patch contributed by Santoso Wijaya. --- Lib/test/test_urllib.py | 10 ++++++++++ Lib/test/test_urllib2.py | 15 ++++++++++++++- Lib/test/test_urllib2net.py | 2 +- Lib/urllib/request.py | 9 ++++++--- 4 files changed, 31 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 2b8852127b..462a2b03f8 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -171,6 +171,16 @@ class urlopen_HttpTests(unittest.TestCase): finally: self.unfakehttp() + def test_url_fragment(self): + # Issue #11703: geturl() omits fragments in the original URL. + url = 'http://docs.python.org/library/urllib.html#OK' + self.fakehttp(b'Hello!') + try: + fp = urllib.request.urlopen(url) + self.assertEqual(fp.geturl(), url) + finally: + self.unfakehttp() + def test_read_bogus(self): # urlopen() should raise IOError for many error codes. 
self.fakehttp(b'''HTTP/1.1 401 Authentication Required diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 3fd7baa8dd..62226b834c 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -1024,6 +1024,15 @@ class HandlerTests(unittest.TestCase): o.open("http://www.example.com/") self.assertFalse(hh.req.has_header("Cookie")) + def test_redirect_fragment(self): + redirected_url = 'http://www.example.com/index.html#OK\r\n\r\n' + hh = MockHTTPHandler(302, 'Location: ' + redirected_url) + hdeh = urllib.request.HTTPDefaultErrorHandler() + hrh = urllib.request.HTTPRedirectHandler() + o = build_test_opener(hh, hdeh, hrh) + fp = o.open('http://www.example.com') + self.assertEqual(fp.geturl(), redirected_url.strip()) + def test_proxy(self): o = OpenerDirector() ph = urllib.request.ProxyHandler(dict(http="proxy.example.com:3128")) @@ -1339,12 +1348,16 @@ class RequestTests(unittest.TestCase): req = Request("") self.assertEqual("www.python.org", req.get_host()) - def test_urlwith_fragment(self): + def test_url_fragment(self): req = Request("http://www.python.org/?qs=query#fragment=true") self.assertEqual("/?qs=query", req.get_selector()) req = Request("http://www.python.org/#fun=true") self.assertEqual("/", req.get_selector()) + # Issue 11703: geturl() omits fragment in the original URL. 
+ url = 'http://docs.python.org/library/urllib2.html#OK' + req = Request(url) + self.assertEqual(req.get_full_url(), url) def test_main(verbose=None): from test import test_urllib2 diff --git a/Lib/test/test_urllib2net.py b/Lib/test/test_urllib2net.py index 63e25b4b69..e6c4ec1727 100644 --- a/Lib/test/test_urllib2net.py +++ b/Lib/test/test_urllib2net.py @@ -158,7 +158,7 @@ class OtherNetworkTests(unittest.TestCase): req = urllib.request.Request(urlwith_frag) res = urllib.request.urlopen(req) self.assertEqual(res.geturl(), - "http://docs.python.org/glossary.html") + "http://docs.python.org/glossary.html#glossary") def test_custom_headers(self): url = "http://www.example.com" diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 220dfe4b19..6b29901864 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -163,7 +163,7 @@ class Request: origin_req_host=None, unverifiable=False): # unwrap('') --> 'type://host/path' self.full_url = unwrap(url) - self.full_url, fragment = splittag(self.full_url) + self.full_url, self.fragment = splittag(self.full_url) self.data = data self.headers = {} self._tunnel_host = None @@ -202,7 +202,10 @@ class Request: return self.data def get_full_url(self): - return self.full_url + if self.fragment: + return '%s#%s' % (self.full_url, self.fragment) + else: + return self.full_url def get_type(self): return self.type @@ -1106,7 +1109,7 @@ class AbstractHTTPHandler(BaseHandler): except socket.error as err: raise URLError(err) - r.url = req.full_url + r.url = req.get_full_url() # This line replaces the .msg attribute of the HTTPResponse # with .headers, because urllib clients expect the response to # have the reason in .msg. It would be good to mark this -- 2.40.0