granicus.if.org Git - python/commitdiff
Fix Issue8280 - urllib2's Request method will remove fragments in the url.
author: Senthil Kumaran <orsenthil@gmail.com>
Sun, 8 Aug 2010 11:27:53 +0000 (11:27 +0000)
committer: Senthil Kumaran <orsenthil@gmail.com>
Sun, 8 Aug 2010 11:27:53 +0000 (11:27 +0000)
This is how it should work; wget and curl work this way too. The old behavior was wrong.

Lib/test/test_urllib2.py
Lib/test/test_urllib2net.py
Lib/urllib/request.py

index b2f7ea86f6d95cbf30771731d30f7d90fe34bcef..02dc83c542106937c0838b350f551dd144854de3 100644 (file)
@@ -1249,6 +1249,16 @@ class RequestTests(unittest.TestCase):
         self.assertEqual("www.python.org", self.get.get_origin_req_host())
         self.assertEqual("www.perl.org", self.get.get_host())
 
+    def test_wrapped_url(self):
+        req = Request("<URL:http://www.python.org>")
+        self.assertEqual("www.python.org", req.get_host())
+
+    def test_urlwith_fragment(self):
+        req = Request("http://www.python.org/?qs=query#fragment=true")
+        self.assertEqual("/?qs=query", req.get_selector())
+        req = Request("http://www.python.org/#fun=true")
+        self.assertEqual("/", req.get_selector())
+
 
 def test_main(verbose=None):
     from test import test_urllib2
index ff7c7bff64cb43341a1b4c9c6ae04f4894996267..8b9435ac60f1748c3b88fca35385de3df4d79c5d 100644 (file)
@@ -149,6 +149,13 @@ class OtherNetworkTests(unittest.TestCase):
 
 ##             self._test_urls(urls, self._extra_handlers()+[bauth, dauth])
 
+    def test_urlwithfrag(self):
+        urlwith_frag = "http://docs.python.org/glossary.html#glossary"
+        req = urllib.request.Request(urlwith_frag)
+        res = urllib.request.urlopen(req)
+        self.assertEqual(res.geturl(),
+                "http://docs.python.org/glossary.html")
+
     def _test_urls(self, urls, handlers, retry=True):
         import time
         import logging
index 012814c948b32363fb13d80f59efd96bdb3fe0bc..0a083b859d1a0c9d2617d6ca615288af4346969f 100644 (file)
@@ -99,7 +99,7 @@ from urllib.error import URLError, HTTPError, ContentTooShortError
 from urllib.parse import (
     urlparse, urlsplit, urljoin, unwrap, quote, unquote,
     splittype, splithost, splitport, splituser, splitpasswd,
-    splitattr, splitquery, splitvalue, to_bytes, urlunparse)
+    splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
 from urllib.response import addinfourl, addclosehook
 
 # check for SSL
@@ -163,6 +163,7 @@ class Request:
                  origin_req_host=None, unverifiable=False):
         # unwrap('<URL:type://host/path>') --> 'type://host/path'
         self.full_url = unwrap(url)
+        self.full_url, fragment = splittag(self.full_url)
         self.data = data
         self.headers = {}
         self._tunnel_host = None