bpo-32498: urllib.parse.unquote also accepts bytes (GH-7768)

author Stein Karlsen <karlsen.stein@gmail.com>

Mon, 14 Oct 2019 10:36:29 +0000 (12:36 +0200)

committer Tal Einat <taleinat+github@gmail.com>

Mon, 14 Oct 2019 10:36:29 +0000 (13:36 +0300)
author Stein Karlsen <karlsen.stein@gmail.com>
Mon, 14 Oct 2019 10:36:29 +0000 (12:36 +0200)
committer Tal Einat <taleinat+github@gmail.com>
Mon, 14 Oct 2019 10:36:29 +0000 (13:36 +0300)
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst

index 49276daa7ff43f292ff7f6521a8cedfc9bf24955..84d289bc4415c8c7c925e4d04f187dcb34d0b6ea 100644 (file)
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -571,7 +571,7 @@ task isn't already covered by the URL parsing functions above.
     percent-encoded sequences into Unicode characters, as accepted by the
     :meth:`bytes.decode` method.
  
-   *string* must be a :class:`str`.
+   *string* may be either a :class:`str` or a :class:`bytes`.
  
     *encoding* defaults to ``'utf-8'``.
     *errors* defaults to ``'replace'``, meaning invalid sequences are replaced
@@ -579,6 +579,11 @@ task isn't already covered by the URL parsing functions above.
  
     Example: ``unquote('/El%20Ni%C3%B1o/')`` yields ``'/El Niño/'``.
  
+   .. versionchanged:: 3.9
+      The *string* parameter accepts bytes and str objects (previously only str).
+
+
+
  
  .. function:: unquote_plus(string, encoding='utf-8', errors='replace')
  
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py

index 9a6b5f66b7a13258e8d7fbc6a151da1d2d7adbbd..3f59c660845938f62fc46e8567dc195b50cdbe11 100644 (file)
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -1049,8 +1049,6 @@ class UnquotingTests(unittest.TestCase):
                           "%s" % result)
          self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
          self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
-        with support.check_warnings(('', BytesWarning), quiet=True):
-            self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
  
      def test_unquoting_badpercent(self):
          # Test unquoting on bad percent-escapes
@@ -1210,6 +1208,29 @@ class UnquotingTests(unittest.TestCase):
          self.assertEqual(expect, result,
                           "using unquote(): %r != %r" % (expect, result))
  
+    def test_unquoting_with_bytes_input(self):
+        # ASCII characters decoded to a string
+        given = b'blueberryjam'
+        expect = 'blueberryjam'
+        result = urllib.parse.unquote(given)
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+
+        # A mix of raw non-ASCII (UTF-8 encoded) bytes and ASCII characters
+        given = b'bl\xc3\xa5b\xc3\xa6rsyltet\xc3\xb8y'
+        expect = 'bl\u00e5b\u00e6rsyltet\u00f8y'
+        result = urllib.parse.unquote(given)
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+
+        # A mix of non-ASCII percent-encoded characters and ASCII characters
+        given = b'bl%c3%a5b%c3%a6rsyltet%c3%b8j'
+        expect = 'bl\u00e5b\u00e6rsyltet\u00f8j'
+        result = urllib.parse.unquote(given)
+        self.assertEqual(expect, result,
+                         "using unquote(): %r != %r" % (expect, result))
+
+
  class urlencode_Tests(unittest.TestCase):
      """Tests for urlencode()"""
  
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py

index b6608783a894712125c3788338758292a1a0e22b..3a38dc14c9047d9bd4c9274e8bb098a579beed58 100644 (file)
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -631,6 +631,8 @@ def unquote(string, encoding='utf-8', errors='replace'):
  
      unquote('abc%20def') -> 'abc def'.
      """
+    if isinstance(string, bytes):
+        return unquote_to_bytes(string).decode(encoding, errors)
      if '%' not in string:
          string.split
          return string
diff --git a/Misc/NEWS.d/next/Library/2018-06-17-21-02-25.bpo-32498.La3TZz.rst b/Misc/NEWS.d/next/Library/2018-06-17-21-02-25.bpo-32498.La3TZz.rst

new file mode 100644 (file)

index 0000000..9df9e65
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-06-17-21-02-25.bpo-32498.La3TZz.rst
@@ -0,0 +1,2 @@
+Made :func:`urllib.parse.unquote()` accept bytes in addition to strings.
+Patch by Stein Karlsen.
author	Stein Karlsen <karlsen.stein@gmail.com>
	Mon, 14 Oct 2019 10:36:29 +0000 (12:36 +0200)
committer	Tal Einat <taleinat+github@gmail.com>
	Mon, 14 Oct 2019 10:36:29 +0000 (13:36 +0300)
Doc/library/urllib.parse.rst		patch \| blob \| history
Lib/test/test_urllib.py		patch \| blob \| history
Lib/urllib/parse.py		patch \| blob \| history
Misc/NEWS.d/next/Library/2018-06-17-21-02-25.bpo-32498.La3TZz.rst	[new file with mode: 0644]	patch \| blob