Fix for Issue8135 - urllib.unquote to support mixed-case percent escapes

author Senthil Kumaran <orsenthil@gmail.com>

Thu, 18 Mar 2010 12:14:15 +0000 (12:14 +0000)

committer Senthil Kumaran <orsenthil@gmail.com>

Thu, 18 Mar 2010 12:14:15 +0000 (12:14 +0000)
author Senthil Kumaran <orsenthil@gmail.com>
Thu, 18 Mar 2010 12:14:15 +0000 (12:14 +0000)
committer Senthil Kumaran <orsenthil@gmail.com>
Thu, 18 Mar 2010 12:14:15 +0000 (12:14 +0000)
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py

index 1cf8e4487b1ca2a69f1843a96dc4507a56c16a14..9bd8857371c63ef5f13f2465d607b2dc44fb16df 100644 (file)
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -439,6 +439,32 @@ class UnquotingTests(unittest.TestCase):
                           "using unquote(): not all characters escaped: "
                           "%s" % result)
  
+    def test_unquoting_badpercent(self):
+        # Test unquoting on bad percent-escapes
+        given = '%xab'
+        expect = given
+        result = urllib.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+        given = '%x'
+        expect = given
+        result = urllib.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+        given = '%'
+        expect = given
+        result = urllib.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+
+    def test_unquoting_mixed_case(self):
+        # Test unquoting on mixed-case hex digits in the percent-escapes
+        given = '%Ab%eA'
+        expect = '\xab\xea'
+        result = urllib.unquote(given)
+        self.assertEqual(expect, result, "using unquote(): %r != %r"
+                         % (expect, result))
+
      def test_unquoting_parts(self):
          # Make sure unquoting works when have non-quoted characters
          # interspersed
diff --git a/Lib/urllib.py b/Lib/urllib.py

index 58e750a5e20b8c5b0cc3c07adafae41ba6f29d13..c74dd3c0c81f8534ba6eb7ff5af3ba587d9bf24f 100644 (file)
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -1158,8 +1158,8 @@ def splitvalue(attr):
      if match: return match.group(1, 2)
      return attr, None
  
-_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
-_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
+_hexdig = '0123456789ABCDEFabcdef'
+_hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig)
  
  def unquote(s):
      """unquote('abc%20def') -> 'abc def'."""
diff --git a/Lib/urlparse.py b/Lib/urlparse.py

index 1d065d3986e8d445c008697bcec67f40c1c4fa5a..f6e20afdb8bdb31996cd78dc8877344cccf24581 100644 (file)
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -272,8 +272,9 @@ def urldefrag(url):
  # Cannot use directly from urllib as it would create circular reference.
  # urllib uses urlparse methods ( urljoin)
  
-_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
-_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
+
+_hexdig = '0123456789ABCDEFabcdef'
+_hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig)
  
  def unquote(s):
      """unquote('abc%20def') -> 'abc def'."""
author	Senthil Kumaran <orsenthil@gmail.com>
	Thu, 18 Mar 2010 12:14:15 +0000 (12:14 +0000)
committer	Senthil Kumaran <orsenthil@gmail.com>
	Thu, 18 Mar 2010 12:14:15 +0000 (12:14 +0000)
Lib/test/test_urllib.py		patch \| blob \| history
Lib/urllib.py		patch \| blob \| history
Lib/urlparse.py		patch \| blob \| history