Merged revisions 82510 via svnmerge from

author Senthil Kumaran <orsenthil@gmail.com>

Sat, 3 Jul 2010 17:55:41 +0000 (17:55 +0000)

committer Senthil Kumaran <orsenthil@gmail.com>

Sat, 3 Jul 2010 17:55:41 +0000 (17:55 +0000)
author Senthil Kumaran <orsenthil@gmail.com>
Sat, 3 Jul 2010 17:55:41 +0000 (17:55 +0000)
committer Senthil Kumaran <orsenthil@gmail.com>
Sat, 3 Jul 2010 17:55:41 +0000 (17:55 +0000)
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst

index d9776be4bfb7b5d2aaec364171062c7f1027c1de..cfd995d4362aa121ada6668eedd8f1b906c41578 100644 (file)
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -307,23 +307,29 @@ The :mod:`urllib.parse` module defines the following functions:
     ``b'a&\xef'``.
  
  
-.. function:: urlencode(query, doseq=False)
-
-   Convert a mapping object or a sequence of two-element tuples  to a
-   "url-encoded" string, suitable to pass to :func:`urlopen` above as the
-   optional *data* argument.  This is useful to pass a dictionary of form
-   fields to a ``POST`` request.  The resulting string is a series of
-   ``key=value`` pairs separated by ``'&'`` characters, where both *key* and
-   *value* are quoted using :func:`quote_plus` above. When a sequence of
-   two-element tuples is used as the *query* argument, the first element of
-   each tuple is a key and the second is a value. The value element in itself
-   can be a sequence and in that case, if the optional parameter *doseq* is
-   evaluates to *True*, individual ``key=value`` pairs separated by ``'&'``are
-   generated for each element of the value sequence for the key.  The order of
-   parameters in the encoded string will match the order of parameter tuples in
-   the sequence. This module provides the functions :func:`parse_qs` and
-   :func:`parse_qsl` which are used to parse query strings into Python data
-   structures.
+.. function:: urlencode(query, doseq=False, safe='', encoding=None, errors=None)
+
+   Convert a mapping object or a sequence of two-element tuples, which may
+   contain :class:`str` or :class:`bytes` objects, to a "url-encoded" string,
+   suitable to pass to :func:`urlopen` above as the optional *data* argument.
+   This is useful to pass a dictionary of form fields to a ``POST`` request.
+   The resulting string is a series of ``key=value`` pairs separated by ``'&'``
+   characters, where both *key* and *value* are quoted using :func:`quote_plus`
+   above. When a sequence of two-element tuples is used as the *query*
+   argument, the first element of each tuple is a key and the second is a
+   value. The value element in itself can be a sequence and in that case, if
+   the optional parameter *doseq* evaluates to *True*, individual
+   ``key=value`` pairs separated by ``'&'`` are generated for each element of
+   the value sequence for the key.  The order of parameters in the encoded
+   string will match the order of parameter tuples in the sequence. This module
+   provides the functions :func:`parse_qs` and :func:`parse_qsl` which are used
+   to parse query strings into Python data structures.
+
+   When a key or value in *query* is a :class:`str`, the *safe*, *encoding* and
+   *errors* parameters are passed to :func:`quote_plus` for encoding.
+
+   .. versionchanged:: 3.2
+      The *query* parameter supports bytes and string objects.
  
  
  .. seealso::
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py

index f4b3766375cdbeca8daeff50ef1a836f3cd5d4e7..acd55778248c6f91726db4655e483a65ae500151 100644 (file)
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -797,6 +797,116 @@ class urlencode_Tests(unittest.TestCase):
          self.assertEqual("a=a&a=b",
                           urllib.parse.urlencode({"a": {"a": 1, "b": 1}}, True))
  
+    def test_urlencode_encoding(self):
+        # ASCII encoding. Expect %3F with errors="replace"
+        given = (('\u00a0', '\u00c1'),)
+        expect = '%3F=%3F'
+        result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
+        self.assertEqual(expect, result)
+
+        # Default is UTF-8 encoding.
+        given = (('\u00a0', '\u00c1'),)
+        expect = '%C2%A0=%C3%81'
+        result = urllib.parse.urlencode(given)
+        self.assertEqual(expect, result)
+
+        # Latin-1 encoding.
+        given = (('\u00a0', '\u00c1'),)
+        expect = '%A0=%C1'
+        result = urllib.parse.urlencode(given, encoding="latin-1")
+        self.assertEqual(expect, result)
+
+    def test_urlencode_encoding_doseq(self):
+        # ASCII encoding. Expect %3F with errors="replace"
+        given = (('\u00a0', '\u00c1'),)
+        expect = '%3F=%3F'
+        result = urllib.parse.urlencode(given, doseq=True,
+                                        encoding="ASCII", errors="replace")
+        self.assertEqual(expect, result)
+
+        # ASCII Encoding. On a sequence of values.
+        given = (("\u00a0", (1, "\u00c1")),)
+        expect = '%3F=1&%3F=%3F'
+        result = urllib.parse.urlencode(given, True,
+                                        encoding="ASCII", errors="replace")
+        self.assertEqual(expect, result)
+
+        # Utf-8
+        given = (("\u00a0", "\u00c1"),)
+        expect = '%C2%A0=%C3%81'
+        result = urllib.parse.urlencode(given, True)
+        self.assertEqual(expect, result)
+
+        given = (("\u00a0", (42, "\u00c1")),)
+        expect = '%C2%A0=42&%C2%A0=%C3%81'
+        result = urllib.parse.urlencode(given, True)
+        self.assertEqual(expect, result)
+
+        # latin-1
+        given = (("\u00a0", "\u00c1"),)
+        expect = '%A0=%C1'
+        result = urllib.parse.urlencode(given, True, encoding="latin-1")
+        self.assertEqual(expect, result)
+
+        given = (("\u00a0", (42, "\u00c1")),)
+        expect = '%A0=42&%A0=%C1'
+        result = urllib.parse.urlencode(given, True, encoding="latin-1")
+        self.assertEqual(expect, result)
+
+    def test_urlencode_bytes(self):
+        given = ((b'\xa0\x24', b'\xc1\x24'),)
+        expect = '%A0%24=%C1%24'
+        result = urllib.parse.urlencode(given)
+        self.assertEqual(expect, result)
+        result = urllib.parse.urlencode(given, True)
+        self.assertEqual(expect, result)
+
+        # Sequence of values
+        given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
+        expect = '%A0%24=42&%A0%24=%C1%24'
+        result = urllib.parse.urlencode(given, True)
+        self.assertEqual(expect, result)
+
+    def test_urlencode_encoding_safe_parameter(self):
+
+        # Send '$' (\x24) as safe character
+        # Default utf-8 encoding
+
+        given = ((b'\xa0\x24', b'\xc1\x24'),)
+        result = urllib.parse.urlencode(given, safe=":$")
+        expect = '%A0$=%C1$'
+        self.assertEqual(expect, result)
+
+        given = ((b'\xa0\x24', b'\xc1\x24'),)
+        result = urllib.parse.urlencode(given, doseq=True, safe=":$")
+        expect = '%A0$=%C1$'
+        self.assertEqual(expect, result)
+
+        # Safe parameter in sequence
+        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
+        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
+        result = urllib.parse.urlencode(given, True, safe=":$")
+        self.assertEqual(expect, result)
+
+        # Test all above in latin-1 encoding
+
+        given = ((b'\xa0\x24', b'\xc1\x24'),)
+        result = urllib.parse.urlencode(given, safe=":$",
+                                        encoding="latin-1")
+        expect = '%A0$=%C1$'
+        self.assertEqual(expect, result)
+
+        given = ((b'\xa0\x24', b'\xc1\x24'),)
+        expect = '%A0$=%C1$'
+        result = urllib.parse.urlencode(given, doseq=True, safe=":$",
+                                        encoding="latin-1")
+
+        given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
+        expect = '%A0$=%C1$&%A0$=13&%A0$=42'
+        result = urllib.parse.urlencode(given, True, safe=":$",
+                                        encoding="latin-1")
+        self.assertEqual(expect, result)
+
  class Pathname_Tests(unittest.TestCase):
      """Test pathname2url() and url2pathname()"""
  
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py

index b7890d84dd9ab97d8d2a6ad7b7a9348b93589e0e..27b732b78cda3c67acd7ecd01ac99643c96a8095 100644 (file)
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -533,7 +533,7 @@ def quote_from_bytes(bs, safe='/'):
          _safe_quoters[cachekey] = quoter
      return ''.join([quoter[char] for char in bs])
  
-def urlencode(query, doseq=False):
+def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
      """Encode a sequence of two-element tuples or dictionary into a URL query string.
  
      If any values in the query arg are sequences and doseq is true, each
@@ -542,6 +542,10 @@ def urlencode(query, doseq=False):
      If the query arg is a sequence of two-element tuples, the order of the
      parameters in the output will match the order of parameters in the
      input.
+
+    The components of the query arg may each be either a string or a bytes
+    type. When a component is a string, the safe, encoding and errors
+    parameters are passed to quote_plus for encoding.
      """
  
      if hasattr(query, "items"):
@@ -566,14 +570,28 @@ def urlencode(query, doseq=False):
      l = []
      if not doseq:
          for k, v in query:
-            k = quote_plus(str(k))
-            v = quote_plus(str(v))
+            if isinstance(k, bytes):
+                k = quote_plus(k, safe)
+            else:
+                k = quote_plus(str(k), safe, encoding, errors)
+
+            if isinstance(v, bytes):
+                v = quote_plus(v, safe)
+            else:
+                v = quote_plus(str(v), safe, encoding, errors)
              l.append(k + '=' + v)
      else:
          for k, v in query:
-            k = quote_plus(str(k))
-            if isinstance(v, str):
-                v = quote_plus(v)
+            if isinstance(k, bytes):
+                k = quote_plus(k, safe)
+            else:
+                k = quote_plus(str(k), safe, encoding, errors)
+
+            if isinstance(v, bytes):
+                v = quote_plus(v, safe)
+                l.append(k + '=' + v)
+            elif isinstance(v, str):
+                v = quote_plus(v, safe, encoding, errors)
                  l.append(k + '=' + v)
              else:
                  try:
@@ -581,12 +599,16 @@ def urlencode(query, doseq=False):
                      x = len(v)
                  except TypeError:
                      # not a sequence
-                    v = quote_plus(str(v))
+                    v = quote_plus(str(v), safe, encoding, errors)
                      l.append(k + '=' + v)
                  else:
                      # loop over the sequence
                      for elt in v:
-                        l.append(k + '=' + quote_plus(str(elt)))
+                        if isinstance(elt, bytes):
+                            elt = quote_plus(elt, safe)
+                        else:
+                            elt = quote_plus(str(elt), safe, encoding, errors)
+                        l.append(k + '=' + elt)
      return '&'.join(l)
  
  # Utilities to parse URLs (most of these return None for missing parts):
diff --git a/Misc/NEWS b/Misc/NEWS

index cf9cf74acae627c9ae9a946ba3fa434f60daf594..d630751de8df7ff5a675e69a678a7ed2063de0b8 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -75,6 +75,9 @@ C-API
  Library
  -------
  
+- Issue #5468: urlencode now handles the bytes type and other encodings in its
+  query parameter. Patch by Dan Mahn.
+
  - Issue #7673: Fix security vulnerability (CVE-2010-2089) in the audioop
    module, ensure that the input string length is a multiple of the frame size
author	Senthil Kumaran <orsenthil@gmail.com>
	Sat, 3 Jul 2010 17:55:41 +0000 (17:55 +0000)
committer	Senthil Kumaran <orsenthil@gmail.com>
	Sat, 3 Jul 2010 17:55:41 +0000 (17:55 +0000)
Doc/library/urllib.parse.rst		patch \| blob \| history
Lib/test/test_urllib.py		patch \| blob \| history
Lib/urllib/parse.py		patch \| blob \| history
Misc/NEWS		patch \| blob \| history