From: Senthil Kumaran Date: Thu, 22 Jul 2010 01:47:30 +0000 (+0000) Subject: Reverting the checkin made in revision 82940, as it was adding new parameters to... X-Git-Tag: v2.7.1rc1~574 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=880685f69894d3d267ac41abe0c06db3ba9a322d;p=python Reverting the checkin made in revision 82940, as it was adding new parameters to quote function in a bugfix release. Discussed in issue1712522 --- diff --git a/Doc/library/urllib.rst b/Doc/library/urllib.rst index 7b4d0bf2bc..5621f51226 100644 --- a/Doc/library/urllib.rst +++ b/Doc/library/urllib.rst @@ -202,40 +202,24 @@ High-level interface Utility functions ----------------- -.. function:: quote(string[, safe[, encoding[, errors]]]) +.. function:: quote(string[, safe]) Replace special characters in *string* using the ``%xx`` escape. Letters, digits, and the characters ``'_.-'`` are never quoted. By default, this - function is intended for quoting the path section of the URL. The optional + function is intended for quoting the path section of the URL.The optional *safe* parameter specifies additional characters that should not be quoted --- its default value is ``'/'``. - *string* may be either a :class:`str` or a :class:`unicode`. - - The optional *encoding* and *errors* parameters specify how to deal with - non-ASCII characters, as accepted by the :meth:`unicode.encode` method. - *encoding* defaults to ``'utf-8'``. - *errors* defaults to ``'strict'``, meaning unsupported characters raise a - :class:`UnicodeEncodeError`. - Non-Unicode strings are not encoded by default, and all bytes are allowed. - Example: ``quote('/~connolly/')`` yields ``'/%7econnolly/'``. - Example: ``quote(u'/El Niño/')`` yields ``'/El%20Ni%C3%B1o/'``. - - .. versionchanged:: 2.7.1 - Added *encoding* and *errors* parameters. - -.. function:: quote_plus(string[, safe[, encoding[, errors]]]) +.. function:: quote_plus(string[, safe]) Like :func:`quote`, but also replaces spaces by plus signs, as required for quoting HTML form values when building up a query string to go into a URL. Plus signs in the original string are escaped unless they are included in *safe*. It also does not have *safe* default to ``'/'``. - Example: ``quote_plus(u'/El Niño/')`` yields ``'%2FEl+Ni%C3%B1o%2F'``. - .. function:: unquote(string) diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 16febaec44..7119f55b15 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -355,38 +355,6 @@ class QuotingTests(unittest.TestCase): self.assertEqual(quote_by_default, result, "using quote_plus(): %s != %s" % (quote_by_default, result)) - # Safe expressed as unicode rather than str - result = urllib.quote(quote_by_default, safe=u"<>") - self.assertEqual(quote_by_default, result, - "using quote(): %r != %r" % (quote_by_default, result)) - # "Safe" non-ASCII bytes should still work - # (Technically disallowed by the URI standard, but allowed for - # backwards compatibility with previous versions of Python) - result = urllib.quote(b"a\xfcb", safe=b"\xfc") - expect = b"a\xfcb" - self.assertEqual(expect, result, - "using quote(): %r != %r" % - (expect, result)) - # Same as above, but with 'safe' as a unicode rather than str - # "Safe" non-ASCII unicode characters should have no effect - # (Since URIs are not allowed to have non-ASCII characters) - result = urllib.quote(b"a\xfcb", safe=u"\xfc") - expect = urllib.quote(b"a\xfcb", safe="") - self.assertEqual(expect, result, - "using quote(): %r != %r" % - (expect, result)) - # Same as above, but quoting a unicode rather than a str - result = urllib.quote(u"a\xfcb", encoding="latin-1", safe=b"\xfc") - expect = b"a\xfcb" - self.assertEqual(expect, result, - "using quote(): %r != %r" % - (expect, result)) - # Same as above, but with both the quoted value and 'safe' as unicode - result = urllib.quote(u"a\xfcb", encoding="latin-1", safe=u"\xfc") - expect = urllib.quote(u"a\xfcb", encoding="latin-1", safe="") - self.assertEqual(expect, result, - "using quote(): %r != %r" % - (expect, result)) def test_default_quoting(self): # Make sure all characters that should be quoted are by default sans @@ -439,81 +407,6 @@ class QuotingTests(unittest.TestCase): 'alpha%2Bbeta+gamma') self.assertEqual(urllib.quote_plus('alpha+beta gamma', '+'), 'alpha+beta+gamma') - # Test with unicode - self.assertEqual(urllib.quote_plus(u'alpha+beta gamma'), - 'alpha%2Bbeta+gamma') - # Test with safe unicode - self.assertEqual(urllib.quote_plus('alpha+beta gamma', u'+'), - 'alpha+beta+gamma') - - def test_quote_bytes(self): - # Non-ASCII bytes should quote directly to percent-encoded values - given = b"\xa2\xd8ab\xff" - expect = "%A2%D8ab%FF" - result = urllib.quote(given) - self.assertEqual(expect, result, - "using quote(): %r != %r" % (expect, result)) - # Encoding argument should raise UnicodeDecodeError on bytes input - # with non-ASCII characters (just as with str.encode). - self.assertRaises(UnicodeDecodeError, urllib.quote, given, - encoding="latin-1") - - def test_quote_with_unicode(self): - # Characters in Latin-1 range, encoded by default in UTF-8 - given = u"\xa2\xd8ab\xff" - expect = "%C2%A2%C3%98ab%C3%BF" - result = urllib.quote(given) - self.assertEqual(expect, result, - "using quote(): %r != %r" % (expect, result)) - # Characters in Latin-1 range, encoded by with None (default) - result = urllib.quote(given, encoding=None, errors=None) - self.assertEqual(expect, result, - "using quote(): %r != %r" % (expect, result)) - # Characters in Latin-1 range, encoded with Latin-1 - given = u"\xa2\xd8ab\xff" - expect = "%A2%D8ab%FF" - result = urllib.quote(given, encoding="latin-1") - self.assertEqual(expect, result, - "using quote(): %r != %r" % (expect, result)) - # Characters in BMP, encoded by default in UTF-8 - given = u"\u6f22\u5b57" # "Kanji" - expect = "%E6%BC%A2%E5%AD%97" - result = urllib.quote(given) - self.assertEqual(expect, result, - "using quote(): %r != %r" % (expect, result)) - # Characters in BMP, encoded with Latin-1 - given = u"\u6f22\u5b57" - self.assertRaises(UnicodeEncodeError, urllib.quote, given, - encoding="latin-1") - # Characters in BMP, encoded with Latin-1, with replace error handling - given = u"\u6f22\u5b57" - expect = "%3F%3F" # "??" - result = urllib.quote(given, encoding="latin-1", - errors="replace") - self.assertEqual(expect, result, - "using quote(): %r != %r" % (expect, result)) - # Characters in BMP, Latin-1, with xmlcharref error handling - given = u"\u6f22\u5b57" - expect = "%26%2328450%3B%26%2323383%3B" # "漢字" - result = urllib.quote(given, encoding="latin-1", - errors="xmlcharrefreplace") - self.assertEqual(expect, result, - "using quote(): %r != %r" % (expect, result)) - - def test_quote_plus_with_unicode(self): - # Encoding (latin-1) test for quote_plus - given = u"\xa2\xd8 \xff" - expect = "%A2%D8+%FF" - result = urllib.quote_plus(given, encoding="latin-1") - self.assertEqual(expect, result, - "using quote_plus(): %r != %r" % (expect, result)) - # Errors test for quote_plus - given = u"ab\u6f22\u5b57 cd" - expect = "ab%3F%3F+cd" - result = urllib.quote_plus(given, encoding="latin-1", - errors="replace") - self.assertEqual(expect, result, - "using quote_plus(): %r != %r" % (expect, result)) class UnquotingTests(unittest.TestCase): """Tests for unquote() and unquote_plus() diff --git a/Lib/urllib.py b/Lib/urllib.py index 3460a56657..9c58923fc7 100644 --- a/Lib/urllib.py +++ b/Lib/urllib.py @@ -1193,7 +1193,7 @@ for i, c in zip(xrange(256), str(bytearray(xrange(256)))): _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i) _safe_quoters = {} -def quote(s, safe='/', encoding=None, errors=None): +def quote(s, safe='/'): """quote('abc def') -> 'abc%20def' Each part of a URL, e.g. the path info, the query, etc., has a @@ -1213,30 +1213,12 @@ def quote(s, safe='/', encoding=None, errors=None): is reserved, but in typical usage the quote function is being called on a path where the existing slash characters are used as reserved characters. - - string and safe may be either str or unicode objects. - - The optional encoding and errors parameters specify how to deal with the - non-ASCII characters, as accepted by the unicode.encode method. - By default, encoding='utf-8' (characters are encoded with UTF-8), and - errors='strict' (unsupported characters raise a UnicodeEncodeError). """ # fastpath if not s: if s is None: raise TypeError('None object cannot be quoted') return s - - if encoding is not None or isinstance(s, unicode): - if encoding is None: - encoding = 'utf-8' - if errors is None: - errors = 'strict' - s = s.encode(encoding, errors) - if isinstance(safe, unicode): - # Normalize 'safe' by converting to str and removing non-ASCII chars - safe = safe.encode('ascii', 'ignore') - cachekey = (safe, always_safe) try: (quoter, safe) = _safe_quoters[cachekey] @@ -1250,12 +1232,12 @@ def quote(s, safe='/', encoding=None, errors=None): return s return ''.join(map(quoter, s)) -def quote_plus(s, safe='', encoding=None, errors=None): +def quote_plus(s, safe=''): """Quote the query fragment of a URL; replacing ' ' with '+'""" if ' ' in s: - s = quote(s, safe + ' ', encoding, errors) + s = quote(s, safe + ' ') return s.replace(' ', '+') - return quote(s, safe, encoding, errors) + return quote(s, safe) def urlencode(query, doseq=0): """Encode a sequence of two-element tuples or dictionary into a URL query string. diff --git a/Misc/NEWS b/Misc/NEWS index 979d35ab9a..3ed042156a 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -22,10 +22,6 @@ Library I/O error occurred. Now an IOError is raised instead. Patch by chuck (Jan Hosang). -- Issue 1712522: urllib.quote supports Unicode String with encoding and errors - parameter. The encoding parameter defaults to utf-8 and errors to strict. - Patch by Matt Giuca. - - Issue #7646: The fnmatch pattern cache no longer grows without bound. - Issue #9136: Fix 'dictionary changed size during iteration'