Utility functions
-----------------
-.. function:: quote(string[, safe[, encoding[, errors]]])
+.. function:: quote(string[, safe])
Replace special characters in *string* using the ``%xx`` escape. Letters,
digits, and the characters ``'_.-'`` are never quoted. By default, this
- function is intended for quoting the path section of the URL. The optional
+ function is intended for quoting the path section of the URL. The optional
*safe* parameter specifies additional characters that should not be quoted
--- its default value is ``'/'``.
- *string* may be either a :class:`str` or a :class:`unicode`.
-
- The optional *encoding* and *errors* parameters specify how to deal with
- non-ASCII characters, as accepted by the :meth:`unicode.encode` method.
- *encoding* defaults to ``'utf-8'``.
- *errors* defaults to ``'strict'``, meaning unsupported characters raise a
- :class:`UnicodeEncodeError`.
- Non-Unicode strings are not encoded by default, and all bytes are allowed.
-
Example: ``quote('/~connolly/')`` yields ``'/%7econnolly/'``.
- Example: ``quote(u'/El Niño/')`` yields ``'/El%20Ni%C3%B1o/'``.
-
- .. versionchanged:: 2.7.1
- Added *encoding* and *errors* parameters.
-
-.. function:: quote_plus(string[, safe[, encoding[, errors]]])
+.. function:: quote_plus(string[, safe])
Like :func:`quote`, but also replaces spaces by plus signs, as required for
quoting HTML form values when building up a query string to go into a URL.
Plus signs in the original string are escaped unless they are included in
*safe*. It also does not have *safe* default to ``'/'``.
- Example: ``quote_plus(u'/El Niño/')`` yields ``'%2FEl+Ni%C3%B1o%2F'``.
-
.. function:: unquote(string)
self.assertEqual(quote_by_default, result,
"using quote_plus(): %s != %s" %
(quote_by_default, result))
- # Safe expressed as unicode rather than str
- result = urllib.quote(quote_by_default, safe=u"<>")
- self.assertEqual(quote_by_default, result,
- "using quote(): %r != %r" % (quote_by_default, result))
- # "Safe" non-ASCII bytes should still work
- # (Technically disallowed by the URI standard, but allowed for
- # backwards compatibility with previous versions of Python)
- result = urllib.quote(b"a\xfcb", safe=b"\xfc")
- expect = b"a\xfcb"
- self.assertEqual(expect, result,
- "using quote(): %r != %r" %
- (expect, result))
- # Same as above, but with 'safe' as a unicode rather than str
- # "Safe" non-ASCII unicode characters should have no effect
- # (Since URIs are not allowed to have non-ASCII characters)
- result = urllib.quote(b"a\xfcb", safe=u"\xfc")
- expect = urllib.quote(b"a\xfcb", safe="")
- self.assertEqual(expect, result,
- "using quote(): %r != %r" %
- (expect, result))
- # Same as above, but quoting a unicode rather than a str
- result = urllib.quote(u"a\xfcb", encoding="latin-1", safe=b"\xfc")
- expect = b"a\xfcb"
- self.assertEqual(expect, result,
- "using quote(): %r != %r" %
- (expect, result))
- # Same as above, but with both the quoted value and 'safe' as unicode
- result = urllib.quote(u"a\xfcb", encoding="latin-1", safe=u"\xfc")
- expect = urllib.quote(u"a\xfcb", encoding="latin-1", safe="")
- self.assertEqual(expect, result,
- "using quote(): %r != %r" %
- (expect, result))
def test_default_quoting(self):
# Make sure all characters that should be quoted are by default sans
'alpha%2Bbeta+gamma')
self.assertEqual(urllib.quote_plus('alpha+beta gamma', '+'),
'alpha+beta+gamma')
- # Test with unicode
- self.assertEqual(urllib.quote_plus(u'alpha+beta gamma'),
- 'alpha%2Bbeta+gamma')
- # Test with safe unicode
- self.assertEqual(urllib.quote_plus('alpha+beta gamma', u'+'),
- 'alpha+beta+gamma')
-
- def test_quote_bytes(self):
- # Non-ASCII bytes should quote directly to percent-encoded values
- given = b"\xa2\xd8ab\xff"
- expect = "%A2%D8ab%FF"
- result = urllib.quote(given)
- self.assertEqual(expect, result,
- "using quote(): %r != %r" % (expect, result))
- # Encoding argument should raise UnicodeDecodeError on bytes input
- # with non-ASCII characters (just as with str.encode).
- self.assertRaises(UnicodeDecodeError, urllib.quote, given,
- encoding="latin-1")
-
- def test_quote_with_unicode(self):
- # Characters in Latin-1 range, encoded by default in UTF-8
- given = u"\xa2\xd8ab\xff"
- expect = "%C2%A2%C3%98ab%C3%BF"
- result = urllib.quote(given)
- self.assertEqual(expect, result,
- "using quote(): %r != %r" % (expect, result))
- # Characters in Latin-1 range, encoded by with None (default)
- result = urllib.quote(given, encoding=None, errors=None)
- self.assertEqual(expect, result,
- "using quote(): %r != %r" % (expect, result))
- # Characters in Latin-1 range, encoded with Latin-1
- given = u"\xa2\xd8ab\xff"
- expect = "%A2%D8ab%FF"
- result = urllib.quote(given, encoding="latin-1")
- self.assertEqual(expect, result,
- "using quote(): %r != %r" % (expect, result))
- # Characters in BMP, encoded by default in UTF-8
- given = u"\u6f22\u5b57" # "Kanji"
- expect = "%E6%BC%A2%E5%AD%97"
- result = urllib.quote(given)
- self.assertEqual(expect, result,
- "using quote(): %r != %r" % (expect, result))
- # Characters in BMP, encoded with Latin-1
- given = u"\u6f22\u5b57"
- self.assertRaises(UnicodeEncodeError, urllib.quote, given,
- encoding="latin-1")
- # Characters in BMP, encoded with Latin-1, with replace error handling
- given = u"\u6f22\u5b57"
- expect = "%3F%3F" # "??"
- result = urllib.quote(given, encoding="latin-1",
- errors="replace")
- self.assertEqual(expect, result,
- "using quote(): %r != %r" % (expect, result))
- # Characters in BMP, Latin-1, with xmlcharref error handling
- given = u"\u6f22\u5b57"
- expect = "%26%2328450%3B%26%2323383%3B" # "漢字"
- result = urllib.quote(given, encoding="latin-1",
- errors="xmlcharrefreplace")
- self.assertEqual(expect, result,
- "using quote(): %r != %r" % (expect, result))
-
- def test_quote_plus_with_unicode(self):
- # Encoding (latin-1) test for quote_plus
- given = u"\xa2\xd8 \xff"
- expect = "%A2%D8+%FF"
- result = urllib.quote_plus(given, encoding="latin-1")
- self.assertEqual(expect, result,
- "using quote_plus(): %r != %r" % (expect, result))
- # Errors test for quote_plus
- given = u"ab\u6f22\u5b57 cd"
- expect = "ab%3F%3F+cd"
- result = urllib.quote_plus(given, encoding="latin-1",
- errors="replace")
- self.assertEqual(expect, result,
- "using quote_plus(): %r != %r" % (expect, result))
class UnquotingTests(unittest.TestCase):
"""Tests for unquote() and unquote_plus()
_safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
_safe_quoters = {}
-def quote(s, safe='/', encoding=None, errors=None):
+def quote(s, safe='/'):
"""quote('abc def') -> 'abc%20def'
Each part of a URL, e.g. the path info, the query, etc., has a
is reserved, but in typical usage the quote function is being
called on a path where the existing slash characters are used as
reserved characters.
-
- string and safe may be either str or unicode objects.
-
- The optional encoding and errors parameters specify how to deal with the
- non-ASCII characters, as accepted by the unicode.encode method.
- By default, encoding='utf-8' (characters are encoded with UTF-8), and
- errors='strict' (unsupported characters raise a UnicodeEncodeError).
"""
# fastpath
if not s:
if s is None:
raise TypeError('None object cannot be quoted')
return s
-
- if encoding is not None or isinstance(s, unicode):
- if encoding is None:
- encoding = 'utf-8'
- if errors is None:
- errors = 'strict'
- s = s.encode(encoding, errors)
- if isinstance(safe, unicode):
- # Normalize 'safe' by converting to str and removing non-ASCII chars
- safe = safe.encode('ascii', 'ignore')
-
cachekey = (safe, always_safe)
try:
(quoter, safe) = _safe_quoters[cachekey]
return s
return ''.join(map(quoter, s))
-def quote_plus(s, safe='', encoding=None, errors=None):
+def quote_plus(s, safe=''):
"""Quote the query fragment of a URL; replacing ' ' with '+'"""
if ' ' in s:
- s = quote(s, safe + ' ', encoding, errors)
+ s = quote(s, safe + ' ')
return s.replace(' ', '+')
- return quote(s, safe, encoding, errors)
+ return quote(s, safe)
def urlencode(query, doseq=0):
"""Encode a sequence of two-element tuples or dictionary into a URL query string.