From: R. David Murray Date: Tue, 25 May 2010 15:20:46 +0000 (+0000) Subject: Issue 8143: sync unquote in urlparse with urllib; add comment about doing so. X-Git-Tag: v2.7rc1~87 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bfbdefe5392c60d36ec12ffc9b52a156f4610f3e;p=python Issue 8143: sync unquote in urlparse with urllib; add comment about doing so. unquote is duplicated in the two files to avoid a circular reference. (This is fixed in Python3.) Updates keep getting made to the public unquote without fixing the urlparse one, however, so this fix syncs the two and adds a comment to both to make sure changes are applied to both. --- diff --git a/Lib/urllib.py b/Lib/urllib.py index de10c7e4e6..de67f6721d 100644 --- a/Lib/urllib.py +++ b/Lib/urllib.py @@ -1156,6 +1156,10 @@ def splitvalue(attr): if match: return match.group(1, 2) return attr, None +# urlparse contains a duplicate of this method to avoid a circular import. If +# you update this method, also update the copy in urlparse. This code +# duplication does not exist in Python3. + _hexdig = '0123456789ABCDEFabcdef' _hextochr = dict((a + b, chr(int(a + b, 16))) for a in _hexdig for b in _hexdig) diff --git a/Lib/urlparse.py b/Lib/urlparse.py index 65bb9cb33a..2a0407e1cb 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -301,25 +301,29 @@ def urldefrag(url): return url, '' # unquote method for parse_qs and parse_qsl -# Cannot use directly from urllib as it would create circular reference. -# urllib uses urlparse methods ( urljoin) - +# Cannot use directly from urllib as it would create a circular reference +# because urllib uses urlparse methods (urljoin). If you update this function, +# update it also in urllib. This code duplication does not exist in Python3. 
_hexdig = '0123456789ABCDEFabcdef' -_hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig) +_hextochr = dict((a+b, chr(int(a+b,16))) + for a in _hexdig for b in _hexdig) def unquote(s): """unquote('abc%20def') -> 'abc def'.""" res = s.split('%') - for i in xrange(1, len(res)): - item = res[i] + # fastpath + if len(res) == 1: + return s + s = res[0] + for item in res[1:]: try: - res[i] = _hextochr[item[:2]] + item[2:] + s += _hextochr[item[:2]] + item[2:] except KeyError: - res[i] = '%' + item + s += '%' + item except UnicodeDecodeError: - res[i] = unichr(int(item[:2], 16)) + item[2:] - return "".join(res) + s += unichr(int(item[:2], 16)) + item[2:] + return s def parse_qs(qs, keep_blank_values=0, strict_parsing=0): """Parse a query given as a string argument.