From f10567233237673e8b96aff8a9753a6426fa4474 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 19 Oct 2013 20:37:49 +0300 Subject: [PATCH] Issue #19279: UTF-7 decoder no longer produces illegal unicode strings. --- Lib/test/test_codecs.py | 29 +++++++++++++++++++++++++++++ Misc/NEWS | 2 ++ Objects/unicodeobject.c | 2 ++ 3 files changed, 33 insertions(+) diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index 0ccf8183e5..c9a25154ae 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -611,6 +611,35 @@ class UTF7Test(ReadTest): ] ) + def test_errors(self): + tests = [ + ('a\xffb', u'a\ufffdb'), + ('a+IK', u'a\ufffd'), + ('a+IK-b', u'a\ufffdb'), + ('a+IK,b', u'a\ufffdb'), + ('a+IKx', u'a\u20ac\ufffd'), + ('a+IKx-b', u'a\u20ac\ufffdb'), + ('a+IKwgr', u'a\u20ac\ufffd'), + ('a+IKwgr-b', u'a\u20ac\ufffdb'), + ('a+IKwgr,', u'a\u20ac\ufffd'), + ('a+IKwgr,-b', u'a\u20ac\ufffd-b'), + ('a+IKwgrB', u'a\u20ac\u20ac\ufffd'), + ('a+IKwgrB-b', u'a\u20ac\u20ac\ufffdb'), + ('a+/,+IKw-b', u'a\ufffd\u20acb'), + ('a+//,+IKw-b', u'a\ufffd\u20acb'), + ('a+///,+IKw-b', u'a\uffff\ufffd\u20acb'), + ('a+////,+IKw-b', u'a\uffff\ufffd\u20acb'), + ] + for raw, expected in tests: + self.assertRaises(UnicodeDecodeError, codecs.utf_7_decode, + raw, 'strict', True) + self.assertEqual(raw.decode('utf-7', 'replace'), expected) + + def test_nonbmp(self): + self.assertEqual(u'\U000104A0'.encode(self.encoding), '+2AHcoA-') + self.assertEqual(u'\ud801\udca0'.encode(self.encoding), '+2AHcoA-') + self.assertEqual('+2AHcoA-'.decode(self.encoding), u'\U000104A0') + class UTF16ExTest(unittest.TestCase): def test_errors(self): diff --git a/Misc/NEWS b/Misc/NEWS index e56e3b83a1..b0b4f10429 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -9,6 +9,8 @@ What's New in Python 2.7.6? Core and Builtins ----------------- +- Issue #19279: UTF-7 decoder no longer produces illegal unicode strings.
+ - Issue #18739: Fix an inconsistency between math.log(n) and math.log(long(n)); the results could be off from one another by a ulp or two. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 866eb9b058..5ce9c88efa 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1671,6 +1671,7 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, (base64buffer >> (base64bits-16)); base64bits -= 16; base64buffer &= (1 << base64bits) - 1; /* clear high bits */ + assert(outCh <= 0xffff); if (surrogate) { /* expecting a second surrogate */ if (outCh >= 0xDC00 && outCh <= 0xDFFF) { @@ -1737,6 +1738,7 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s, inShift = 1; shiftOutStart = p; base64bits = 0; + base64buffer = 0; } } else if (DECODE_DIRECT(ch)) { /* character decodes as itself */ -- 2.50.1