granicus.if.org Git - python/commitdiff
Issue #19279: UTF-7 decoder no more produces illegal unicode strings.
author Serhiy Storchaka <storchaka@gmail.com>
Sat, 19 Oct 2013 17:37:49 +0000 (20:37 +0300)
committer Serhiy Storchaka <storchaka@gmail.com>
Sat, 19 Oct 2013 17:37:49 +0000 (20:37 +0300)
Lib/test/test_codecs.py
Misc/NEWS
Objects/unicodeobject.c

index 0ccf8183e57c4c78eb18de07eed91acf74e8dd02..c9a25154aef94bec5cdc9a086b0e4236a0c2a1ca 100644 (file)
@@ -611,6 +611,35 @@ class UTF7Test(ReadTest):
             ]
         )
 
+    def test_errors(self):  # each malformed input must raise in strict mode and decode to `expected` with 'replace'
+        tests = [  # pairs of (raw byte string, expected unicode result when decoding with errors='replace')
+            ('a\xffb', u'a\ufffdb'),  # byte 0xff is outside ASCII; it is expected to become U+FFFD
+            ('a+IK', u'a\ufffd'),
+            ('a+IK-b', u'a\ufffdb'),
+            ('a+IK,b', u'a\ufffdb'),
+            ('a+IKx', u'a\u20ac\ufffd'),
+            ('a+IKx-b', u'a\u20ac\ufffdb'),
+            ('a+IKwgr', u'a\u20ac\ufffd'),
+            ('a+IKwgr-b', u'a\u20ac\ufffdb'),
+            ('a+IKwgr,', u'a\u20ac\ufffd'),
+            ('a+IKwgr,-b', u'a\u20ac\ufffd-b'),
+            ('a+IKwgrB', u'a\u20ac\u20ac\ufffd'),
+            ('a+IKwgrB-b', u'a\u20ac\u20ac\ufffdb'),
+            ('a+/,+IKw-b', u'a\ufffd\u20acb'),  # a second '+...-' section follows the broken one and is expected to decode to U+20AC
+            ('a+//,+IKw-b', u'a\ufffd\u20acb'),
+            ('a+///,+IKw-b', u'a\uffff\ufffd\u20acb'),
+            ('a+////,+IKw-b', u'a\uffff\ufffd\u20acb'),
+        ]
+        for raw, expected in tests:
+            self.assertRaises(UnicodeDecodeError, codecs.utf_7_decode,  # strict decoding must reject every raw input
+                              raw, 'strict', True)  # NOTE(review): third argument is presumably the `final` flag -- confirm
+            self.assertEqual(raw.decode('utf-7', 'replace'), expected)  # 'replace' must yield exactly the expected text
+
+    def test_nonbmp(self):  # U+104A0 (above U+FFFF) must encode to and decode from '+2AHcoA-'
+        self.assertEqual(u'\U000104A0'.encode(self.encoding), '+2AHcoA-')  # written as a single \U escape
+        self.assertEqual(u'\ud801\udca0'.encode(self.encoding), '+2AHcoA-')  # written as the surrogate pair D801 DCA0; same bytes expected
+        self.assertEqual('+2AHcoA-'.decode(self.encoding), u'\U000104A0')  # decoding must give back U+104A0
+
 class UTF16ExTest(unittest.TestCase):
 
     def test_errors(self):
index e56e3b83a1834f2455f70de202548ab632956df7..b0b4f104296c782e1a814a825d48f153e58acec8 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -9,6 +9,8 @@ What's New in Python 2.7.6?
 Core and Builtins
 -----------------
 
+- Issue #19279: UTF-7 decoder no more produces illegal unicode strings.
+
 - Issue #18739: Fix an inconsistency between math.log(n) and math.log(long(n));
   the results could be off from one another by a ulp or two.
 
index 866eb9b0589e2bf0347e1689dd53e896849727a2..5ce9c88efa28ced6582ab2c6b26c6793ff1c6535 100644 (file)
@@ -1671,6 +1671,7 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
                                        (base64buffer >> (base64bits-16));
                     base64bits -= 16;
                     base64buffer &= (1 << base64bits) - 1; /* clear high bits */
+                    assert(outCh <= 0xffff);
                     if (surrogate) {
                         /* expecting a second surrogate */
                         if (outCh >= 0xDC00 && outCh <= 0xDFFF) {
@@ -1737,6 +1738,7 @@ PyObject *PyUnicode_DecodeUTF7Stateful(const char *s,
                 inShift = 1;
                 shiftOutStart = p;
                 base64bits = 0;
+                base64buffer = 0;
             }
         }
         else if (DECODE_DIRECT(ch)) { /* character decodes as itself */