granicus.if.org Git - python/commitdiff
Issue #20538: UTF-7 incremental decoder produced inconsistent string when
author Serhiy Storchaka <storchaka@gmail.com>
Sat, 8 Feb 2014 12:01:29 +0000 (14:01 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Sat, 8 Feb 2014 12:01:29 +0000 (14:01 +0200)
input was truncated in BASE64 section.

Lib/test/test_codecs.py
Misc/NEWS
Objects/unicodeobject.c

index 2c6dce72a23f324c8717c18609707c29a5f9a237..cb618ece8c2e507ba4877534af3e583f218d6bed 100644 (file)
@@ -852,13 +852,40 @@ class UTF7Test(ReadTest, unittest.TestCase):
 
     def test_partial(self):
         self.check_partial(
-            "a+-b",
+            'a+-b\x00c\x80d\u0100e\U00010000f',
             [
-                "a",
-                "a",
-                "a+",
-                "a+-",
-                "a+-b",
+                'a',
+                'a',
+                'a+',
+                'a+-',
+                'a+-b',
+                'a+-b',
+                'a+-b',
+                'a+-b',
+                'a+-b',
+                'a+-b\x00',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c',
+                'a+-b\x00c\x80',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d',
+                'a+-b\x00c\x80d\u0100',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e',
+                'a+-b\x00c\x80d\u0100e\U00010000',
+                'a+-b\x00c\x80d\u0100e\U00010000f',
             ]
         )
 
index e663bfea9b2f02ae4188df948f78fab03782571a..adce2c19b3d3d384061440deb706addea553564e 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.3.4 release candidate 1?
 Core and Builtins
 -----------------
 
+- Issue #20538: UTF-7 incremental decoder produced inconsistent string when
+  input was truncated in BASE64 section.
+
 - Issue #17825: Cursor "^" is correctly positioned for SyntaxError and
   IndentationError.
 
index a149177a09fbaae12b5d11818fae44a6166e4a1f..beafaa44963436da679d3d2d697350dfee577cc8 100644 (file)
@@ -4474,8 +4474,16 @@ utf7Error:
     /* return state */
     if (consumed) {
         if (inShift) {
-            outpos = shiftOutStart; /* back off output */
             *consumed = startinpos;
+            if (outpos != shiftOutStart &&
+                PyUnicode_MAX_CHAR_VALUE(unicode) > 127) {
+                PyObject *result = PyUnicode_FromKindAndData(
+                        PyUnicode_KIND(unicode), PyUnicode_DATA(unicode),
+                        shiftOutStart);
+                Py_DECREF(unicode);
+                unicode = result;
+            }
+            outpos = shiftOutStart; /* back off output */
         }
         else {
             *consumed = s-starts;