bpo-22602: Raise an exception in the UTF-7 decoder for ill-formed sequences starting with "+".

author Zackery Spytz <zspytz@gmail.com>

Sun, 19 Aug 2018 04:43:38 +0000 (22:43 -0600)

committer Serhiy Storchaka <storchaka@gmail.com>

Sun, 19 Aug 2018 04:43:38 +0000 (07:43 +0300)
author Zackery Spytz <zspytz@gmail.com>
Sun, 19 Aug 2018 04:43:38 +0000 (22:43 -0600)
committer Serhiy Storchaka <storchaka@gmail.com>
Sun, 19 Aug 2018 04:43:38 +0000 (07:43 +0300)
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py

index a59a5e21358e7b6b85e1ec62d4a516e0157387b9..86d0dde1705763dca23cef25574c46290356732f 100644 (file)
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1020,6 +1020,7 @@ class UTF7Test(ReadTest, unittest.TestCase):
              (b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'),
              (b'a+IKw-b\xff', 'a\u20acb\ufffd'),
              (b'a+IKw\xffb', 'a\u20ac\ufffdb'),
+            (b'a+@b', 'a\ufffdb'),
          ]
          for raw, expected in tests:
              with self.subTest(raw=raw):
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index 3cc018c0cc2caa8c3125233fde2a4d314dc4e0d9..fb7bb2d523fe6eebc86ce10810373e8741d30ae4 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1630,6 +1630,10 @@ class UnicodeTest(string_tests.CommonTest,
          for c in set_o:
              self.assertEqual(c.encode('ascii').decode('utf7'), c)
  
+        with self.assertRaisesRegex(UnicodeDecodeError,
+                                    'ill-formed sequence'):
+            b'+@'.decode('utf-7')
+
      def test_codecs_utf8(self):
          self.assertEqual(''.encode('utf-8'), b'')
          self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
diff --git a/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst b/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst

new file mode 100644 (file)

index 0000000..5b113e3
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst
@@ -0,0 +1,3 @@
+The UTF-7 decoder now raises :exc:`UnicodeDecodeError` for ill-formed
+sequences starting with "+" (as specified in RFC 2152).  Patch by Zackery
+Spytz.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 04fd6d03b464e8474a23e14c293929ae499e597f..0460d184932ee02f5498c6bd56d17e7fd19a4f76 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4479,6 +4479,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
                  if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0)
                      goto onError;
              }
+            else if (s < e && !IS_BASE64(*s)) {
+                s++;
+                errmsg = "ill-formed sequence";
+                goto utf7Error;
+            }
              else { /* begin base64-encoded section */
                  inShift = 1;
                  surrogate = 0;
author	Zackery Spytz <zspytz@gmail.com>
	Sun, 19 Aug 2018 04:43:38 +0000 (22:43 -0600)
committer	Serhiy Storchaka <storchaka@gmail.com>
	Sun, 19 Aug 2018 04:43:38 +0000 (07:43 +0300)
Lib/test/test_codecs.py		patch | blob | history
Lib/test/test_unicode.py		patch | blob | history
Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst	[new file with mode: 0644]	patch | blob
Objects/unicodeobject.c		patch | blob | history