From e349bf23584eef20e0d1e1b2989d9b1430f15507 Mon Sep 17 00:00:00 2001 From: Zackery Spytz Date: Sat, 18 Aug 2018 22:43:38 -0600 Subject: [PATCH] bpo-22602: Raise an exception in the UTF-7 decoder for ill-formed sequences starting with "+". (GH-8741) The UTF-7 decoder now raises UnicodeDecodeError for ill-formed sequences starting with "+" (as specified in RFC 2152). --- Lib/test/test_codecs.py | 1 + Lib/test/test_unicode.py | 4 ++++ .../next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst | 3 +++ Objects/unicodeobject.c | 5 +++++ 4 files changed, 13 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index a59a5e2135..86d0dde170 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1020,6 +1020,7 @@ class UTF7Test(ReadTest, unittest.TestCase): (b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'), (b'a+IKw-b\xff', 'a\u20acb\ufffd'), (b'a+IKw\xffb', 'a\u20ac\ufffdb'), + (b'a+@b', 'a\ufffdb'), ] for raw, expected in tests: with self.subTest(raw=raw): diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 3cc018c0cc..fb7bb2d523 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -1630,6 +1630,10 @@ class UnicodeTest(string_tests.CommonTest, for c in set_o: self.assertEqual(c.encode('ascii').decode('utf7'), c) + with self.assertRaisesRegex(UnicodeDecodeError, + 'ill-formed sequence'): + b'+@'.decode('utf-7') + def test_codecs_utf8(self): self.assertEqual(''.encode('utf-8'), b'') self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac') diff --git a/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst b/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst new file mode 100644 index 0000000000..5b113e3204 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst @@ -0,0 +1,3 @@ +The UTF-7 decoder now raises :exc:`UnicodeDecodeError` for ill-formed +sequences starting with "+" (as specified in RFC 2152). Patch by Zackery +Spytz. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 04fd6d03b4..0460d18493 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4479,6 +4479,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s, if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0) goto onError; } + else if (s < e && !IS_BASE64(*s)) { + s++; + errmsg = "ill-formed sequence"; + goto utf7Error; + } else { /* begin base64-encoded section */ inShift = 1; surrogate = 0; -- 2.50.1