From: Zackery Spytz
Date: Sun, 19 Aug 2018 04:43:38 +0000 (-0600)
Subject: bpo-22602: Raise an exception in the UTF-7 decoder for ill-formed sequences starting...
X-Git-Tag: v3.8.0a1~1193
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e349bf23584eef20e0d1e1b2989d9b1430f15507;p=python

bpo-22602: Raise an exception in the UTF-7 decoder for ill-formed sequences starting with "+". (GH-8741)

The UTF-7 decoder now raises UnicodeDecodeError for ill-formed sequences
starting with "+" (as specified in RFC 2152).
---

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index a59a5e2135..86d0dde170 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1020,6 +1020,7 @@ class UTF7Test(ReadTest, unittest.TestCase):
             (b'a+////,+IKw-b', 'a\uffff\ufffd\u20acb'),
             (b'a+IKw-b\xff', 'a\u20acb\ufffd'),
             (b'a+IKw\xffb', 'a\u20ac\ufffdb'),
+            (b'a+@b', 'a\ufffdb'),
         ]
         for raw, expected in tests:
             with self.subTest(raw=raw):
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
index 3cc018c0cc..fb7bb2d523 100644
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -1630,6 +1630,10 @@ class UnicodeTest(string_tests.CommonTest,
         for c in set_o:
             self.assertEqual(c.encode('ascii').decode('utf7'), c)
 
+        with self.assertRaisesRegex(UnicodeDecodeError,
+                                    'ill-formed sequence'):
+            b'+@'.decode('utf-7')
+
     def test_codecs_utf8(self):
         self.assertEqual(''.encode('utf-8'), b'')
         self.assertEqual('\u20ac'.encode('utf-8'), b'\xe2\x82\xac')
diff --git a/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst b/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst
new file mode 100644
index 0000000000..5b113e3204
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-08-12-00-14-54.bpo-22602.ybG9K8.rst
@@ -0,0 +1,3 @@
+The UTF-7 decoder now raises :exc:`UnicodeDecodeError` for ill-formed
+sequences starting with "+" (as specified in RFC 2152). Patch by Zackery
+Spytz.
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 04fd6d03b4..0460d18493 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -4479,6 +4479,11 @@ PyUnicode_DecodeUTF7Stateful(const char *s,
                 if (_PyUnicodeWriter_WriteCharInline(&writer, '+') < 0)
                     goto onError;
             }
+            else if (s < e && !IS_BASE64(*s)) {
+                s++;
+                errmsg = "ill-formed sequence";
+                goto utf7Error;
+            }
             else { /* begin base64-encoded section */
                 inShift = 1;
                 surrogate = 0;