From f5f7870d9322b46ab87c45b2c4c46f6b10ecbd70 Mon Sep 17 00:00:00 2001 From: Xiang Zhang Date: Tue, 9 May 2017 12:17:09 +0800 Subject: [PATCH] bpo-29990: Fix range checking in GB18030 decoder (#1495) (#1508) When decoding a 4-byte GB18030 sequence, the first and third byte cannot exceed 0xFE. --- Lib/test/test_codecencodings_cn.py | 6 ++++++ Misc/NEWS | 2 ++ Modules/cjkcodecs/_codecs_cn.c | 4 +++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_codecencodings_cn.py b/Lib/test/test_codecencodings_cn.py index d0e3a15d16..f135bb26e7 100644 --- a/Lib/test/test_codecencodings_cn.py +++ b/Lib/test/test_codecencodings_cn.py @@ -49,6 +49,12 @@ class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase): (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'), (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'), (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'), + # issue29990 + (b"\xff\x30\x81\x30", "strict", None), + (b"\x81\x30\xff\x30", "strict", None), + (b"abc\x81\x39\xff\x39\xc1\xc4", "replace", "abc\ufffd\x39\ufffd\x39\u804a"), + (b"abc\xab\x36\xff\x30def", "replace", 'abc\ufffd\x36\ufffd\x30def'), + (b"abc\xbf\x38\xff\x32\xc1\xc4", "ignore", "abc\x38\x32\u804a"), ) has_iso10646 = True diff --git a/Misc/NEWS b/Misc/NEWS index 1ec6a09760..06e464ffc8 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -49,6 +49,8 @@ Extension Modules Library ------- +- bpo-29990: Fix range checking in GB18030 decoder. Original patch by Ma Lin. + - Revert bpo-26293 for zipfile breakage. See also bpo-29094. - bpo-30243: Removed the __init__ methods of _json's scanner and encoder. diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index 1a070f2f39..bda175c55d 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -279,7 +279,9 @@ DECODER(gb18030) REQUIRE_INBUF(4); c3 = INBYTE3; c4 = INBYTE4; - if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) + if (c < 0x81 || c > 0xFE || + c3 < 0x81 || c3 > 0xFE || + c4 < 0x30 || c4 > 0x39) return 1; c -= 0x81; c2 -= 0x30; c3 -= 0x81; c4 -= 0x30; -- 2.40.0