granicus.if.org Git - python/commitdiff
bpo-29990: Fix range checking in GB18030 decoder (#1495)
author Xiang Zhang <angwerzx@126.com>
Tue, 9 May 2017 03:38:32 +0000 (11:38 +0800)
committer GitHub <noreply@github.com>
Tue, 9 May 2017 03:38:32 +0000 (11:38 +0800)
When decoding a 4-byte GB18030 sequence, the first and third bytes cannot exceed 0xFE.

Lib/test/test_codecencodings_cn.py
Misc/NEWS
Modules/cjkcodecs/_codecs_cn.c

index 3bdf7d0e14b3b7cd3cecb6dc10f97f9457fe9e3f..c8a410c2e0398c4e7dc50960f22a1df077f5109c 100644 (file)
@@ -48,6 +48,12 @@ class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
         (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
         (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
         (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),
+        # issue29990
+        (b"\xff\x30\x81\x30", "strict", None),
+        (b"\x81\x30\xff\x30", "strict", None),
+        (b"abc\x81\x39\xff\x39\xc1\xc4", "replace", "abc\ufffd\x39\ufffd\x39\u804a"),
+        (b"abc\xab\x36\xff\x30def", "replace", 'abc\ufffd\x36\ufffd\x30def'),
+        (b"abc\xbf\x38\xff\x32\xc1\xc4", "ignore",  "abc\x38\x32\u804a"),
     )
     has_iso10646 = True
 
index 5e5ce59e29d63d090e3b45dddf129908c0d3b17c..b9348880b4fc3ea4d0b8b75d9d189d258915fd00 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -317,6 +317,8 @@ Extension Modules
 Library
 -------
 
+- bpo-29990: Fix range checking in GB18030 decoder.  Original patch by Ma Lin.
+
 - bpo-29979: rewrite cgi.parse_multipart, reusing the FieldStorage class and
   making its results consistent with those of FieldStorage for
   multipart/form-data requests. Patch by Pierre Quentel.
index 1a070f2f393219de4ec69bf4a14ba081c6b436ee..bda175c55d13239ae7e41366e240f2b192fb54d6 100644 (file)
@@ -279,7 +279,9 @@ DECODER(gb18030)
             REQUIRE_INBUF(4);
             c3 = INBYTE3;
             c4 = INBYTE4;
-            if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
+            if (c  < 0x81 || c  > 0xFE ||
+                c3 < 0x81 || c3 > 0xFE ||
+                c4 < 0x30 || c4 > 0x39)
                 return 1;
             c -= 0x81;  c2 -= 0x30;
             c3 -= 0x81; c4 -= 0x30;