From: Xiang Zhang Date: Mon, 22 May 2017 17:02:33 +0000 (+0800) Subject: bpo-30003: Fix handling escape characters in HZ codec (#1556) (#1718) X-Git-Tag: v3.5.4rc1~136 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=65440f8278351e16350be716dff61f5f786f7060;p=python bpo-30003: Fix handling escape characters in HZ codec (#1556) (#1718) --- diff --git a/Lib/test/test_codecencodings_cn.py b/Lib/test/test_codecencodings_cn.py index f135bb26e7..5101b5ceb1 100644 --- a/Lib/test/test_codecencodings_cn.py +++ b/Lib/test/test_codecencodings_cn.py @@ -87,6 +87,10 @@ class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase): (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'), (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'), (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"), + # issue 30003 + ('ab~cd', 'strict', b'ab~~cd'), # escape ~ + (b'~{Dc~~:C~}', 'strict', None), # ~~ only in ASCII mode + (b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode ) if __name__ == "__main__": diff --git a/Misc/NEWS b/Misc/NEWS index 43e7748e48..823178d709 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -56,6 +56,9 @@ Extension Modules Library ------- +- bpo-30003: Fix handling escape characters in HZ codec. Based on patch + by Ma Lin. + - bpo-30301: Fix AttributeError when using SimpleQueue.empty() under *spawn* and *forkserver* start methods. diff --git a/Modules/cjkcodecs/_codecs_cn.c b/Modules/cjkcodecs/_codecs_cn.c index bda175c55d..1fcc220b8d 100644 --- a/Modules/cjkcodecs/_codecs_cn.c +++ b/Modules/cjkcodecs/_codecs_cn.c @@ -350,15 +350,17 @@ ENCODER(hz) DBCHAR code; if (c < 0x80) { - if (state->i == 0) { - WRITEBYTE1((unsigned char)c); - NEXT(1, 1); - } - else { - WRITEBYTE3('~', '}', (unsigned char)c); - NEXT(1, 3); + if (state->i) { + WRITEBYTE2('~', '}'); + NEXT_OUT(2); state->i = 0; } + WRITEBYTE1((unsigned char)c); + NEXT(1, 1); + if (c == '~') { + WRITEBYTE1('~'); + NEXT_OUT(1); + } continue; } @@ -409,17 +411,14 @@ DECODER(hz) unsigned char c2 = INBYTE2; REQUIRE_INBUF(2); - if (c2 == '~') { + if (c2 == '~' && state->i == 0) OUTCHAR('~'); - NEXT_IN(2); - continue; - } else if (c2 == '{' && state->i == 0) state->i = 1; /* set GB */ + else if (c2 == '\n' && state->i == 0) + ; /* line-continuation */ else if (c2 == '}' && state->i == 1) state->i = 0; /* set ASCII */ - else if (c2 == '\n') - ; /* line-continuation */ else return 1; NEXT_IN(2);