granicus.if.org Git - python/commitdiff
bpo-30003: Fix handling escape characters in HZ codec (#1720) (#1556)
author Xiang Zhang <angwerzx@126.com>
Mon, 22 May 2017 17:04:27 +0000 (01:04 +0800)
committer GitHub <noreply@github.com>
Mon, 22 May 2017 17:04:27 +0000 (01:04 +0800)
Lib/test/test_codecencodings_cn.py
Misc/NEWS
Modules/cjkcodecs/_codecs_cn.c

index fdae538973d36aa96eb254519a76832664003059..a1049373a450c85c8ea9fc2dda50e57286ef47b4 100644 (file)
@@ -82,6 +82,10 @@ class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
         (b'ab~cd', 'replace', u'ab\uFFFDd'),
         (b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
         (b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
+        # issue 30003
+        (u'ab~cd', 'strict',  b'ab~~cd'), # escape ~
+        (b'~{Dc~~:C~}', 'strict', None),  # ~~ only in ASCII mode
+        (b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode
     )
 
 def test_main():
index 254bb52f9778d9117fda6eb837069ea56ada30c9..938a02955a5470d325f0e57651693b0bcfac28d1 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -49,6 +49,9 @@ Extension Modules
 Library
 -------
 
+- bpo-30003: Fix handling escape characters in HZ codec.  Based on patch
+  by Ma Lin.
+
 - bpo-30375: Warnings emitted when compile a regular expression now always
   point to the line in the user code.  Previously they could point into inners
   of the re module if emitted from inside of groups or conditionals.
index 3bc652fefffb910bfb3c602c97b81836369263f5..92cf06d5ffd35354bb514f9f20e3bb070022c984 100644 (file)
@@ -335,15 +335,17 @@ ENCODER(hz)
         DBCHAR code;
 
         if (c < 0x80) {
-            if (state->i == 0) {
-                WRITE1((unsigned char)c)
-                NEXT(1, 1)
-            }
-            else {
-                WRITE3('~', '}', (unsigned char)c)
-                NEXT(1, 3)
+            if (state->i) {
+                WRITE2('~', '}')
+                NEXT_OUT(2)
                 state->i = 0;
             }
+            WRITE1((unsigned char)c)
+            NEXT(1, 1)
+            if (c == '~') {
+                WRITE1('~')
+                NEXT_OUT(1)
+            }
             continue;
         }
 
@@ -390,20 +392,19 @@ DECODER(hz)
             unsigned char c2 = IN2;
 
             REQUIRE_INBUF(2)
-            if (c2 == '~') {
+            if (c2 == '~' && state->i == 0) {
                 WRITE1('~')
-                NEXT(2, 1)
-                continue;
+                NEXT_OUT(1)
             }
             else if (c2 == '{' && state->i == 0)
                 state->i = 1; /* set GB */
+            else if (c2 == '\n' && state->i == 0)
+                ; /* line-continuation */
             else if (c2 == '}' && state->i == 1)
                 state->i = 0; /* set ASCII */
-            else if (c2 == '\n')
-                ; /* line-continuation */
             else
                 return 2;
-            NEXT(2, 0);
+            NEXT_IN(2)
             continue;
         }