granicus.if.org Git - python/commitdiff
bpo-30003: Fix handling escape characters in HZ codec (#1556) (#1718)
author: Xiang Zhang <angwerzx@126.com>
Mon, 22 May 2017 17:02:33 +0000 (01:02 +0800)
committer: GitHub <noreply@github.com>
Mon, 22 May 2017 17:02:33 +0000 (01:02 +0800)
Lib/test/test_codecencodings_cn.py
Misc/NEWS
Modules/cjkcodecs/_codecs_cn.c

index f135bb26e7b044de52ad2098b4f3c6f20cdfb5f7..5101b5ceb1926b776e3b074e4bb46b4dc425c1e8 100644 (file)
@@ -87,6 +87,10 @@ class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
         (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
         (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
         (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
+        # issue 30003
+        ('ab~cd', 'strict',  b'ab~~cd'),  # escape ~
+        (b'~{Dc~~:C~}', 'strict', None),  # ~~ only in ASCII mode
+        (b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode
     )
 
 if __name__ == "__main__":
index 43e7748e48f1420f4d0accb314178a93fe99826c..823178d709046161008ffcb6b28a35f775c4f7bb 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -56,6 +56,9 @@ Extension Modules
 Library
 -------
 
+- bpo-30003: Fix handling escape characters in HZ codec.  Based on patch
+  by Ma Lin.
+
 - bpo-30301: Fix AttributeError when using SimpleQueue.empty() under
   *spawn* and *forkserver* start methods.
 
index bda175c55d13239ae7e41366e240f2b192fb54d6..1fcc220b8db0f4cbd500801310c6b3ff2ba15e7c 100644 (file)
@@ -350,15 +350,17 @@ ENCODER(hz)
         DBCHAR code;
 
         if (c < 0x80) {
-            if (state->i == 0) {
-                WRITEBYTE1((unsigned char)c);
-                NEXT(1, 1);
-            }
-            else {
-                WRITEBYTE3('~', '}', (unsigned char)c);
-                NEXT(1, 3);
+            if (state->i) {
+                WRITEBYTE2('~', '}');
+                NEXT_OUT(2);
                 state->i = 0;
             }
+            WRITEBYTE1((unsigned char)c);
+            NEXT(1, 1);
+            if (c == '~') {
+                WRITEBYTE1('~');
+                NEXT_OUT(1);
+            }
             continue;
         }
 
@@ -409,17 +411,14 @@ DECODER(hz)
             unsigned char c2 = INBYTE2;
 
             REQUIRE_INBUF(2);
-            if (c2 == '~') {
+            if (c2 == '~' && state->i == 0)
                 OUTCHAR('~');
-                NEXT_IN(2);
-                continue;
-            }
             else if (c2 == '{' && state->i == 0)
                 state->i = 1; /* set GB */
+            else if (c2 == '\n' && state->i == 0)
+                ; /* line-continuation */
             else if (c2 == '}' && state->i == 1)
                 state->i = 0; /* set ASCII */
-            else if (c2 == '\n')
-                ; /* line-continuation */
             else
                 return 1;
             NEXT_IN(2);