granicus.if.org Git - python/commitdiff
bpo-30003: Fix handling escape characters in HZ codec (#1556)
author: Xiang Zhang <angwerzx@126.com>
Mon, 22 May 2017 14:42:05 +0000 (22:42 +0800)
committer: GitHub <noreply@github.com>
Mon, 22 May 2017 14:42:05 +0000 (22:42 +0800)
Lib/test/test_codecencodings_cn.py
Misc/NEWS
Modules/cjkcodecs/_codecs_cn.c

index c8a410c2e0398c4e7dc50960f22a1df077f5109c..2a450718d5aae57799a4afb331fb0b1cb3f55821 100644 (file)
@@ -86,6 +86,10 @@ class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
         (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
         (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
         (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
+        # issue 30003
+        ('ab~cd', 'strict',  b'ab~~cd'),  # escape ~
+        (b'~{Dc~~:C~}', 'strict', None),  # ~~ only in ASCII mode
+        (b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode
     )
 
 if __name__ == "__main__":
index 74f7922934d90ce5f6faa158fefdee25f1254837..5a7e377d6b2d7c6183cd2a301af717f1f0072fa3 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -334,6 +334,9 @@ Extension Modules
 Library
 -------
 
+- bpo-30003: Fix handling escape characters in HZ codec.  Based on patch
+  by Ma Lin.
+
 - bpo-30149: inspect.signature() now supports callables with
   variable-argument parameters wrapped with partialmethod.
   Patch by Dong-hee Na.
index bda175c55d13239ae7e41366e240f2b192fb54d6..1fcc220b8db0f4cbd500801310c6b3ff2ba15e7c 100644 (file)
@@ -350,15 +350,17 @@ ENCODER(hz)
         DBCHAR code;
 
         if (c < 0x80) {
-            if (state->i == 0) {
-                WRITEBYTE1((unsigned char)c);
-                NEXT(1, 1);
-            }
-            else {
-                WRITEBYTE3('~', '}', (unsigned char)c);
-                NEXT(1, 3);
+            if (state->i) {
+                WRITEBYTE2('~', '}');
+                NEXT_OUT(2);
                 state->i = 0;
             }
+            WRITEBYTE1((unsigned char)c);
+            NEXT(1, 1);
+            if (c == '~') {
+                WRITEBYTE1('~');
+                NEXT_OUT(1);
+            }
             continue;
         }
 
@@ -409,17 +411,14 @@ DECODER(hz)
             unsigned char c2 = INBYTE2;
 
             REQUIRE_INBUF(2);
-            if (c2 == '~') {
+            if (c2 == '~' && state->i == 0)
                 OUTCHAR('~');
-                NEXT_IN(2);
-                continue;
-            }
             else if (c2 == '{' && state->i == 0)
                 state->i = 1; /* set GB */
+            else if (c2 == '\n' && state->i == 0)
+                ; /* line-continuation */
             else if (c2 == '}' && state->i == 1)
                 state->i = 0; /* set ASCII */
-            else if (c2 == '\n')
-                ; /* line-continuation */
             else
                 return 1;
             NEXT_IN(2);