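Issue #12100: Don't reset incremental encoders of CJK codecs at each call to their encode() method anymore, but continue to call the reset() method if the final argument is True.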

author Victor Stinner <victor.stinner@haypocalc.com>

Tue, 24 May 2011 20:29:13 +0000 (22:29 +0200)

committer Victor Stinner <victor.stinner@haypocalc.com>

Tue, 24 May 2011 20:29:13 +0000 (22:29 +0200)
author Victor Stinner <victor.stinner@haypocalc.com>
Tue, 24 May 2011 20:29:13 +0000 (22:29 +0200)
committer Victor Stinner <victor.stinner@haypocalc.com>
Tue, 24 May 2011 20:29:13 +0000 (22:29 +0200)
diff --git a/Lib/test/test_multibytecodec.py b/Lib/test/test_multibytecodec.py

index 63c1e620b5935eabf3798517674b619fa8233e12..5e86ca2aae8ac09d93c6df7344756d529ebd9160 100644 (file)
--- a/Lib/test/test_multibytecodec.py
+++ b/Lib/test/test_multibytecodec.py
@@ -237,6 +237,36 @@ class Test_ISO2022(unittest.TestCase):
              # Any ISO 2022 codec will cause the segfault
              myunichr(x).encode('iso_2022_jp', 'ignore')
  
+class TestStateful(unittest.TestCase):
+    text = u'\u4E16\u4E16'
+    encoding = 'iso-2022-jp'
+    expected = b'\x1b$B@$@$'
+    expected_reset = b'\x1b$B@$@$\x1b(B'
+
+    def test_encode(self):
+        self.assertEqual(self.text.encode(self.encoding), self.expected_reset)
+
+    def test_incrementalencoder(self):
+        encoder = codecs.getincrementalencoder(self.encoding)()
+        output = b''.join(
+            encoder.encode(char)
+            for char in self.text)
+        self.assertEqual(output, self.expected)
+
+    def test_incrementalencoder_final(self):
+        encoder = codecs.getincrementalencoder(self.encoding)()
+        last_index = len(self.text) - 1
+        output = b''.join(
+            encoder.encode(char, index == last_index)
+            for index, char in enumerate(self.text))
+        self.assertEqual(output, self.expected_reset)
+
+class TestHZStateful(TestStateful):
+    text = u'\u804a\u804a'
+    encoding = 'hz'
+    expected = b'~{ADAD'
+    expected_reset = b'~{ADAD~}'
+
  def test_main():
      test_support.run_unittest(__name__)
  
diff --git a/Misc/NEWS b/Misc/NEWS

index 8b7d726c399cbbb8148f74fd710ac0ed4444a0b3..3d6815fa3791e0638b446e9767d6e72af57555b3 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -83,6 +83,10 @@ Core and Builtins
  Library
  -------
  
+- Issue #12100: Don't reset incremental encoders of CJK codecs at each call to
+  their encode() method anymore, but continue to call the reset() method if the
+  final argument is True.
+
  - Issue #12124: zipimport doesn't keep a reference to zlib.decompress() anymore
    to be able to unload the module.
  
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c

index 1f31595d3a07be8b501b3a8590e2648ab9129d50..14fed3ef38a537b45d533f2e780d33acbaf568f8 100644 (file)
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -471,7 +471,7 @@ multibytecodec_encode(MultibyteCodec *codec,
      MultibyteEncodeBuffer buf;
      Py_ssize_t finalsize, r = 0;
  
-    if (datalen == 0)
+    if (datalen == 0 && !(flags & MBENC_RESET))
          return PyString_FromString("");
  
      buf.excobj = NULL;
@@ -506,7 +506,7 @@ multibytecodec_encode(MultibyteCodec *codec,
              break;
      }
  
-    if (codec->encreset != NULL)
+    if (codec->encreset != NULL && (flags & MBENC_RESET))
          for (;;) {
              Py_ssize_t outleft;
  
@@ -776,8 +776,8 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
      inbuf_end = inbuf + datalen;
  
      r = multibytecodec_encode(ctx->codec, &ctx->state,
-                    (const Py_UNICODE **)&inbuf,
-                    datalen, ctx->errors, final ? MBENC_FLUSH : 0);
+                    (const Py_UNICODE **)&inbuf, datalen,
+                    ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
      if (r == NULL) {
          /* recover the original pending buffer */
          if (origpending > 0)
author	Victor Stinner <victor.stinner@haypocalc.com>
	Tue, 24 May 2011 20:29:13 +0000 (22:29 +0200)
committer	Victor Stinner <victor.stinner@haypocalc.com>
	Tue, 24 May 2011 20:29:13 +0000 (22:29 +0200)
Lib/test/test_multibytecodec.py		patch | blob | history
Misc/NEWS		patch | blob | history
Modules/cjkcodecs/multibytecodec.c		patch | blob | history