granicus.if.org Git - python/commitdiff
Issue 3739: The unicode-internal encoder now reports the number of *characters*
author Walter Dörwald <walter@livinglogic.de>
Wed, 6 May 2009 14:28:24 +0000 (14:28 +0000)
committer Walter Dörwald <walter@livinglogic.de>
Wed, 6 May 2009 14:28:24 +0000 (14:28 +0000)
consumed like any other encoder (instead of the number of bytes).

Lib/test/test_codecs.py
Modules/_codecsmodule.c

index cee819ca0af0c1fbaa281023add45f7363451ff2..5df6fe5e4e147146f652fbea33c7849c0d847672 100644 (file)
@@ -802,6 +802,12 @@ class UnicodeInternalTest(unittest.TestCase):
                 "UnicodeInternalTest")
             self.assertEquals((u"ab", 12), ignored)
 
+    def test_encode_length(self):
+        # Issue 3739
+        encoder = codecs.getencoder("unicode_internal")
+        self.assertEquals(encoder(u"a")[1], 1)
+        self.assertEquals(encoder(u"\xe9\u0142")[1], 2)
+
 # From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
 nameprep_tests = [
     # 3.1 Map to nothing.
@@ -1292,8 +1298,7 @@ class BasicUnicodeTest(unittest.TestCase):
                 name = "latin_1"
             self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))
             (bytes, size) = codecs.getencoder(encoding)(s)
-            if encoding != "unicode_internal":
-                self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
+            self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
             (chars, size) = codecs.getdecoder(encoding)(bytes)
             self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
 
index 6877238036223e85fbb543825f476a72bc2df837..495e4ff53a46a8ea4ed9730fe3b085831c59305d 100644 (file)
@@ -646,7 +646,7 @@ unicode_internal_encode(PyObject *self,
        data = PyUnicode_AS_DATA(obj);
        size = PyUnicode_GET_DATA_SIZE(obj);
        return codec_tuple(PyString_FromStringAndSize(data, size),
-                          size);
+                          PyUnicode_GET_SIZE(obj));
     }
     else {
        if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))