From: Walter Dörwald Date: Wed, 6 May 2009 14:28:24 +0000 (+0000) Subject: Issue 3739: The unicode-internal encoder now reports the number of *characters* X-Git-Tag: v2.7a1~1261 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a7fb408a02a06c9d391bc52b61396a6eba2f60d0;p=python Issue 3739: The unicode-internal encoder now reports the number of *characters* consumed like any other encoder (instead of the number of bytes). --- diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index cee819ca0a..5df6fe5e4e 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -802,6 +802,12 @@ class UnicodeInternalTest(unittest.TestCase): "UnicodeInternalTest") self.assertEquals((u"ab", 12), ignored) + def test_encode_length(self): + # Issue 3739 + encoder = codecs.getencoder("unicode_internal") + self.assertEquals(encoder(u"a")[1], 1) + self.assertEquals(encoder(u"\xe9\u0142")[1], 2) + # From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html nameprep_tests = [ # 3.1 Map to nothing. @@ -1292,8 +1298,7 @@ class BasicUnicodeTest(unittest.TestCase): name = "latin_1" self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-")) (bytes, size) = codecs.getencoder(encoding)(s) - if encoding != "unicode_internal": - self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding)) + self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding)) (chars, size) = codecs.getdecoder(encoding)(bytes) self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding)) diff --git a/Modules/_codecsmodule.c b/Modules/_codecsmodule.c index 6877238036..495e4ff53a 100644 --- a/Modules/_codecsmodule.c +++ b/Modules/_codecsmodule.c @@ -646,7 +646,7 @@ unicode_internal_encode(PyObject *self, data = PyUnicode_AS_DATA(obj); size = PyUnicode_GET_DATA_SIZE(obj); return codec_tuple(PyString_FromStringAndSize(data, size), - size); + PyUnicode_GET_SIZE(obj)); } else { if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))