granicus.if.org Git - python/commitdiff
Issue #15379: Fix passing of non-BMP characters as integers for the charmap decoder...
author Antoine Pitrou <solipsis@pitrou.net>
Sun, 23 Sep 2012 18:00:04 +0000 (20:00 +0200)
committer Antoine Pitrou <solipsis@pitrou.net>
Sun, 23 Sep 2012 18:00:04 +0000 (20:00 +0200)
Patch by Serhiy Storchaka.

1  2 
Lib/test/test_codecs.py
Misc/NEWS
Objects/unicodeobject.c

index 59179c4e22da431cf6ef652e1867fbbe3d1e332e,f342d88b9f6a9306f72a9a4feb955422d4edc2eb..4e808ec6acd0288d4cf8dded53b815906bba1c94
@@@ -1692,6 -1546,10 +1692,15 @@@ class CharmapTest(unittest.TestCase)
              ("abc", 3)
          )
  
++        self.assertEqual(
++            codecs.charmap_decode(b"\x00\x01\x02", "strict", "\U0010FFFFbc"),
++            ("\U0010FFFFbc", 3)
++        )
++
+         self.assertRaises(UnicodeDecodeError,
+             codecs.charmap_decode, b"\x00\x01\x02", "strict", "ab"
+         )
          self.assertEqual(
              codecs.charmap_decode(b"\x00\x01\x02", "replace", "ab"),
              ("ab\ufffd", 3)
              ("", len(allbytes))
          )
  
 -                                   {0: 0x110000, 1: b, 2: c}
+     def test_decode_with_int2str_map(self):
+         self.assertEqual(
+             codecs.charmap_decode(b"\x00\x01\x02", "strict",
+                                   {0: 'a', 1: 'b', 2: 'c'}),
+             ("abc", 3)
+         )
+         self.assertEqual(
+             codecs.charmap_decode(b"\x00\x01\x02", "strict",
+                                   {0: 'Aa', 1: 'Bb', 2: 'Cc'}),
+             ("AaBbCc", 3)
+         )
+         self.assertEqual(
+             codecs.charmap_decode(b"\x00\x01\x02", "strict",
+                                   {0: '\U0010FFFF', 1: 'b', 2: 'c'}),
+             ("\U0010FFFFbc", 3)
+         )
+         self.assertEqual(
+             codecs.charmap_decode(b"\x00\x01\x02", "strict",
+                                   {0: 'a', 1: 'b', 2: ''}),
+             ("ab", 3)
+         )
+         self.assertRaises(UnicodeDecodeError,
+             codecs.charmap_decode, b"\x00\x01\x02", "strict",
+                                    {0: 'a', 1: 'b'}
+         )
+         self.assertEqual(
+             codecs.charmap_decode(b"\x00\x01\x02", "replace",
+                                   {0: 'a', 1: 'b'}),
+             ("ab\ufffd", 3)
+         )
+         self.assertEqual(
+             codecs.charmap_decode(b"\x00\x01\x02", "replace",
+                                   {0: 'a', 1: 'b', 2: None}),
+             ("ab\ufffd", 3)
+         )
+         self.assertEqual(
+             codecs.charmap_decode(b"\x00\x01\x02", "ignore",
+                                   {0: 'a', 1: 'b'}),
+             ("ab", 3)
+         )
+         self.assertEqual(
+             codecs.charmap_decode(b"\x00\x01\x02", "ignore",
+                                   {0: 'a', 1: 'b', 2: None}),
+             ("ab", 3)
+         )
+         allbytes = bytes(range(256))
+         self.assertEqual(
+             codecs.charmap_decode(allbytes, "ignore", {}),
+             ("", len(allbytes))
+         )
+     def test_decode_with_int2int_map(self):
+         a = ord('a')
+         b = ord('b')
+         c = ord('c')
+         self.assertEqual(
+             codecs.charmap_decode(b"\x00\x01\x02", "strict",
+                                   {0: a, 1: b, 2: c}),
+             ("abc", 3)
+         )
+         # Issue #15379
+         self.assertEqual(
+             codecs.charmap_decode(b"\x00\x01\x02", "strict",
+                                   {0: 0x10FFFF, 1: b, 2: c}),
+             ("\U0010FFFFbc", 3)
+         )
++        self.assertEqual(
++            codecs.charmap_decode(b"\x00\x01\x02", "strict",
++                                  {0: sys.maxunicode, 1: b, 2: c}),
++            (chr(sys.maxunicode) + "bc", 3)
++        )
++
+         self.assertRaises(TypeError,
+             codecs.charmap_decode, b"\x00\x01\x02", "strict",
++                                   {0: sys.maxunicode + 1, 1: b, 2: c}
+         )
+         self.assertRaises(UnicodeDecodeError,
+             codecs.charmap_decode, b"\x00\x01\x02", "strict",
+                                    {0: a, 1: b},
+         )
+         self.assertEqual(
+             codecs.charmap_decode(b"\x00\x01\x02", "replace",
+                                   {0: a, 1: b}),
+             ("ab\ufffd", 3)
+         )
+         self.assertEqual(
+             codecs.charmap_decode(b"\x00\x01\x02", "ignore",
+                                   {0: a, 1: b}),
+             ("ab", 3)
+         )
  class WithStmtTest(unittest.TestCase):
      def test_encodedfile(self):
          f = io.BytesIO(b"\xc3\xbc")
diff --cc Misc/NEWS
index 29fd3070e3b0030ba8ff446c87c88e33231bd35f,57ed006fa762729bbb47b1fde7d206d918315be1..49d68349ff6e8068bef8cb01362cb35928e022f6
+++ b/Misc/NEWS
@@@ -10,22 -10,13 +10,25 @@@ What's New in Python 3.3.
  Core and Builtins
  -----------------
  
 -- Issue #13992: The trashcan mechanism is now thread-safe.  This eliminates
 -  sporadic crashes in multi-thread programs when several long deallocator
 -  chains ran concurrently and involved subclasses of built-in container
 -  types.
+ - Issue #15379: Fix passing of non-BMP characters as integers for the charmap
+   decoder (already working as unicode strings).  Patch by Serhiy Storchaka.
 +- Issue #15144: Fix possible integer overflow when handling pointers as
 +  integer values, by using Py_uintptr_t instead of size_t.  Patch by
 +  Serhiy Storchaka.
 +
 +- Issue #15965: Explicitly cast AT_FDCWD as (int).  Required on Solaris 10
 +  (which defines AT_FDCWD as 0xffd19553), harmless on other platforms.
 +
 +- Issue #15926: Fix crash after multiple reinitializations of the interpreter.
 +
 +- Issue #15895: Fix FILE pointer leak in one error branch of
 +  PyRun_SimpleFileExFlags() when filename points to a pyc/pyo file, closeit
 +  is false and set_main_loader() fails.
 +
 +- Issue #15900: Fix reference leak in PyUnicode_TranslateCharmap().
 +
 +- Issue #15839: Convert SystemErrors in super() to RuntimeErrors.
  
  - Issue #15846: Fix SystemError which happened when using ast.parse in an
    exception handler on code with syntax errors.
index 748508b27887db20792bcb0e728f913bc2473bde,f59db36dd3d455c241aecfca5fa02300d490b43b..0da565a612b33c9741d5d104e5b73e922f3fe6f7
@@@ -7525,9 -5250,9 +7525,10 @@@ Error
              /* Apply mapping */
              if (PyLong_Check(x)) {
                  long value = PyLong_AS_LONG(x);
-                 if (value < 0 || value > 65535) {
 -                if (value < 0 || value > 0x10FFFF) {
--                    PyErr_SetString(PyExc_TypeError,
-                                     "character mapping must be in range(65536)");
 -                                    "character mapping must be in range(0x110000)");
++                if (value < 0 || value > MAX_UNICODE) {
++                    PyErr_Format(PyExc_TypeError,
++                                 "character mapping must be in range(0x%lx)",
++                                 (unsigned long)MAX_UNICODE + 1);
                      Py_DECREF(x);
                      goto onError;
                  }