granicus.if.org Git - python/commitdiff
bpo-19865: ctypes.create_unicode_buffer() supports non-BMP strings on Windows (GH...
author Zackery Spytz <zspytz@gmail.com>
Fri, 14 Jun 2019 15:53:59 +0000 (09:53 -0600)
committer Victor Stinner <vstinner@redhat.com>
Fri, 14 Jun 2019 15:53:59 +0000 (17:53 +0200)
Lib/ctypes/__init__.py
Lib/ctypes/test/test_buffers.py
Misc/NEWS.d/next/Library/2019-06-14-08-30-16.bpo-19865.FRGH4I.rst [new file with mode: 0644]

index 4107db3e3972d79dd5b17c84264ac7e1364d68a4..128155dbf4f2d9e6f41e99bdd2d6539bb9427816 100644 (file)
@@ -274,7 +274,15 @@ def create_unicode_buffer(init, size=None):
     """
     if isinstance(init, str):
         if size is None:
-            size = len(init)+1
+            if sizeof(c_wchar) == 2:
+                # UTF-16 requires a surrogate pair (2 wchar_t) for non-BMP
+                # characters (outside [U+0000; U+FFFF] range). +1 for trailing
+                # NUL character.
+                size = sum(2 if ord(c) > 0xFFFF else 1 for c in init) + 1
+            else:
+                # 32-bit wchar_t (1 wchar_t per Unicode character). +1 for
+                # trailing NUL character.
+                size = len(init) + 1
         buftype = c_wchar * size
         buf = buftype()
         buf.value = init
index 166faaf4e4b89c954ede2f84bb70333bee26f75b..15782be757c8535db56be2a2036195e54a48dd01 100644 (file)
@@ -60,5 +60,14 @@ class StringBufferTestCase(unittest.TestCase):
         self.assertEqual(b[::2], "ac")
         self.assertEqual(b[::5], "a")
 
+    @need_symbol('c_wchar')
+    def test_create_unicode_buffer_non_bmp(self):
+        expected = 5 if sizeof(c_wchar) == 2 else 3
+        for s in '\U00010000\U00100000', '\U00010000\U0010ffff':
+            b = create_unicode_buffer(s)
+            self.assertEqual(len(b), expected)
+            self.assertEqual(b[-1], '\0')
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2019-06-14-08-30-16.bpo-19865.FRGH4I.rst b/Misc/NEWS.d/next/Library/2019-06-14-08-30-16.bpo-19865.FRGH4I.rst
new file mode 100644 (file)
index 0000000..efd1f55
--- /dev/null
@@ -0,0 +1,2 @@
+:func:`ctypes.create_unicode_buffer()` now also supports non-BMP characters
+on platforms with 16-bit :c:type:`wchar_t` (for example, Windows and AIX).