From b1556c537d7c49978fa40594a9c9f40c6f88cdde Mon Sep 17 00:00:00 2001
From: Victor Stinner
Date: Thu, 20 May 2010 11:29:45 +0000
Subject: [PATCH] libpython.py: fix support of non-BMP unicode characters

Forward port some code from Python3:

 * join surrogate pairs if sizeof(Py_UNICODE)==2
 * Enable non-BMP test on narrow builds using u"\U0001D121" instead of
   unichr(0x1D121)
 * keep a surrogate pair as-is when gdb's own Python is a narrow build
   (unichr() raises ValueError above 0xFFFF there)
---
 Lib/test/test_gdb.py   | 10 ++--------
 Tools/gdb/libpython.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/Lib/test/test_gdb.py b/Lib/test/test_gdb.py
index 388f24059e..2a7057c8e5 100644
--- a/Lib/test/test_gdb.py
+++ b/Lib/test/test_gdb.py
@@ -243,14 +243,8 @@ class PrettyPrintTests(DebuggerTests):
         # This is:
         # UTF-8: 0xF0 0x9D 0x84 0xA1
         # UTF-16: 0xD834 0xDD21
-        try:
-            # This will only work on wide-unicode builds:
-            self.assertGdbRepr(unichr(0x1D121))
-        except ValueError, e:
-            # We're probably on a narrow-unicode build; if we're seeing a
-            # different problem, then re-raise it:
-            if e.args != ('unichr() arg not in range(0x10000) (narrow Python build)',):
-                raise e
+        # A \U escape works on narrow builds too (stored as a surrogate pair):
+        self.assertGdbRepr(u"\U0001D121")
 
     def test_sets(self):
         'Verify the pretty-printing of sets'
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index f62735fb2a..3481f71c15 100644
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -1013,6 +1013,11 @@ class PyTypeObjectPtr(PyObjectPtr):
 class PyUnicodeObjectPtr(PyObjectPtr):
     _typename = 'PyUnicodeObject'
 
+    def char_width(self):
+        """Return sizeof(Py_UNICODE) as gdb reports it for 'Py_UNICODE'."""
+        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
+        return _type_Py_UNICODE.sizeof
+
     def proxyval(self, visited):
         # From unicodeobject.h:
         # Py_ssize_t length; /* Length of raw Unicode data in buffer */
@@ -1029,6 +1034,43 @@ class PyUnicodeObjectPtr(PyObjectPtr):
         result = u''.join([unichr(ucs) for ucs in Py_UNICODEs])
         return result
 
+    def write_repr(self, out, visited):
+        """Write repr() of the proxied unicode value to out.
+
+        When char_width() is 2, each high/low surrogate pair in the
+        proxy value is joined into a single code point before repr()
+        is taken, so a non-BMP character is written as one character.
+        """
+        proxy = self.proxyval(visited)
+        if self.char_width() == 2:
+            # sizeof(Py_UNICODE)==2: join surrogates
+            proxy2 = []
+            i = 0
+            while i < len(proxy):
+                ch = proxy[i]
+                i += 1
+                if (i < len(proxy)
+                        and 0xD800 <= ord(ch) < 0xDC00
+                        and 0xDC00 <= ord(proxy[i]) <= 0xDFFF):
+                    # Get code point from surrogate pair
+                    ch2 = proxy[i]
+                    code = (ord(ch) & 0x03FF) << 10
+                    code |= ord(ch2) & 0x03FF
+                    code += 0x00010000
+                    i += 1
+                    try:
+                        proxy2.append(unichr(code))
+                    except ValueError:
+                        # gdb's own Python is a narrow build: unichr()
+                        # rejects code points above 0xFFFF, so keep the
+                        # pair untouched.
+                        proxy2.append(ch)
+                        proxy2.append(ch2)
+                else:
+                    proxy2.append(ch)
+            proxy = u''.join(proxy2)
+        out.write(repr(proxy))
+
 
 def int_from_int(gdbval):
     return int(str(gdbval))
-- 
2.50.1