From: Georg Brandl
Date: Sun, 1 Aug 2010 08:49:18 +0000 (+0000)
Subject: #8821: do not rely on Unicode strings being terminated with a \u0000, rather explicit...
X-Git-Tag: v3.2a2~573
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bd534f03498c97273dc5bf00182e6405a3a92e01;p=python

#8821: do not rely on Unicode strings being terminated with a \u0000, rather explicitly check range before looking for a second surrogate character.
---

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index f2d666de12..bfd19ebbbf 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3734,7 +3734,7 @@ PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
 
             ch2 = *s++;
             size--;
-            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
+            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF && size) {
                 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
                 *p++ = '\\';
                 *p++ = 'U';
@@ -3976,7 +3976,7 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
 
             ch2 = *s++;
             size--;
-            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
+            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF && size) {
                 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
                 *p++ = '\\';
                 *p++ = 'U';