From: Victor Stinner
Date: Tue, 5 Mar 2013 23:41:50 +0000 (+0100)
Subject: Issue #17223: Fix PyUnicode_FromUnicode() on Windows (16-bit wchar_t type)
X-Git-Tag: v3.4.0a1~1271
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=313cac88c524dd1d3c9810109de3f9c310d73413;p=python

Issue #17223: Fix PyUnicode_FromUnicode() on Windows (16-bit wchar_t type)
to reject invalid UTF-16 surrogates.
---

diff --git a/Misc/NEWS b/Misc/NEWS
index a0dd2f60f0..d858cd2e66 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.4.0 Alpha 1?
 Core and Builtins
 -----------------
 
+- Issue #17223: Fix PyUnicode_FromUnicode() on Windows (16-bit wchar_t type)
+  to reject invalid UTF-16 surrogate.
+
 - Issue #17032: The "global" in the "NameError: global name 'x' is not defined"
   error message has been removed. Patch by Ram Rachum.
 
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 2175655039..00a6a36fcd 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1384,13 +1384,18 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
 
     for (iter = begin; iter < end; ) {
 #if SIZEOF_WCHAR_T == 2
-        if (Py_UNICODE_IS_HIGH_SURROGATE(iter[0])
-            && (iter+1) < end
-            && Py_UNICODE_IS_LOW_SURROGATE(iter[1]))
-        {
-            ch = Py_UNICODE_JOIN_SURROGATES(iter[0], iter[1]);
-            ++(*num_surrogates);
-            iter += 2;
+        if (Py_UNICODE_IS_HIGH_SURROGATE(iter[0])) {
+            if ((iter+1) < end
+                && Py_UNICODE_IS_LOW_SURROGATE(iter[1]))
+            {
+                ch = Py_UNICODE_JOIN_SURROGATES(iter[0], iter[1]);
+                ++(*num_surrogates);
+                iter += 2;
+            }
+            else {
+                PyErr_SetString(PyExc_ValueError, "illegal UTF-16 surrogate");
+                return -1;
+            }
         }
         else
 #endif