From: Serhiy Storchaka Date: Thu, 7 Feb 2013 14:25:25 +0000 (+0200) Subject: Issue #17043: The unicode-internal decoder no longer read past the end of X-Git-Tag: v3.3.1rc1~213 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=03ee12ed7251b6b251d55d708a22616ed2538b19;p=python Issue #17043: The unicode-internal decoder no longer read past the end of input buffer. --- 03ee12ed7251b6b251d55d708a22616ed2538b19 diff --cc Misc/NEWS index b63511c4ef,c715170df2..9491614912 --- a/Misc/NEWS +++ b/Misc/NEWS @@@ -12,9 -10,9 +12,12 @@@ What's New in Python 3.3.1 Core and Builtins ----------------- + - Issue #17043: The unicode-internal decoder no longer read past the end of + input buffer. + +- Issue #17098: All modules now have __loader__ set even if they pre-exist the + bootstrapping of importlib. + - Issue #16979: Fix error handling bugs in the unicode-escape-decode decoder. - Issue #10156: In the interpreter's initialization phase, unicode globals diff --cc Objects/unicodeobject.c index e8459138a8,cd4e9e9295..abe793dfd4 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@@ -6101,65 -4392,38 +6101,61 @@@ _PyUnicode_DecodeUnicodeInternal(const end = s + size; while (s < end) { - if (end-s < Py_UNICODE_SIZE) { + Py_UNICODE uch; + Py_UCS4 ch; ++ if (end - s < Py_UNICODE_SIZE) { + endinpos = end-starts; + reason = "truncated input"; + goto error; + } - memcpy(p, s, sizeof(Py_UNICODE)); + /* We copy the raw representation one byte at a time because the + pointer may be unaligned (see test_codeccallbacks). */ + ((char *) &uch)[0] = s[0]; + ((char *) &uch)[1] = s[1]; +#ifdef Py_UNICODE_WIDE + ((char *) &uch)[2] = s[2]; + ((char *) &uch)[3] = s[3]; +#endif + ch = uch; - + #ifdef Py_UNICODE_WIDE /* We have to sanity check the raw data, otherwise doom looms for some malformed UCS-4 data. */ - if ( - #ifdef Py_UNICODE_WIDE - ch > 0x10ffff || - #endif - end-s < Py_UNICODE_SIZE - ) - { - startinpos = s - starts; - if (end-s < Py_UNICODE_SIZE) { - endinpos = end-starts; - reason = "truncated input"; - } - else { - endinpos = s - starts + Py_UNICODE_SIZE; - reason = "illegal code point (> 0x10FFFF)"; - } - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "unicode_internal", reason, - &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) - goto onError; - continue; - if (*p > unimax || *p < 0) { ++ if (ch > 0x10ffff) { + endinpos = s - starts + Py_UNICODE_SIZE; + reason = "illegal code point (> 0x10FFFF)"; + goto error; } - + #endif - p++; s += Py_UNICODE_SIZE; +#ifndef Py_UNICODE_WIDE - if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && s < end) ++ if (Py_UNICODE_IS_HIGH_SURROGATE(ch) && end - s >= Py_UNICODE_SIZE) + { + Py_UNICODE uch2; + ((char *) &uch2)[0] = s[0]; + ((char *) &uch2)[1] = s[1]; + if (Py_UNICODE_IS_LOW_SURROGATE(uch2)) + { + ch = Py_UNICODE_JOIN_SURROGATES(uch, uch2); + s += Py_UNICODE_SIZE; + } + } +#endif + + if (unicode_putchar(&v, &outpos, ch) < 0) + goto onError; + continue; + + error: + startinpos = s - starts; - outpos = p - PyUnicode_AS_UNICODE(v); + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "unicode_internal", reason, + &starts, &end, &startinpos, &endinpos, &exc, &s, - &v, &outpos, &p)) { ++ &v, &outpos)) + goto onError; - } } - if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0) + if (unicode_resize(&v, outpos) < 0) goto onError; Py_XDECREF(errorHandler); Py_XDECREF(exc);