From: Serhiy Storchaka Date: Tue, 15 Jan 2013 13:01:20 +0000 (+0200) Subject: Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping" X-Git-Tag: v3.3.1rc1~356 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=45d16d99240506df8d943c81017880977612488b;p=python Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping" in any mapping, not only in a unicode string. --- 45d16d99240506df8d943c81017880977612488b diff --cc Misc/NEWS index ae3a67b8aa,45c059817c..c1ff58d5a7 --- a/Misc/NEWS +++ b/Misc/NEWS @@@ -12,14 -10,9 +12,17 @@@ What's New in Python 3.3.1 Core and Builtins ----------------- + - Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping" + in any mapping, not only in a string. + +- Issue #16730: importlib.machinery.FileFinder now no longer raises an + exception when trying to populate its cache and it finds out the directory is + unreadable or has turned into a file. Reported and diagnosed by + David Pritchard. + +- Issue #16906: Fix a logic error that prevented most static strings from being + cleared. + - Issue #11461: Fix the incremental UTF-16 decoder. Original patch by Amaury Forgeot d'Arc. 
diff --cc Objects/unicodeobject.c index 16d59292ef,e1df874c0b..044b26e7fc --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@@ -7518,45 -5251,53 +7517,36 @@@ Error } /* Apply mapping */ + if (x == Py_None) + goto Undefined; if (PyLong_Check(x)) { long value = PyLong_AS_LONG(x); + if (value == 0xFFFE) + goto Undefined; - if (value < 0 || value > 0x10FFFF) { - PyErr_SetString(PyExc_TypeError, - "character mapping must be in range(0x110000)"); + if (value < 0 || value > MAX_UNICODE) { + PyErr_Format(PyExc_TypeError, + "character mapping must be in range(0x%lx)", + (unsigned long)MAX_UNICODE + 1); Py_DECREF(x); goto onError; } - -#ifndef Py_UNICODE_WIDE - if (value > 0xFFFF) { - /* see the code for 1-n mapping below */ - if (extrachars < 2) { - /* resize first */ - Py_ssize_t oldpos = p - PyUnicode_AS_UNICODE(v); - Py_ssize_t needed = 10 - extrachars; - extrachars += needed; - /* XXX overflow detection missing */ - if (_PyUnicode_Resize(&v, - PyUnicode_GET_SIZE(v) + needed) < 0) { - Py_DECREF(x); - goto onError; - } - p = PyUnicode_AS_UNICODE(v) + oldpos; - } - value -= 0x10000; - *p++ = 0xD800 | (value >> 10); - *p++ = 0xDC00 | (value & 0x3FF); - extrachars -= 2; - } - else -#endif - *p++ = (Py_UNICODE)value; + if (unicode_putchar(&v, &outpos, value) < 0) + goto onError; } - else if (x == Py_None) { - /* undefined mapping */ - startinpos = s-starts; - endinpos = startinpos+1; - if (unicode_decode_call_errorhandler( - errors, &errorHandler, - "charmap", "character maps to ", - &starts, &e, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) { - Py_DECREF(x); - goto onError; - } - Py_DECREF(x); - continue; - } else if (PyUnicode_Check(x)) { - Py_ssize_t targetsize = PyUnicode_GET_SIZE(x); + Py_ssize_t targetsize; + + if (PyUnicode_READY(x) == -1) + goto onError; + targetsize = PyUnicode_GET_LENGTH(x); if (targetsize == 1) { /* 1-1 mapping */ - if (unicode_putchar(&v, &outpos, - PyUnicode_READ_CHAR(x, 0)) < 0) - Py_UNICODE value = *PyUnicode_AS_UNICODE(x); ++ 
Py_UCS4 value = PyUnicode_READ_CHAR(x, 0); + if (value == 0xFFFE) + goto Undefined; - *p++ = value; ++ if (unicode_putchar(&v, &outpos, value) < 0) + goto onError; } else if (targetsize > 1) { /* 1-n mapping */ @@@ -7590,13 -5332,28 +7580,26 @@@ } Py_DECREF(x); ++s; + continue; + Undefined: + /* undefined mapping */ + Py_XDECREF(x); - outpos = p-PyUnicode_AS_UNICODE(v); + startinpos = s-starts; + endinpos = startinpos+1; + if (unicode_decode_call_errorhandler( + errors, &errorHandler, + "charmap", "character maps to ", + &starts, &e, &startinpos, &endinpos, &exc, &s, - &v, &outpos, &p)) { ++ &v, &outpos)) { + goto onError; + } } } - if (p - PyUnicode_AS_UNICODE(v) < PyUnicode_GET_SIZE(v)) - if (_PyUnicode_Resize(&v, p - PyUnicode_AS_UNICODE(v)) < 0) - goto onError; + if (unicode_resize(&v, outpos) < 0) + goto onError; Py_XDECREF(errorHandler); Py_XDECREF(exc); - return (PyObject *)v; + return unicode_result(v); onError: Py_XDECREF(errorHandler);