From: Serhiy Storchaka Date: Tue, 15 Jan 2013 13:30:04 +0000 (+0200) Subject: Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping" X-Git-Tag: v3.4.0a1~1603 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=55e2cb497bc35c6bf31673dce4d1ec4de0cb87d0;p=python Issue #14850: Now a charmap decoder treats U+FFFE as "undefined mapping" in any mapping, not only in a Unicode string. --- 55e2cb497bc35c6bf31673dce4d1ec4de0cb87d0 diff --cc Objects/unicodeobject.c index 65393d2efa,044b26e7fc..bd0dbb47ad --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@@ -7409,31 -7530,46 +7412,29 @@@ Error Py_DECREF(x); goto onError; } - if (unicode_putchar(&v, &outpos, value) < 0) + + if (_PyUnicodeWriter_Prepare(&writer, 1, value) == -1) goto onError; + PyUnicode_WRITE(writer.kind, writer.data, writer.pos, value); + writer.pos++; } - else if (x == Py_None) { - /* undefined mapping */ - startinpos = s-starts; - endinpos = startinpos+1; - if (unicode_decode_call_errorhandler_writer( - errors, &errorHandler, - "charmap", "character maps to ", - &starts, &e, &startinpos, &endinpos, &exc, &s, - &writer)) { - Py_DECREF(x); - goto onError; - } - Py_DECREF(x); - continue; - } else if (PyUnicode_Check(x)) { - writer.overallocate = 1; - if (_PyUnicodeWriter_WriteStr(&writer, x) == -1) - Py_ssize_t targetsize; - + if (PyUnicode_READY(x) == -1) goto onError; - targetsize = PyUnicode_GET_LENGTH(x); - - if (targetsize == 1) { - /* 1-1 mapping */ ++ if (PyUnicode_GET_LENGTH(x) == 1) { + Py_UCS4 value = PyUnicode_READ_CHAR(x, 0); + if (value == 0xFFFE) + goto Undefined; - if (unicode_putchar(&v, &outpos, value) < 0) ++ if (_PyUnicodeWriter_Prepare(&writer, 1, value) == -1) + goto onError; ++ PyUnicode_WRITE(writer.kind, writer.data, writer.pos, value); ++ writer.pos++; + } - else if (targetsize > 1) { - /* 1-n mapping */ - if (targetsize > extrachars) { - /* resize first */ - Py_ssize_t needed = (targetsize - extrachars) + \ - (targetsize << 2); - extrachars 
+= needed; - /* XXX overflow detection missing */ - if (unicode_resize(&v, - PyUnicode_GET_LENGTH(v) + needed) < 0) - { - Py_DECREF(x); - goto onError; - } - } - if (unicode_widen(&v, outpos, PyUnicode_MAX_CHAR_VALUE(x)) < 0) ++ else { ++ writer.overallocate = 1; ++ if (_PyUnicodeWriter_WriteStr(&writer, x) == -1) + goto onError; - PyUnicode_CopyCharacters(v, outpos, x, 0, targetsize); - outpos += targetsize; - extrachars -= targetsize; + } - /* 1-0 mapping: skip the character */ } else { /* wrong return value */ @@@ -7444,11 -7580,26 +7445,24 @@@ } Py_DECREF(x); ++s; + continue; + Undefined: + /* undefined mapping */ + Py_XDECREF(x); + startinpos = s-starts; + endinpos = startinpos+1; - if (unicode_decode_call_errorhandler( ++ if (unicode_decode_call_errorhandler_writer( + errors, &errorHandler, + "charmap", "character maps to ", + &starts, &e, &startinpos, &endinpos, &exc, &s, - &v, &outpos)) { ++ &writer)) { + goto onError; + } } } - if (unicode_resize(&v, outpos) < 0) - goto onError; Py_XDECREF(errorHandler); Py_XDECREF(exc); - return unicode_result(v); + return _PyUnicodeWriter_Finish(&writer); onError: Py_XDECREF(errorHandler);