From: Serhiy Storchaka Date: Mon, 18 May 2015 13:10:40 +0000 (+0300) Subject: Issue #24102: Fixed exception type checking in standard error handlers. X-Git-Tag: v3.5.0b1~84 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c0937f79ec12fb46938416004fd1fd002ae75a12;p=python Issue #24102: Fixed exception type checking in standard error handlers. --- c0937f79ec12fb46938416004fd1fd002ae75a12 diff --cc Lib/test/test_codeccallbacks.py index 4cfb88e99b,1327f112a2..ee1e28a763 --- a/Lib/test/test_codeccallbacks.py +++ b/Lib/test/test_codeccallbacks.py @@@ -1046,6 -961,29 +1046,30 @@@ class CodecCallbackTest(unittest.TestCa with self.assertRaises(TypeError): data.decode(encoding, "test.replacing") + def test_fake_error_class(self): + handlers = [ + codecs.strict_errors, + codecs.ignore_errors, + codecs.replace_errors, + codecs.backslashreplace_errors, ++ codecs.namereplace_errors, + codecs.xmlcharrefreplace_errors, + codecs.lookup_error('surrogateescape'), + codecs.lookup_error('surrogatepass'), + ] + for cls in UnicodeEncodeError, UnicodeDecodeError, UnicodeTranslateError: + class FakeUnicodeError(str): + __class__ = cls + for handler in handlers: + with self.subTest(handler=handler, error_class=cls): + self.assertRaises(TypeError, handler, FakeUnicodeError()) + class FakeUnicodeError(Exception): + __class__ = cls + for handler in handlers: + with self.subTest(handler=handler, error_class=cls): + with self.assertRaises((TypeError, FakeUnicodeError)): + handler(FakeUnicodeError()) + if __name__ == "__main__": unittest.main() diff --cc Misc/NEWS index b51a9f2087,a41bef055e..141d2e0a54 --- a/Misc/NEWS +++ b/Misc/NEWS @@@ -10,14 -10,8 +10,16 @@@ Release date: 2015-05-2 Core and Builtins ----------------- + - Issue #24102: Fixed exception type checking in standard error handlers. + +- Issue #15027: The UTF-32 encoder is now 3x to 7x faster. + +- Issue #23290: Optimize set_merge() for cases where the target is empty. + (Contributed by Serhiy Storchaka.) + +- Issue #20274: When calling a _sqlite.Connection, it now complains if passed + any keyword arguments. Previously it silently ignored them. + - Issue #20274: Remove ignored and erroneous "kwargs" parameters from three METH_VARARGS methods on _sqlite.Connection. diff --cc Python/codecs.c index 64fc3d6331,74445b03dc..38b0c2c33d --- a/Python/codecs.c +++ b/Python/codecs.c @@@ -863,121 -854,8 +855,121 @@@ PyObject *PyCodec_XMLCharRefReplaceErro } PyObject *PyCodec_BackslashReplaceErrors(PyObject *exc) +{ + PyObject *object; + Py_ssize_t i; + Py_ssize_t start; + Py_ssize_t end; + PyObject *res; + unsigned char *outp; + int ressize; + Py_UCS4 c; + - if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) { ++ if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeDecodeError)) { + unsigned char *p; + if (PyUnicodeDecodeError_GetStart(exc, &start)) + return NULL; + if (PyUnicodeDecodeError_GetEnd(exc, &end)) + return NULL; + if (!(object = PyUnicodeDecodeError_GetObject(exc))) + return NULL; + if (!(p = (unsigned char*)PyBytes_AsString(object))) { + Py_DECREF(object); + return NULL; + } + res = PyUnicode_New(4 * (end - start), 127); + if (res == NULL) { + Py_DECREF(object); + return NULL; + } + outp = PyUnicode_1BYTE_DATA(res); + for (i = start; i < end; i++, outp += 4) { + unsigned char c = p[i]; + outp[0] = '\\'; + outp[1] = 'x'; + outp[2] = Py_hexdigits[(c>>4)&0xf]; + outp[3] = Py_hexdigits[c&0xf]; + } + + assert(_PyUnicode_CheckConsistency(res, 1)); + Py_DECREF(object); + return Py_BuildValue("(Nn)", res, end); + } - if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { ++ if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { + if (PyUnicodeEncodeError_GetStart(exc, &start)) + return NULL; + if (PyUnicodeEncodeError_GetEnd(exc, &end)) + return NULL; + if (!(object = PyUnicodeEncodeError_GetObject(exc))) + return NULL; + } - else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) { ++ else if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeTranslateError)) { + if (PyUnicodeTranslateError_GetStart(exc, &start)) + return NULL; + if (PyUnicodeTranslateError_GetEnd(exc, &end)) + return NULL; + if (!(object = PyUnicodeTranslateError_GetObject(exc))) + return NULL; + } + else { + wrong_exception_type(exc); + return NULL; + } + + if (end - start > PY_SSIZE_T_MAX / (1+1+8)) + end = start + PY_SSIZE_T_MAX / (1+1+8); + for (i = start, ressize = 0; i < end; ++i) { + /* object is guaranteed to be "ready" */ + c = PyUnicode_READ_CHAR(object, i); + if (c >= 0x10000) { + ressize += 1+1+8; + } + else if (c >= 0x100) { + ressize += 1+1+4; + } + else + ressize += 1+1+2; + } + res = PyUnicode_New(ressize, 127); + if (res == NULL) { + Py_DECREF(object); + return NULL; + } + outp = PyUnicode_1BYTE_DATA(res); + for (i = start; i < end; ++i) { + c = PyUnicode_READ_CHAR(object, i); + *outp++ = '\\'; + if (c >= 0x00010000) { + *outp++ = 'U'; + *outp++ = Py_hexdigits[(c>>28)&0xf]; + *outp++ = Py_hexdigits[(c>>24)&0xf]; + *outp++ = Py_hexdigits[(c>>20)&0xf]; + *outp++ = Py_hexdigits[(c>>16)&0xf]; + *outp++ = Py_hexdigits[(c>>12)&0xf]; + *outp++ = Py_hexdigits[(c>>8)&0xf]; + } + else if (c >= 0x100) { + *outp++ = 'u'; + *outp++ = Py_hexdigits[(c>>12)&0xf]; + *outp++ = Py_hexdigits[(c>>8)&0xf]; + } + else + *outp++ = 'x'; + *outp++ = Py_hexdigits[(c>>4)&0xf]; + *outp++ = Py_hexdigits[c&0xf]; + } + + assert(_PyUnicode_CheckConsistency(res, 1)); + Py_DECREF(object); + return Py_BuildValue("(Nn)", res, end); +} + +static _PyUnicode_Name_CAPI *ucnhash_CAPI = NULL; +static int ucnhash_initialized = 0; + +PyObject *PyCodec_NameReplaceErrors(PyObject *exc) { - if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) { + if (PyObject_TypeCheck(exc, (PyTypeObject *)PyExc_UnicodeEncodeError)) { PyObject *restuple; PyObject *object; Py_ssize_t i;