From: Victor Stinner
Date: Wed, 2 Mar 2011 01:03:14 +0000 (+0000)
Subject: Issue #8923: cache str.encode() result
X-Git-Tag: v3.3.0a1~3012
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=a5c68c3cb7bc5068833742dc10a3cd5a19e69e12;p=python

Issue #8923: cache str.encode() result

When a string is encoded to UTF-8 in strict mode, the result is cached into
the object. Examples: str.encode(), str.encode('utf-8'),
PyUnicode_AsUTF8String() and PyUnicode_AsEncodedString(unicode, "utf-8",
NULL).
---

diff --git a/Misc/NEWS b/Misc/NEWS
index b87fb127a5..31d7c4c2a9 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,11 @@ What's New in Python 3.3 Alpha 1?
 Core and Builtins
 -----------------
 
+- Issue #8923: When a string is encoded to UTF-8 in strict mode, the result is
+  cached into the object. Examples: str.encode(), str.encode('utf-8'),
+  PyUnicode_AsUTF8String() and PyUnicode_AsEncodedString(unicode, "utf-8",
+  NULL).
+
 - Issue #10831: PyUnicode_FromFormat() supports %li, %lli and %zi formats.

- Issue #10829: Refactor PyUnicode_FromFormat(), use the same function to parse diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e4539cd46a..68012597ca 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1710,17 +1710,21 @@ PyUnicode_AsEncodedString(PyObject *unicode, } if (encoding == NULL) - return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), - PyUnicode_GET_SIZE(unicode), - errors); + return PyUnicode_AsUTF8String(unicode); /* Shortcuts for common default encodings */ if (normalize_encoding(encoding, lower, sizeof(lower))) { if ((strcmp(lower, "utf-8") == 0) || (strcmp(lower, "utf8") == 0)) - return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), - PyUnicode_GET_SIZE(unicode), - errors); + { + if (errors == NULL || strcmp(errors, "strict") == 0) { + return PyUnicode_AsUTF8String(unicode); + } else { + return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), + PyUnicode_GET_SIZE(unicode), + errors); + } + } else if ((strcmp(lower, "latin-1") == 0) || (strcmp(lower, "latin1") == 0) || (strcmp(lower, "iso-8859-1") == 0)) @@ -3077,13 +3081,16 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s, PyObject * PyUnicode_AsUTF8String(PyObject *unicode) { + PyObject *utf8; if (!PyUnicode_Check(unicode)) { PyErr_BadArgument(); return NULL; } - return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode), - PyUnicode_GET_SIZE(unicode), - NULL); + utf8 = _PyUnicode_AsDefaultEncodedString(unicode); + if (utf8 == NULL) + return NULL; + Py_INCREF(utf8); + return utf8; } /* --- UTF-32 Codec ------------------------------------------------------- */