Issue #8923: cache str.encode() result

author Victor Stinner <victor.stinner@haypocalc.com>
Wed, 2 Mar 2011 01:03:14 +0000 (01:03 +0000)
committer Victor Stinner <victor.stinner@haypocalc.com>
Wed, 2 Mar 2011 01:03:14 +0000 (01:03 +0000)
diff --git a/Misc/NEWS b/Misc/NEWS

index b87fb127a5ef60930a822b8a7f11c40c043e4eae..31d7c4c2a999c31371afa7a400eb9f4416cf141c 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,11 @@ What's New in Python 3.3 Alpha 1?
  Core and Builtins
  -----------------
  
+- Issue #8923: When a string is encoded to UTF-8 in strict mode, the result is
+  cached into the object. Examples: str.encode(), str.encode('utf-8'),
+  PyUnicode_AsUTF8String() and PyUnicode_AsEncodedString(unicode, "utf-8",
+  NULL).
+
  - Issue #10831: PyUnicode_FromFormat() supports %li, %lli and %zi formats.
  
  - Issue #10829: Refactor PyUnicode_FromFormat(), use the same function to parse
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index e4539cd46a8d6f090bee4dc3f79ada5ec1820b72..68012597ca1505b9410113d80093d110b686822c 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1710,17 +1710,21 @@ PyUnicode_AsEncodedString(PyObject *unicode,
      }
  
      if (encoding == NULL)
-        return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
-                                    PyUnicode_GET_SIZE(unicode),
-                                    errors);
+        return PyUnicode_AsUTF8String(unicode);
  
      /* Shortcuts for common default encodings */
      if (normalize_encoding(encoding, lower, sizeof(lower))) {
          if ((strcmp(lower, "utf-8") == 0) ||
              (strcmp(lower, "utf8") == 0))
-            return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
-                                        PyUnicode_GET_SIZE(unicode),
-                                        errors);
+        {
+            if (errors == NULL || strcmp(errors, "strict") == 0) {
+                return PyUnicode_AsUTF8String(unicode);
+            } else {
+                return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+                                            PyUnicode_GET_SIZE(unicode),
+                                            errors);
+            }
+        }
          else if ((strcmp(lower, "latin-1") == 0) ||
                   (strcmp(lower, "latin1") == 0) ||
                   (strcmp(lower, "iso-8859-1") == 0))
@@ -3077,13 +3081,16 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s,
  PyObject *
  PyUnicode_AsUTF8String(PyObject *unicode)
  {
+    PyObject *utf8;
      if (!PyUnicode_Check(unicode)) {
          PyErr_BadArgument();
          return NULL;
      }
-    return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
-                                PyUnicode_GET_SIZE(unicode),
-                                NULL);
+    utf8 = _PyUnicode_AsDefaultEncodedString(unicode);
+    if (utf8 == NULL)
+        return NULL;
+    Py_INCREF(utf8);
+    return utf8;
  }
  
  /* --- UTF-32 Codec ------------------------------------------------------- */
author	Victor Stinner <victor.stinner@haypocalc.com>
	Wed, 2 Mar 2011 01:03:14 +0000 (01:03 +0000)
committer	Victor Stinner <victor.stinner@haypocalc.com>
	Wed, 2 Mar 2011 01:03:14 +0000 (01:03 +0000)
Misc/NEWS		patch | blob | history
Objects/unicodeobject.c		patch | blob | history