granicus.if.org Git - python/commitdiff
Issue #15596: Faster pickling of unicode strings.
author Antoine Pitrou <solipsis@pitrou.net>
Sun, 7 Apr 2013 15:38:11 +0000 (17:38 +0200)
committer Antoine Pitrou <solipsis@pitrou.net>
Sun, 7 Apr 2013 15:38:11 +0000 (17:38 +0200)
Misc/NEWS
Modules/_pickle.c

index d1fc765131fb5d66bc08b131292a3b803be711da..4c8f2114c1e07ced20094ac697402915773588d4 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -307,6 +307,8 @@ Core and Builtins
 Library
 -------
 
+- Issue #15596: Faster pickling of unicode strings.
+
 - Issue #17572: Avoid chained exceptions while passing bad directives to
   time.strptime().  Initial patch by Claudiu Popa.
 
index c213a51582b9f07dc7331bd55d08edfd44cbb972..f0d3e7928ccaa03e73928da9a94194fbca3944c0 100644 (file)
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -1873,63 +1873,97 @@ done:
 }
 
 static int
-save_unicode(PicklerObject *self, PyObject *obj)
+write_utf8(PicklerObject *self, char *data, Py_ssize_t size)
 {
-    Py_ssize_t size;
-    PyObject *encoded = NULL;
+    char pdata[5];
 
-    if (self->bin) {
-        char pdata[5];
+#if SIZEOF_SIZE_T > 4
+    if (size > 0xffffffffUL) {
+        /* string too large */
+        PyErr_SetString(PyExc_OverflowError,
+                        "cannot serialize a string larger than 4GB");
+        return -1;
+    }
+#endif
 
-        encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
-        if (encoded == NULL)
-            goto error;
+    pdata[0] = BINUNICODE;
+    pdata[1] = (unsigned char)(size & 0xff);
+    pdata[2] = (unsigned char)((size >> 8) & 0xff);
+    pdata[3] = (unsigned char)((size >> 16) & 0xff);
+    pdata[4] = (unsigned char)((size >> 24) & 0xff);
 
-        size = PyBytes_GET_SIZE(encoded);
-        if (size > 0xffffffffL) {
-            PyErr_SetString(PyExc_OverflowError,
-                            "cannot serialize a string larger than 4 GiB");
-            goto error;          /* string too large */
-        }
+    if (_Pickler_Write(self, pdata, sizeof(pdata)) < 0)
+        return -1;
+
+    if (_Pickler_Write(self, data, size) < 0)
+        return -1;
 
-        pdata[0] = BINUNICODE;
-        pdata[1] = (unsigned char)(size & 0xff);
-        pdata[2] = (unsigned char)((size >> 8) & 0xff);
-        pdata[3] = (unsigned char)((size >> 16) & 0xff);
-        pdata[4] = (unsigned char)((size >> 24) & 0xff);
+    return 0;
+}
 
-        if (_Pickler_Write(self, pdata, 5) < 0)
-            goto error;
+static int
+write_unicode_binary(PicklerObject *self, PyObject *obj)
+{
+    PyObject *encoded = NULL;
+    Py_ssize_t size;
+    char *data;
+    int r;
 
-        if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
-            goto error;
+    if (PyUnicode_READY(obj))
+        return -1;
+
+    data = PyUnicode_AsUTF8AndSize(obj, &size);
+    if (data != NULL)
+        return write_utf8(self, data, size);
+
+    /* Issue #8383: for strings with lone surrogates, fallback on the
+       "surrogatepass" error handler. */
+    PyErr_Clear();
+    encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
+    if (encoded == NULL)
+        return -1;
+
+    r = write_utf8(self, PyBytes_AS_STRING(encoded),
+                   PyBytes_GET_SIZE(encoded));
+    Py_DECREF(encoded);
+    return r;
+}
+
+static int
+save_unicode(PicklerObject *self, PyObject *obj)
+{
+    if (self->bin) {
+        if (write_unicode_binary(self, obj) < 0)
+            return -1;
     }
     else {
+        PyObject *encoded;
+        Py_ssize_t size;
         const char unicode_op = UNICODE;
 
         encoded = raw_unicode_escape(obj);
         if (encoded == NULL)
-            goto error;
+            return -1;
 
-        if (_Pickler_Write(self, &unicode_op, 1) < 0)
-            goto error;
+        if (_Pickler_Write(self, &unicode_op, 1) < 0) {
+            Py_DECREF(encoded);
+            return -1;
+        }
 
         size = PyBytes_GET_SIZE(encoded);
-        if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
-            goto error;
+        if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
+            Py_DECREF(encoded);
+            return -1;
+        }
+        Py_DECREF(encoded);
 
         if (_Pickler_Write(self, "\n", 1) < 0)
-            goto error;
+            return -1;
     }
     if (memo_put(self, obj) < 0)
-        goto error;
+        return -1;
 
-    Py_DECREF(encoded);
     return 0;
-
-  error:
-    Py_XDECREF(encoded);
-    return -1;
 }
 
 /* A helper for save_tuple.  Push the len elements in tuple t on the stack. */