]> granicus.if.org Git - python/commitdiff
bpo-31993: Do not create frames for large bytes and str objects (#5114)
authorSerhiy Storchaka <storchaka@gmail.com>
Thu, 11 Jan 2018 11:03:20 +0000 (13:03 +0200)
committerGitHub <noreply@github.com>
Thu, 11 Jan 2018 11:03:20 +0000 (13:03 +0200)
when serialize into memory buffer with C pickle implementations.

This optimization already is performed when serialize into memory
with Python pickle implementations or into a file with both
implementations.

Lib/test/pickletester.py
Lib/test/test_pickle.py
Modules/_pickle.c

index 5d983eb617fe3faac020f85fd012412882a2af94..f4e3f81249c86ed27d0dc23c00f80a9afc9c849d 100644 (file)
@@ -2097,20 +2097,21 @@ class AbstractPickleTests(unittest.TestCase):
         N = 1024 * 1024
         obj = [b'x' * N, b'y' * N, 'z' * N]
         for proto in range(4, pickle.HIGHEST_PROTOCOL + 1):
-            for fast in [True, False]:
+            for fast in [False, True]:
                 with self.subTest(proto=proto, fast=fast):
-                    if hasattr(self, 'pickler'):
+                    if not fast:
+                        # fast=False by default.
+                        # This covers in-memory pickling with pickle.dumps().
+                        pickled = self.dumps(obj, proto)
+                    else:
+                        # Pickler is required when fast=True.
+                        if not hasattr(self, 'pickler'):
+                            continue
                         buf = io.BytesIO()
                         pickler = self.pickler(buf, protocol=proto)
                         pickler.fast = fast
                         pickler.dump(obj)
                         pickled = buf.getvalue()
-                    elif fast:
-                        continue
-                    else:
-                        # Fallback to self.dumps when fast=False and
-                        # self.pickler is not available.
-                        pickled = self.dumps(obj, proto)
                     unpickled = self.loads(pickled)
                     # More informative error message in case of failure.
                     self.assertEqual([len(x) for x in obj],
index 895ed48df1d8d20c2ac6b1c3f8f68e994143c483..0051a0118a3f218d08ad7226c8a6c8482d57d339 100644 (file)
@@ -71,8 +71,6 @@ class PyPicklerTests(AbstractPickleTests):
 class InMemoryPickleTests(AbstractPickleTests, AbstractUnpickleTests,
                           BigmemPickleTests):
 
-    pickler = pickle._Pickler
-    unpickler = pickle._Unpickler
     bad_stack_errors = (pickle.UnpicklingError, IndexError)
     truncated_errors = (pickle.UnpicklingError, EOFError,
                         AttributeError, ValueError,
@@ -84,6 +82,8 @@ class InMemoryPickleTests(AbstractPickleTests, AbstractUnpickleTests,
     def loads(self, buf, **kwds):
         return pickle.loads(buf, **kwds)
 
+    test_framed_write_sizes_with_delayed_writer = None
+
 
 class PersistentPicklerUnpicklerMixin(object):
 
index 5cb1fba6cc2918bda211c764404ee1d2ebadc443..5b3de4d96dda02d31e38de7a907ae0463a73c23f 100644 (file)
@@ -2142,47 +2142,74 @@ done:
     return 0;
 }
 
-/* No-copy code-path to write large contiguous data directly into the
-   underlying file object, bypassing the output_buffer of the Pickler. */
-static int
-_Pickler_write_large_bytes(
-    PicklerObject *self, const char *header, Py_ssize_t header_size,
-    PyObject *payload)
-{
-    assert(self->output_buffer != NULL);
-    assert(self->write != NULL);
-    PyObject *result;
+/* Perform direct write of the header and payload of the binary object.
 
-    /* Commit the previous frame. */
-    if (_Pickler_CommitFrame(self)) {
-        return -1;
+   The large contiguous data is written directly into the underlying file
+   object, bypassing the output_buffer of the Pickler.  We intentionally
+   do not insert a protocol 4 frame opcode to make it possible to optimize
+   file.read calls in the loader.
+ */
+static int
+_Pickler_write_bytes(PicklerObject *self,
+                     const char *header, Py_ssize_t header_size,
+                     const char *data, Py_ssize_t data_size,
+                     PyObject *payload)
+{
+    int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
+    int framing = self->framing;
+
+    if (bypass_buffer) {
+        assert(self->output_buffer != NULL);
+        /* Commit the previous frame. */
+        if (_Pickler_CommitFrame(self)) {
+            return -1;
+        }
+        /* Disable framing temporarily */
+        self->framing = 0;
     }
-    /* Disable frameing temporarily */
-    self->framing = 0;
 
     if (_Pickler_Write(self, header, header_size) < 0) {
         return -1;
     }
-    /* Dump the output buffer to the file. */
-    if (_Pickler_FlushToFile(self) < 0) {
-        return -1;
-    }
 
-    /* Stream write the payload into the file without going through the
-       output buffer. */
-    result = PyObject_CallFunctionObjArgs(self->write, payload, NULL);
-    if (result == NULL) {
-        return -1;
-    }
-    Py_DECREF(result);
+    if (bypass_buffer && self->write != NULL) {
+        /* Bypass the in-memory buffer to directly stream large data
+           into the underlying file object. */
+        PyObject *result, *mem = NULL;
+        /* Dump the output buffer to the file. */
+        if (_Pickler_FlushToFile(self) < 0) {
+            return -1;
+        }
 
-    /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
-    if (_Pickler_ClearBuffer(self) < 0) {
-        return -1;
+        /* Stream write the payload into the file without going through the
+           output buffer. */
+        if (payload == NULL) {
+            payload = mem = PyMemoryView_FromMemory((char *) data, data_size,
+                                                    PyBUF_READ);
+            if (payload == NULL) {
+                return -1;
+            }
+        }
+        result = PyObject_CallFunctionObjArgs(self->write, payload, NULL);
+        Py_XDECREF(mem);
+        if (result == NULL) {
+            return -1;
+        }
+        Py_DECREF(result);
+
+        /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
+        if (_Pickler_ClearBuffer(self) < 0) {
+            return -1;
+        }
+    }
+    else {
+        if (_Pickler_Write(self, data, data_size) < 0) {
+            return -1;
+        }
     }
 
     /* Re-enable framing for subsequent calls to _Pickler_Write. */
-    self->framing = 1;
+    self->framing = framing;
 
     return 0;
 }
@@ -2265,20 +2292,10 @@ save_bytes(PicklerObject *self, PyObject *obj)
             return -1;          /* string too large */
         }
 
-        if (size < FRAME_SIZE_TARGET || self->write == NULL) {
-            if (_Pickler_Write(self, header, len) < 0) {
-                return -1;
-            }
-            if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0) {
-                return -1;
-            }
-        }
-        else {
-            /* Bypass the in-memory buffer to directly stream large data
-               into the underlying file object. */
-            if (_Pickler_write_large_bytes(self, header, len, obj) < 0) {
-                return -1;
-            }
+        if (_Pickler_write_bytes(self, header, len,
+                                 PyBytes_AS_STRING(obj), size, obj) < 0)
+        {
+            return -1;
         }
 
         if (memo_put(self, obj) < 0)
@@ -2360,11 +2377,29 @@ error:
 }
 
 static int
-write_utf8(PicklerObject *self, const char *data, Py_ssize_t size)
+write_unicode_binary(PicklerObject *self, PyObject *obj)
 {
     char header[9];
     Py_ssize_t len;
-    PyObject *mem;
+    PyObject *encoded = NULL;
+    Py_ssize_t size;
+    const char *data;
+
+    if (PyUnicode_READY(obj))
+        return -1;
+
+    data = PyUnicode_AsUTF8AndSize(obj, &size);
+    if (data == NULL) {
+        /* Issue #8383: for strings with lone surrogates, fallback on the
+           "surrogatepass" error handler. */
+        PyErr_Clear();
+        encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
+        if (encoded == NULL)
+            return -1;
+
+        data = PyBytes_AS_STRING(encoded);
+        size = PyBytes_GET_SIZE(encoded);
+    }
 
     assert(size >= 0);
     if (size <= 0xff && self->proto >= 4) {
@@ -2388,61 +2423,18 @@ write_utf8(PicklerObject *self, const char *data, Py_ssize_t size)
     else {
         PyErr_SetString(PyExc_OverflowError,
                         "cannot serialize a string larger than 4GiB");
+        Py_XDECREF(encoded);
         return -1;
     }
 
-    if (size < FRAME_SIZE_TARGET || self->write == NULL) {
-        if (_Pickler_Write(self, header, len) < 0) {
-            return -1;
-        }
-        if (_Pickler_Write(self, data, size) < 0) {
-            return -1;
-        }
-    }
-    else {
-        /* Bypass the in-memory buffer to directly stream large data
-           into the underlying file object. */
-        mem = PyMemoryView_FromMemory((char *) data, size, PyBUF_READ);
-        if (mem == NULL) {
-            return -1;
-        }
-        if (_Pickler_write_large_bytes(self, header, len, mem) < 0) {
-            Py_DECREF(mem);
-            return -1;
-        }
-        Py_DECREF(mem);
+    if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
+        Py_XDECREF(encoded);
+        return -1;
     }
+    Py_XDECREF(encoded);
     return 0;
 }
 
-static int
-write_unicode_binary(PicklerObject *self, PyObject *obj)
-{
-    PyObject *encoded = NULL;
-    Py_ssize_t size;
-    const char *data;
-    int r;
-
-    if (PyUnicode_READY(obj))
-        return -1;
-
-    data = PyUnicode_AsUTF8AndSize(obj, &size);
-    if (data != NULL)
-        return write_utf8(self, data, size);
-
-    /* Issue #8383: for strings with lone surrogates, fallback on the
-       "surrogatepass" error handler. */
-    PyErr_Clear();
-    encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
-    if (encoded == NULL)
-        return -1;
-
-    r = write_utf8(self, PyBytes_AS_STRING(encoded),
-                   PyBytes_GET_SIZE(encoded));
-    Py_DECREF(encoded);
-    return r;
-}
-
 static int
 save_unicode(PicklerObject *self, PyObject *obj)
 {