]> granicus.if.org Git - python/commitdiff
Issue 5670: special-case pickling of dicts. This nearly doubles the performance of...
author Collin Winter <collinw@gmail.com>
Mon, 25 May 2009 04:34:39 +0000 (04:34 +0000)
committer Collin Winter <collinw@gmail.com>
Mon, 25 May 2009 04:34:39 +0000 (04:34 +0000)
Modules/cPickle.c

index a0e443ed940e9aa208bd723958d77b7ece610972..fbb1888aaa9f3c6dc689fde29d4c8602a9a43895 100644 (file)
@@ -1860,13 +1860,74 @@ BatchFailed:
        return -1;
 }
 
+/* This is a variant of batch_dict() above that specializes for dicts, with no
+ * support for dict subclasses. Like batch_dict(), we batch up chunks of
+ *     MARK key value ... key value SETITEMS
+ * opcode sequences.  Calling code should have arranged to first create an
+ * empty dict, or dict-like object, for the SETITEMS to operate on.
+ *
+ * self is the pickler to write to; obj is the dict whose items are saved.
+ * A dict holding exactly one item is written as
+ *     key value SETITEM
+ * with no MARK.
+ *
+ * Returns 0 on success, -1 on error.  A RuntimeError is set if the size of
+ * obj is found to have changed once a batch has been written.
+ *
+ * Note that this currently doesn't work for protocol 0.
+ */
+static int
+batch_dict_exact(Picklerobject *self, PyObject *obj)
+{
+       PyObject *key = NULL, *value = NULL;
+       int i, status;
+       Py_ssize_t dict_size, ppos = 0;
+
+       static char setitem = SETITEM;
+       static char setitems = SETITEMS;
+
+       assert(obj != NULL);
+       assert(self->proto > 0);
+
+       dict_size = PyDict_Size(obj);
+
+       /* Special-case len(d) == 1 to save space. */
+       if (dict_size == 1) {
+               PyDict_Next(obj, &ppos, &key, &value);
+               /* PyDict_Next() hands back borrowed references.  Hold our own
+                  across save() so the pair stays alive even if pickling it
+                  ends up modifying obj. */
+               Py_INCREF(key);
+               Py_INCREF(value);
+               status = save(self, key, 0);
+               if (status >= 0)
+                       status = save(self, value, 0);
+               Py_DECREF(value);
+               Py_DECREF(key);
+               if (status < 0)
+                       return -1;
+               if (self->write_func(self, &setitem, 1) < 0)
+                       return -1;
+               return 0;
+       }
+
+       /* Write in batches of BATCHSIZE. */
+       do {
+               i = 0;
+               if (self->write_func(self, &MARKv, 1) < 0)
+                       return -1;
+               while (PyDict_Next(obj, &ppos, &key, &value)) {
+                       /* Borrowed references again: keep the pair alive for
+                          the duration of both save() calls (the size check
+                          below exists because obj can change under us). */
+                       Py_INCREF(key);
+                       Py_INCREF(value);
+                       status = save(self, key, 0);
+                       if (status >= 0)
+                               status = save(self, value, 0);
+                       Py_DECREF(value);
+                       Py_DECREF(key);
+                       if (status < 0)
+                               return -1;
+                       if (++i == BATCHSIZE)
+                               break;
+               }
+               if (self->write_func(self, &setitems, 1) < 0)
+                       return -1;
+               /* The iteration position is only meaningful while the dict
+                  keeps its size, so give up if it changed. */
+               if (PyDict_Size(obj) != dict_size) {
+                       PyErr_Format(
+                               PyExc_RuntimeError,
+                               "dictionary changed size during iteration");
+                       return -1;
+               }
+
+               /* A full batch means there may be more items left. */
+       } while (i == BATCHSIZE);
+       return 0;
+}
+
 static int
 save_dict(Picklerobject *self, PyObject *args)
 {
        int res = -1;
        char s[3];
        int len;
-       PyObject *iter;
 
        if (self->fast && !fast_save_enter(self, args))
                goto finally;
@@ -1898,15 +1959,23 @@ save_dict(Picklerobject *self, PyObject *args)
                goto finally;
 
        /* Materialize the dict items. */
-       iter = PyObject_CallMethod(args, "iteritems", "()");
-       if (iter == NULL)
-               goto finally;
-       if (Py_EnterRecursiveCall(" while pickling an object") == 0)
-       {
-               res = batch_dict(self, iter);
-               Py_LeaveRecursiveCall();
+       if (PyDict_CheckExact(args) && self->proto > 0) {
+               /* We can take certain shortcuts if we know this is a dict and
+                  not a dict subclass. */
+               if (Py_EnterRecursiveCall(" while pickling an object") == 0) {
+                       res = batch_dict_exact(self, args);
+                       Py_LeaveRecursiveCall();
+               }
+       } else {
+               PyObject *iter = PyObject_CallMethod(args, "iteritems", "()");
+               if (iter == NULL)
+                       goto finally;
+               if (Py_EnterRecursiveCall(" while pickling an object") == 0) {
+                       res = batch_dict(self, iter);
+                       Py_LeaveRecursiveCall();
+               }
+               Py_DECREF(iter);
        }
-       Py_DECREF(iter);
 
   finally:
        if (self->fast && !fast_save_leave(self, args))