granicus.if.org Git - python/commitdiff
Implemented batching for dicts in cPickle. This is after two failed
author Tim Peters <tim.peters@gmail.com>
Tue, 11 Feb 2003 22:43:24 +0000 (22:43 +0000)
committer Tim Peters <tim.peters@gmail.com>
Tue, 11 Feb 2003 22:43:24 +0000 (22:43 +0000)
attempts to merge the C list-batch and dict-batch code -- they worked, but
it was a godawful mess to read.

Lib/pickle.py
Lib/test/pickletester.py
Modules/cPickle.c

index 0173c1f83a7e2e5c2d1bc49a27c01b74fb6250fb..00f5834beaccf4d1d793ceb51bc97bf1bb16e220 100644 (file)
@@ -612,7 +612,8 @@ class Pickler:
 
     dispatch[ListType] = save_list
 
-    # Keep in synch with cPickle's BATCHSIZE.
+    # Keep in synch with cPickle's BATCHSIZE.  Nothing will break if it gets
+    # out of synch, though.
     _BATCHSIZE = 1000
 
     def _batch_appends(self, items):
index 6ed29b1b42bda5f0bb785c2f7234d5015d1af0db..734f2a30c0b2ded73b3eb8dc06481b5a8e7320bc 100644 (file)
@@ -694,23 +694,6 @@ class AbstractPickleTests(unittest.TestCase):
             else:
                 self.failUnless(num_appends >= 2)
 
-# XXX Temporary hack, so long as the C implementation of pickle protocol
-# XXX 2 isn't ready.  When it is, move the methods in TempAbstractPickleTests
-# XXX into AbstractPickleTests above, and get rid of TempAbstractPickleTests
-# XXX along with the references to it in test_pickle.py.
-class TempAbstractPickleTests(unittest.TestCase):
-
-    def test_newobj_list_slots(self):
-        x = SlotList([1, 2, 3])
-        x.foo = 42
-        x.bar = "hello"
-        s = self.dumps(x, 2)
-        y = self.loads(s)
-        self.assertEqual(list(x), list(y))
-        self.assertEqual(x.__dict__, y.__dict__)
-        self.assertEqual(x.foo, y.foo)
-        self.assertEqual(x.bar, y.bar)
-
     def test_dict_chunking(self):
         n = 10  # too small to chunk
         x = dict.fromkeys(range(n))
@@ -733,6 +716,23 @@ class TempAbstractPickleTests(unittest.TestCase):
             else:
                 self.failUnless(num_setitems >= 2)
 
+# XXX Temporary hack, so long as the C implementation of pickle protocol
+# XXX 2 isn't ready.  When it is, move the methods in TempAbstractPickleTests
+# XXX into AbstractPickleTests above, and get rid of TempAbstractPickleTests
+# XXX along with the references to it in test_pickle.py.
+class TempAbstractPickleTests(unittest.TestCase):
+
+    def test_newobj_list_slots(self):
+        x = SlotList([1, 2, 3])
+        x.foo = 42
+        x.bar = "hello"
+        s = self.dumps(x, 2)
+        y = self.loads(s)
+        self.assertEqual(list(x), list(y))
+        self.assertEqual(x.__dict__, y.__dict__)
+        self.assertEqual(x.foo, y.foo)
+        self.assertEqual(x.bar, y.bar)
+
 class MyInt(int):
     sample = 1
 
index a35905d78856f237be1e4b22eb2d8fd6ca074dd2..6af4afd3a7de46423666b49b2ef808ed4a8171ce 100644 (file)
@@ -88,7 +88,9 @@ PyDoc_STRVAR(cPickle_module_documentation,
 #define FALSE       "I00\n"
 
 /* Keep in synch with pickle.Pickler._BATCHSIZE.  This is how many elements
- * batch_{list, dict} pump out before doing APPENDS/SETITEMS.
+ * batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
+ * break if this gets out of synch with pickle.py, but it's unclear that
+ * would help anything either.
  */
 #define BATCHSIZE 1000
 
@@ -1709,7 +1711,6 @@ save_list(Picklerobject *self, PyObject *args)
        int len;
        PyObject *iter;
 
-
        if (self->fast && !fast_save_enter(self, args))
                goto finally;
 
@@ -1756,18 +1757,123 @@ save_list(Picklerobject *self, PyObject *args)
 }
 
 
+/* Pickle the (key, value) pairs produced by iter as SETITEM/SETITEMS ops.
+ *
+ * iter is an iterator giving (key, value) pairs, and we batch up chunks of
+ *     MARK key value ... key value SETITEMS
+ * opcode sequences.  Calling code should have arranged to first create an
+ * empty dict, or dict-like object, for the SETITEMS to operate on.
+ * Returns 0 on success, <0 on error.
+ *
+ * Under protocol 0, SETITEMS isn't used:  each pair is followed by its own
+ * SETITEM.  Otherwise pairs are written in chunks of at most BATCHSIZE; a
+ * chunk holding exactly one pair is written with SETITEM instead of
+ * MARK ... SETITEMS, and an empty chunk writes nothing.
+ *
+ * Each item fetched with PyIter_Next() is a new reference owned by this
+ * function.  It is released on every path, including when the item is
+ * rejected for not being a 2-tuple (a TypeError is set in that case).
+ * iter itself is not released here; that is left to the caller.
+ *
+ * This is very much like batch_list().  The difference between saving
+ * elements directly, and picking apart two-tuples, is so long-winded at
+ * the C level, though, that attempts to combine these routines were too
+ * ugly to bear.
+ */
+static int
+batch_dict(Picklerobject *self, PyObject *iter)
+{
+       PyObject *p;
+       PyObject *slice[BATCHSIZE];
+       int i, n;
+
+       static char setitem = SETITEM;
+       static char setitems = SETITEMS;
+
+       assert(iter != NULL);
+
+       if (self->proto == 0) {
+               /* SETITEMS isn't available; do one at a time. */
+               for (;;) {
+                       p = PyIter_Next(iter);
+                       if (p == NULL) {
+                               /* NULL means exhausted unless an error is set. */
+                               if (PyErr_Occurred())
+                                       return -1;
+                               break;
+                       }
+                       if (!PyTuple_Check(p) || PyTuple_Size(p) != 2) {
+                               /* We own p:  release it before bailing out,
+                                * else the rejected item is leaked.
+                                */
+                               Py_DECREF(p);
+                               PyErr_SetString(PyExc_TypeError, "dict items "
+                                       "iterator must return 2-tuples");
+                               return -1;
+                       }
+                       i = save(self, PyTuple_GET_ITEM(p, 0), 0);
+                       if (i >= 0)
+                               i = save(self, PyTuple_GET_ITEM(p, 1), 0);
+                       /* Done with the pair whether or not saving worked. */
+                       Py_DECREF(p);
+                       if (i < 0)
+                               return -1;
+                       if (self->write_func(self, &setitem, 1) < 0)
+                               return -1;
+
+               }
+               return 0;
+       }
+
+       /* proto > 0:  write in batches of BATCHSIZE. */
+       do {
+               /* Get next group of (no more than) BATCHSIZE elements. */
+               for (n = 0; n < BATCHSIZE; ++n) {
+                       p = PyIter_Next(iter);
+                       if (p == NULL) {
+                               if (PyErr_Occurred())
+                                       goto BatchFailed;
+                               break;
+                       }
+                       if (!PyTuple_Check(p) || PyTuple_Size(p) != 2) {
+                               /* p isn't in slice[] yet, so BatchFailed
+                                * won't release it:  do that here.
+                                */
+                               Py_DECREF(p);
+                               PyErr_SetString(PyExc_TypeError, "dict items "
+                                       "iterator must return 2-tuples");
+                               goto BatchFailed;
+                       }
+                       slice[n] = p;
+               }
+
+               if (n > 1) {
+                       /* Pump out MARK, slice[0:n], SETITEMS. */
+                       if (self->write_func(self, &MARKv, 1) < 0)
+                               goto BatchFailed;
+                       for (i = 0; i < n; ++i) {
+                               p = slice[i];
+                               if (save(self, PyTuple_GET_ITEM(p, 0), 0) < 0)
+                                       goto BatchFailed;
+                               if (save(self, PyTuple_GET_ITEM(p, 1), 0) < 0)
+                                       goto BatchFailed;
+                       }
+                       if (self->write_func(self, &setitems, 1) < 0)
+                               goto BatchFailed;
+               }
+               else if (n == 1) {
+                       /* A lone pair:  plain SETITEM, no MARK needed. */
+                       p = slice[0];
+                       if (save(self, PyTuple_GET_ITEM(p, 0), 0) < 0)
+                               goto BatchFailed;
+                       if (save(self, PyTuple_GET_ITEM(p, 1), 0) < 0)
+                               goto BatchFailed;
+                       if (self->write_func(self, &setitem, 1) < 0)
+                               goto BatchFailed;
+               }
+
+               /* This chunk is written; drop our references to its pairs. */
+               for (i = 0; i < n; ++i) {
+                       Py_DECREF(slice[i]);
+               }
+       } while (n == BATCHSIZE);  /* a short chunk means iter is exhausted */
+       return 0;
+
+BatchFailed:
+       /* slice[0:n] holds the pairs collected so far for this chunk. */
+       while (--n >= 0) {
+               Py_DECREF(slice[n]);
+       }
+       return -1;
+}
+
 static int
 save_dict(Picklerobject *self, PyObject *args)
 {
-       PyObject *key = 0, *value = 0;
-       int i, len, res = -1, using_setitems;
+       int res = -1;
        char s[3];
-
-       static char setitem = SETITEM, setitems = SETITEMS;
+       int len;
+       PyObject *iter;
 
        if (self->fast && !fast_save_enter(self, args))
                goto finally;
 
+       /* Create an empty dict. */
        if (self->bin) {
                s[0] = EMPTY_DICT;
                len = 1;
@@ -1781,6 +1887,7 @@ save_dict(Picklerobject *self, PyObject *args)
        if (self->write_func(self, s, len) < 0)
                goto finally;
 
+       /* Get dict size, and bow out early if empty. */
        if ((len = PyDict_Size(args)) < 0)
                goto finally;
 
@@ -1793,30 +1900,12 @@ save_dict(Picklerobject *self, PyObject *args)
                        goto finally;
        }
 
-       if ((using_setitems = (self->bin && (PyDict_Size(args) > 1))))
-               if (self->write_func(self, &MARKv, 1) < 0)
-                       goto finally;
-
-       i = 0;
-       while (PyDict_Next(args, &i, &key, &value)) {
-               if (save(self, key, 0) < 0)
-                       goto finally;
-
-               if (save(self, value, 0) < 0)
-                       goto finally;
-
-               if (!using_setitems) {
-                       if (self->write_func(self, &setitem, 1) < 0)
-                               goto finally;
-               }
-       }
-
-       if (using_setitems) {
-               if (self->write_func(self, &setitems, 1) < 0)
-                       goto finally;
-       }
-
-       res = 0;
+       /* Materialize the dict items. */
+       iter = PyObject_CallMethod(args, "iteritems", "()");
+       if (iter == NULL)
+               goto finally;
+       res = batch_dict(self, iter);
+       Py_DECREF(iter);
 
   finally:
        if (self->fast && !fast_save_leave(self, args))