granicus.if.org Git - python/commitdiff
Issue #15513: Added a __sizeof__ implementation for pickle classes.
author Serhiy Storchaka <storchaka@gmail.com>
Tue, 16 Dec 2014 17:39:08 +0000 (19:39 +0200)
committer Serhiy Storchaka <storchaka@gmail.com>
Tue, 16 Dec 2014 17:39:08 +0000 (19:39 +0200)
Lib/test/test_pickle.py
Misc/NEWS
Modules/_pickle.c
Modules/clinic/_pickle.c.h

index 0b2fe1ef2aa365d0e9e6ea3ad2afac83746b9774..e1a88b6b5861dd15140cd5133b11c046dd0901c6 100644 (file)
@@ -1,7 +1,10 @@
 import pickle
 import io
 import collections
+import struct
+import sys
 
+import unittest
 from test import support
 
 from test.pickletester import AbstractPickleTests
@@ -138,6 +141,71 @@ if has_c_implementation:
         def get_dispatch_table(self):
             return collections.ChainMap({}, pickle.dispatch_table)
 
+    @support.cpython_only
+    class SizeofTests(unittest.TestCase):  # Exact-size checks for _pickle.Pickler and _pickle.Unpickler.
+        check_sizeof = support.check_sizeof  # shared helper; called below as self.check_sizeof(obj, expected_size)
+
+        def test_pickler(self):  # Pickler size = fixed part + memo table + write buffer.
+            basesize = support.calcobjsize('5P2n3i2n3iP')  # format presumably mirrors the C PicklerObject fields -- TODO confirm
+            p = _pickle.Pickler(io.BytesIO())
+            self.assertEqual(object.__sizeof__(p), basesize)  # base-class __sizeof__: fixed part only
+            MT_size = struct.calcsize('3nP0n')  # presumably sizeof(PyMemoTable) in _pickle.c -- TODO confirm
+            ME_size = struct.calcsize('Pn0P')  # presumably sizeof(PyMemoEntry) in _pickle.c -- TODO confirm
+            check = self.check_sizeof
+            check(p, basesize +
+                MT_size + 8 * ME_size +  # Minimal memo table size.
+                sys.getsizeof(b'x'*4096))  # Minimal write buffer size.
+            for i in range(6):
+                p.dump(chr(i))  # six distinct one-character strings, one dump() each
+            check(p, basesize +
+                MT_size + 32 * ME_size +  # Size of memo table required to
+                                          # save references to 6 objects.
+                0)  # Write buffer is cleared after every dump().
+
+        def test_unpickler(self):  # Unpickler size = fixed part + memo + marks + owned C strings.
+            basesize = support.calcobjsize('2Pn2P 2P2n2i5P 2P3n6P2n2i')  # format presumably mirrors the C UnpicklerObject fields -- TODO confirm
+            unpickler = _pickle.Unpickler
+            P = struct.calcsize('P')  # Size of memo table entry.
+            n = struct.calcsize('n')  # Size of mark table entry.
+            check = self.check_sizeof
+            for encoding in 'ASCII', 'UTF-16', 'latin-1':
+                for errors in 'strict', 'replace':
+                    u = unpickler(io.BytesIO(),
+                                  encoding=encoding, errors=errors)
+                    self.assertEqual(object.__sizeof__(u), basesize)  # base-class __sizeof__: fixed part only
+                    check(u, basesize +
+                             32 * P +  # Minimal memo table size.
+                             len(encoding) + 1 + len(errors) + 1)  # each string counted with one extra byte (strlen + 1 in the C code)
+
+            stdsize = basesize + len('ASCII') + 1 + len('strict') + 1  # fixed part plus the two option strings used below
+            def check_unpickler(data, memo_size, marks_size):  # memo_size / marks_size are entry counts, scaled by P and n
+                dump = pickle.dumps(data)
+                u = unpickler(io.BytesIO(dump),
+                              encoding='ASCII', errors='strict')
+                u.load()  # sizes are checked after a complete load()
+                check(u, stdsize + memo_size * P + marks_size * n)
+
+            check_unpickler(0, 32, 0)
+            # 20 is minimal non-empty mark stack size.
+            check_unpickler([0] * 100, 32, 20)
+            # 128 is memo table size required to save references to 100 objects.
+            check_unpickler([chr(i) for i in range(100)], 128, 20)
+            def recurse(deep):  # builds `deep` levels of nested two-item lists around 0
+                data = 0
+                for i in range(deep):
+                    data = [data, data]  # each level wraps the previous value twice
+                return data
+            check_unpickler(recurse(0), 32, 0)
+            check_unpickler(recurse(1), 32, 20)
+            check_unpickler(recurse(20), 32, 58)
+            check_unpickler(recurse(50), 64, 58)
+            check_unpickler(recurse(100), 128, 134)
+
+            u = unpickler(io.BytesIO(pickle.dumps('a', 0)),  # protocol 0 pickle
+                          encoding='ASCII', errors='strict')
+            u.load()
+            check(u, stdsize + 32 * P + 2 + 1)  # 2 + 1: presumably strlen(input_line) + 1 kept after a protocol 0 load -- TODO confirm
+
 
 def test_main():
     tests = [PickleTests, PyPicklerTests, PyPersPicklerTests,
@@ -148,7 +216,7 @@ def test_main():
                       PyPicklerUnpicklerObjectTests,
                       CPicklerUnpicklerObjectTests,
                       CDispatchTableTests, CChainDispatchTableTests,
-                      InMemoryPickleTests])
+                      InMemoryPickleTests, SizeofTests])
     support.run_unittest(*tests)
     support.run_doctest(pickle)
 
index ae37fd1fb1575f3712127a6ffa332f5807629057..928c3cf9b6c852bb63b44339607951751ad84a11 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -41,7 +41,9 @@ Core and Builtins
 Library
 -------
 
-- Issue #19858:  pickletools.optimize() now aware of the MEMOIZE opcode, can
+- Issue #15513: Added a __sizeof__ implementation for pickle classes.
+
+- Issue #19858: pickletools.optimize() now aware of the MEMOIZE opcode, can
   produce more compact result and no longer produces invalid output if input
   data contains MEMOIZE opcodes together with PUT or BINPUT opcodes.
 
index 7faf96dd93c21319d756d4cb16e57927d8e08d0f..10eb513149e803c57c8d1b3fc9b9c27af2dc69a4 100644 (file)
@@ -375,7 +375,7 @@ static PyTypeObject Pdata_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
     "_pickle.Pdata",              /*tp_name*/
     sizeof(Pdata),                /*tp_basicsize*/
-    0,                            /*tp_itemsize*/
+    sizeof(PyObject *),           /*tp_itemsize*/
     (destructor)Pdata_dealloc,    /*tp_dealloc*/
 };
 
@@ -3930,9 +3930,37 @@ _pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
     Py_RETURN_NONE;
 }
 
+/*[clinic input]
+
+_pickle.Pickler.__sizeof__ -> Py_ssize_t
+
+Returns size in memory, in bytes.
+[clinic start generated code]*/
+
+static Py_ssize_t
+_pickle_Pickler___sizeof___impl(PicklerObject *self)
+/*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
+{   /* Report the Pickler's size in bytes: the fixed struct, plus the
+     * memo table (header and entry array) when one exists, plus the
+     * size reported for the output buffer object when one exists.
+     * Returns -1 if sizing the output buffer fails; the generated
+     * wrapper treats that as an error only when an exception is set. */
+    Py_ssize_t res, s;
+
+    res = sizeof(PicklerObject);  /* fixed-size part */
+    if (self->memo != NULL) {
+        res += sizeof(PyMemoTable);  /* memo table header */
+        res += self->memo->mt_allocated * sizeof(PyMemoEntry);  /* one entry per allocated slot */
+    }
+    if (self->output_buffer != NULL) {
+        s = _PySys_GetSizeOf(self->output_buffer);  /* size of the buffer object itself */
+        if (s == -1)
+            return -1;  /* propagate the failure unchanged */
+        res += s;
+    }
+    return res;
+}
+
 static struct PyMethodDef Pickler_methods[] = {  /* entries expand from the clinic-generated *_METHODDEF macros */
     _PICKLE_PICKLER_DUMP_METHODDEF
     _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
+    _PICKLE_PICKLER___SIZEOF___METHODDEF  /* registers "__sizeof__" as a METH_NOARGS method */
     {NULL, NULL}                /* sentinel */
 };
 
@@ -6289,9 +6317,37 @@ _pickle_Unpickler_find_class_impl(UnpicklerObject *self, PyObject *module_name,
     return global;
 }
 
+/*[clinic input]
+
+_pickle.Unpickler.__sizeof__ -> Py_ssize_t
+
+Returns size in memory, in bytes.
+[clinic start generated code]*/
+
+static Py_ssize_t
+_pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
+/*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
+{   /* Report the Unpickler's size in bytes: the fixed struct plus
+     * each auxiliary buffer that is currently non-NULL -- the memo
+     * array, the marks array, and the input_line, encoding and
+     * errors C strings (each counted with its terminating NUL). */
+    Py_ssize_t res;
+
+    res = sizeof(UnpicklerObject);  /* fixed-size part */
+    if (self->memo != NULL)
+        res += self->memo_size * sizeof(PyObject *);  /* memo_size pointer-sized slots */
+    if (self->marks != NULL)
+        res += self->marks_size * sizeof(Py_ssize_t);  /* marks_size Py_ssize_t slots */
+    if (self->input_line != NULL)
+        res += strlen(self->input_line) + 1;  /* measured up to the first NUL, plus the NUL */
+    if (self->encoding != NULL)
+        res += strlen(self->encoding) + 1;  /* string plus NUL */
+    if (self->errors != NULL)
+        res += strlen(self->errors) + 1;  /* string plus NUL */
+    return res;
+}
+
 static struct PyMethodDef Unpickler_methods[] = {  /* entries expand from the clinic-generated *_METHODDEF macros */
     _PICKLE_UNPICKLER_LOAD_METHODDEF
     _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
+    _PICKLE_UNPICKLER___SIZEOF___METHODDEF  /* registers "__sizeof__" as a METH_NOARGS method */
     {NULL, NULL}                /* sentinel */
 };
 
index a20a61e6f58f7b42d9bed9ddfd4e1998b29f0fa2..975298ce4717fc688c261dd927c09af480286f10 100644 (file)
@@ -34,6 +34,33 @@ PyDoc_STRVAR(_pickle_Pickler_dump__doc__,
 #define _PICKLE_PICKLER_DUMP_METHODDEF    \
     {"dump", (PyCFunction)_pickle_Pickler_dump, METH_O, _pickle_Pickler_dump__doc__},
 
+PyDoc_STRVAR(_pickle_Pickler___sizeof____doc__,
+"__sizeof__($self, /)\n"
+"--\n"
+"\n"
+"Returns size in memory, in bytes.");
+
+#define _PICKLE_PICKLER___SIZEOF___METHODDEF    \
+    {"__sizeof__", (PyCFunction)_pickle_Pickler___sizeof__, METH_NOARGS, _pickle_Pickler___sizeof____doc__},
+
+static Py_ssize_t
+_pickle_Pickler___sizeof___impl(PicklerObject *self);
+
+static PyObject *
+_pickle_Pickler___sizeof__(PicklerObject *self, PyObject *Py_UNUSED(ignored))
+{   /* Generated METH_NOARGS wrapper: calls the _impl function and
+     * converts its Py_ssize_t result to a Python int.  Returns NULL
+     * when the impl returned -1 and an exception is pending. */
+    PyObject *return_value = NULL;
+    Py_ssize_t _return_value;
+
+    _return_value = _pickle_Pickler___sizeof___impl(self);
+    if ((_return_value == -1) && PyErr_Occurred())  /* -1 alone is not an error; an exception must also be set */
+        goto exit;
+    return_value = PyLong_FromSsize_t(_return_value);  /* box the C size as a Python int */
+
+exit:
+    return return_value;  /* still NULL on the error path */
+}
+
 PyDoc_STRVAR(_pickle_Pickler___init____doc__,
 "Pickler(file, protocol=None, fix_imports=True)\n"
 "--\n"
@@ -191,6 +218,33 @@ exit:
     return return_value;
 }
 
+PyDoc_STRVAR(_pickle_Unpickler___sizeof____doc__,
+"__sizeof__($self, /)\n"
+"--\n"
+"\n"
+"Returns size in memory, in bytes.");
+
+#define _PICKLE_UNPICKLER___SIZEOF___METHODDEF    \
+    {"__sizeof__", (PyCFunction)_pickle_Unpickler___sizeof__, METH_NOARGS, _pickle_Unpickler___sizeof____doc__},
+
+static Py_ssize_t
+_pickle_Unpickler___sizeof___impl(UnpicklerObject *self);
+
+static PyObject *
+_pickle_Unpickler___sizeof__(UnpicklerObject *self, PyObject *Py_UNUSED(ignored))
+{   /* Generated METH_NOARGS wrapper: calls the _impl function and
+     * converts its Py_ssize_t result to a Python int.  Returns NULL
+     * when the impl returned -1 and an exception is pending. */
+    PyObject *return_value = NULL;
+    Py_ssize_t _return_value;
+
+    _return_value = _pickle_Unpickler___sizeof___impl(self);
+    if ((_return_value == -1) && PyErr_Occurred())  /* -1 alone is not an error; an exception must also be set */
+        goto exit;
+    return_value = PyLong_FromSsize_t(_return_value);  /* box the C size as a Python int */
+
+exit:
+    return return_value;  /* still NULL on the error path */
+}
+
 PyDoc_STRVAR(_pickle_Unpickler___init____doc__,
 "Unpickler(file, *, fix_imports=True, encoding=\'ASCII\', errors=\'strict\')\n"
 "--\n"
@@ -488,4 +542,4 @@ _pickle_loads(PyModuleDef *module, PyObject *args, PyObject *kwargs)
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=f965b6c7018c898d input=a9049054013a1b77]*/
+/*[clinic end generated code: output=3aba79576e240c62 input=a9049054013a1b77]*/