From 372d705d958964289d762953d0a61622755f5386 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sun, 28 Oct 2018 20:16:26 +0000 Subject: [PATCH] bpo-33234 Improve list() pre-sizing for inputs with known lengths (GH-9846) The list() constructor isn't taking full advantage of known input lengths or length hints. This commit makes the constructor pre-size and not over-allocate when the input size is known (the input collection implements __len__). One of the main advantages is that this provides 12% difference in memory savings due to the difference between over-allocating and allocating exactly the input size. For efficiency purposes and to avoid a performance regression for small generators and collections, the size of the input object is calculated using __len__ and not __length_hint__, as the latter is considerably slower. --- Lib/test/test_list.py | 9 +++++ .../2018-04-17-01-24-51.bpo-33234.l9IDtp.rst | 2 + Objects/listobject.c | 40 +++++++++++++++++++ 3 files changed, 51 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst diff --git a/Lib/test/test_list.py b/Lib/test/test_list.py index def4badbf5..c5002b1273 100644 --- a/Lib/test/test_list.py +++ b/Lib/test/test_list.py @@ -1,5 +1,6 @@ import sys from test import list_tests +from test.support import cpython_only import pickle import unittest @@ -157,5 +158,13 @@ class ListTest(list_tests.CommonTest): with self.assertRaises(TypeError): (3,) + L([1,2]) + @cpython_only + def test_preallocation(self): + iterable = [0] * 10 + iter_size = sys.getsizeof(iterable) + + self.assertEqual(iter_size, sys.getsizeof(list([0] * 10))) + self.assertEqual(iter_size, sys.getsizeof(list(range(10)))) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst b/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst new file mode 100644 index 0000000000..2f9cd62e8e --- /dev/null +++ 
b/Misc/NEWS.d/next/Core and Builtins/2018-04-17-01-24-51.bpo-33234.l9IDtp.rst @@ -0,0 +1,2 @@ +The list constructor will pre-size and not over-allocate when +the input length is known. diff --git a/Objects/listobject.c b/Objects/listobject.c index fa26444f84..e85fa5c526 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -76,6 +76,33 @@ list_resize(PyListObject *self, Py_ssize_t newsize) return 0; } +static int +list_preallocate_exact(PyListObject *self, Py_ssize_t size) +{ + assert(self->ob_item == NULL); + + PyObject **items; + size_t allocated; + + allocated = (size_t)size; + if (allocated > (size_t)PY_SSIZE_T_MAX / sizeof(PyObject *)) { + PyErr_NoMemory(); + return -1; + } + + if (size == 0) { + allocated = 0; + } + items = (PyObject **)PyMem_New(PyObject*, allocated); + if (items == NULL) { + PyErr_NoMemory(); + return -1; + } + self->ob_item = items; + self->allocated = allocated; + return 0; +} + /* Debug statistic to compare allocations with reuse through the free list */ #undef SHOW_ALLOC_COUNT #ifdef SHOW_ALLOC_COUNT @@ -2683,6 +2710,19 @@ list___init___impl(PyListObject *self, PyObject *iterable) (void)_list_clear(self); } if (iterable != NULL) { + if (_PyObject_HasLen(iterable)) { + Py_ssize_t iter_len = PyObject_Size(iterable); + if (iter_len == -1) { + if (!PyErr_ExceptionMatches(PyExc_TypeError)) { + return -1; + } + PyErr_Clear(); + } + if (iter_len > 0 && self->ob_item == NULL + && list_preallocate_exact(self, iter_len)) { + return -1; + } + } PyObject *rv = list_extend(self, iterable); if (rv == NULL) return -1; -- 2.40.0