granicus.if.org Git - python/commitdiff
Merged revisions 70546 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

author     Antoine Pitrou <solipsis@pitrou.net>   Mon, 23 Mar 2009 18:52:06 +0000 (18:52 +0000)
committer  Antoine Pitrou <solipsis@pitrou.net>   Mon, 23 Mar 2009 18:52:06 +0000 (18:52 +0000)

........
  r70546 | antoine.pitrou | 2009-03-23 19:41:45 +0100 (Mon, 23 Mar 2009) | 9 lines

  Issue #4688: Add a heuristic so that tuples and dicts containing only
  untrackable objects are not tracked by the garbage collector. This can
  reduce the size of collections and therefore the garbage collection overhead
  on long-running programs, depending on their particular use of datatypes.

  (trivia: this makes the "binary_trees" benchmark from the Computer Language
  Shootout 40% faster)
........
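
As a quick illustration of what the heuristic means in practice (this sketch is not part of the patch and assumes an interpreter built with it applied):

    import gc

    # With this change, dicts whose keys and values are all atomic are never
    # tracked in the first place (PyDict_New no longer calls _PyObject_GC_TRACK).
    d = {"answer": 42}
    print(gc.is_tracked(d))    # expected: False

    # A dict holding a mutable container must stay tracked, because that
    # container could later take part in a reference cycle.
    d["data"] = []
    print(gc.is_tracked(d))    # expected: True

    # Tuples start out tracked and are only untracked lazily, once the
    # collector examines them and finds nothing trackable inside.
    t = (1, 2.0, "three")
    print(gc.is_tracked(t))    # expected: True before any collection
    gc.collect()
    print(gc.is_tracked(t))    # expected: False afterwards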

Doc/library/gc.rst
Include/dictobject.h
Include/objimpl.h
Include/tupleobject.h
Lib/test/test_dict.py
Lib/test/test_gc.py
Lib/test/test_tuple.py
Misc/NEWS
Modules/gcmodule.c
Objects/dictobject.c
Objects/tupleobject.c

index 7c425e3ec62a8d01bb41c13cb95edca4aee27724..6929a3ddd2ee046da8de6ea7105bbdefaae7c739 100644 (file)
@@ -129,6 +129,31 @@ The :mod:`gc` module provides the following functions:
    from an argument, that integer object may or may not appear in the result list.
 
 
+.. function:: is_tracked(obj)
+
+   Returns True if the object is currently tracked by the garbage collector,
+   False otherwise.  As a general rule, instances of atomic types aren't
+   tracked and instances of non-atomic types (containers, user-defined
+   objects...) are.  However, some type-specific optimizations can be present
+   in order to suppress the garbage collector footprint of simple instances
+   (e.g. dicts containing only atomic keys and values)::
+
+      >>> gc.is_tracked(0)
+      False
+      >>> gc.is_tracked("a")
+      False
+      >>> gc.is_tracked([])
+      True
+      >>> gc.is_tracked({})
+      False
+      >>> gc.is_tracked({"a": 1})
+      False
+      >>> gc.is_tracked({"a": []})
+      True
+
+   .. versionadded:: 2.7
+
+
 The following variable is provided for read-only access (you can mutate its
 value but should not rebind it):
 
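One nuance that complements the documented examples above: tuples are untracked lazily by the collector rather than at creation time, so is_tracked() may legitimately return True for a tuple of atomic values until a collection has run. An illustrative session, again assuming an interpreter with this patch:

    >>> import gc
    >>> t = (1, "a")
    >>> gc.is_tracked(t)      # freshly created tuples start out tracked
    True
    >>> n = gc.collect()      # a full collection lets the heuristic inspect t
    >>> gc.is_tracked(t)
    False
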
index 3d9d5e125615941fe7063962bc6c785acf938b18..d8c409edab9eb141469a1a34c8f262a50e7e89a0 100644 (file)
@@ -125,6 +125,7 @@ PyAPI_FUNC(PyObject *) PyDict_Copy(PyObject *mp);
 PyAPI_FUNC(int) PyDict_Contains(PyObject *mp, PyObject *key);
 PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, long hash);
 PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused);
+PyAPI_FUNC(void) _PyDict_MaybeUntrack(PyObject *mp);
 
 /* PyDict_Update(mp, other) is equivalent to PyDict_Merge(mp, other, 1). */
 PyAPI_FUNC(int) PyDict_Update(PyObject *mp, PyObject *other);
index 20d9c241a8c3066588f4407180c99ef78eac203f..5a273825375ef7c59acb96baed404e0e21789481 100644 (file)
@@ -282,6 +282,17 @@ extern PyGC_Head *_PyGC_generation0;
        g->gc.gc_next = NULL; \
     } while (0);
 
+/* True if the object is currently tracked by the GC. */
+#define _PyObject_GC_IS_TRACKED(o) \
+       ((_Py_AS_GC(o))->gc.gc_refs != _PyGC_REFS_UNTRACKED)
+/* True if the object may be tracked by the GC in the future, or already is.
+   This can be useful to implement some optimizations. */
+#define _PyObject_GC_MAY_BE_TRACKED(obj) \
+       (PyObject_IS_GC(obj) && \
+               (!PyTuple_CheckExact(obj) || _PyObject_GC_IS_TRACKED(obj)))
+
+
 PyAPI_FUNC(PyObject *) _PyObject_GC_Malloc(size_t);
 PyAPI_FUNC(PyObject *) _PyObject_GC_New(PyTypeObject *);
 PyAPI_FUNC(PyVarObject *) _PyObject_GC_NewVar(PyTypeObject *, Py_ssize_t);
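
The two macros above exist only at the C level. A rough, hypothetical Python-level analogue (the helper name and the hard-coded flag value are introduced here purely for illustration):

    import gc

    Py_TPFLAGS_HAVE_GC = 1 << 14   # value of the flag defined in Include/object.h

    def may_be_tracked(obj):
        """Rough Python analogue of _PyObject_GC_MAY_BE_TRACKED (illustrative)."""
        # Mirror the PyObject_IS_GC() check: types without the HAVE_GC flag
        # (ints, strings, floats, ...) can never be tracked at all.
        if not (type(obj).__flags__ & Py_TPFLAGS_HAVE_GC):
            return False
        # An exact tuple that has already been untracked can never become
        # tracked again; anything else is tracked now or may become tracked.
        if type(obj) is tuple and not gc.is_tracked(obj):
            return False
        return True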
index 7a887d1d2eb17f1287de608c3172404780d502c5..19fe7a53a5855d81f2ddce50b1d74180c0c3fabf 100644 (file)
@@ -45,6 +45,7 @@ PyAPI_FUNC(int) PyTuple_SetItem(PyObject *, Py_ssize_t, PyObject *);
 PyAPI_FUNC(PyObject *) PyTuple_GetSlice(PyObject *, Py_ssize_t, Py_ssize_t);
 PyAPI_FUNC(int) _PyTuple_Resize(PyObject **, Py_ssize_t);
 PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...);
+PyAPI_FUNC(void) _PyTuple_MaybeUntrack(PyObject *);
 
 /* Macro, trading safety for speed */
 #define PyTuple_GET_ITEM(op, i) (((PyTupleObject *)(op))->ob_item[i])
index 308143da5b70c230af16f23201734e9bcec56382..1c9bca8acf4106e2f7f6d7b1b317d911706edddc 100644 (file)
@@ -665,6 +665,104 @@ class DictTest(unittest.TestCase):
             gc.collect()
             self.assert_(ref() is None, "Cycle was not collected")
 
+    def _not_tracked(self, t):
+        # Nested containers can take several collections to untrack
+        gc.collect()
+        gc.collect()
+        self.assertFalse(gc.is_tracked(t), t)
+
+    def _tracked(self, t):
+        self.assertTrue(gc.is_tracked(t), t)
+        gc.collect()
+        gc.collect()
+        self.assertTrue(gc.is_tracked(t), t)
+
+    def test_track_literals(self):
+        # Test GC-optimization of dict literals
+        x, y, z, w = 1.5, "a", (1, None), []
+
+        self._not_tracked({})
+        self._not_tracked({x:(), y:x, z:1})
+        self._not_tracked({1: "a", "b": 2})
+        self._not_tracked({1: 2, (None, True, False, ()): int})
+        self._not_tracked({1: object()})
+
+        # Dicts with mutable elements are always tracked, even if those
+        # elements are not tracked right now.
+        self._tracked({1: []})
+        self._tracked({1: ([],)})
+        self._tracked({1: {}})
+        self._tracked({1: set()})
+
+    def test_track_dynamic(self):
+        # Test GC-optimization of dynamically-created dicts
+        class MyObject(object):
+            pass
+        x, y, z, w, o = 1.5, "a", (1, object()), [], MyObject()
+
+        d = dict()
+        self._not_tracked(d)
+        d[1] = "a"
+        self._not_tracked(d)
+        d[y] = 2
+        self._not_tracked(d)
+        d[z] = 3
+        self._not_tracked(d)
+        self._not_tracked(d.copy())
+        d[4] = w
+        self._tracked(d)
+        self._tracked(d.copy())
+        d[4] = None
+        self._not_tracked(d)
+        self._not_tracked(d.copy())
+
+        # dd isn't tracked right now, but it may mutate, and therefore d,
+        # which contains it, must be tracked.
+        d = dict()
+        dd = dict()
+        d[1] = dd
+        self._not_tracked(dd)
+        self._tracked(d)
+        dd[1] = d
+        self._tracked(dd)
+
+        d = dict.fromkeys([x, y, z])
+        self._not_tracked(d)
+        dd = dict()
+        dd.update(d)
+        self._not_tracked(dd)
+        d = dict.fromkeys([x, y, z, o])
+        self._tracked(d)
+        dd = dict()
+        dd.update(d)
+        self._tracked(dd)
+
+        d = dict(x=x, y=y, z=z)
+        self._not_tracked(d)
+        d = dict(x=x, y=y, z=z, w=w)
+        self._tracked(d)
+        d = dict()
+        d.update(x=x, y=y, z=z)
+        self._not_tracked(d)
+        d.update(w=w)
+        self._tracked(d)
+
+        d = dict([(x, y), (z, 1)])
+        self._not_tracked(d)
+        d = dict([(x, y), (z, w)])
+        self._tracked(d)
+        d = dict()
+        d.update([(x, y), (z, 1)])
+        self._not_tracked(d)
+        d.update([(x, y), (z, w)])
+        self._tracked(d)
+
+    def test_track_subtypes(self):
+        # Dict subtypes are always tracked
+        class MyDict(dict):
+            pass
+        self._tracked(MyDict())
+
 
 from test import mapping_tests
 
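The _not_tracked helper above collects twice because untracking propagates outwards one level per pass: a container can only be untracked once everything it holds has itself been untracked. A sketch of that staging, mirroring the behaviour the tests exercise:

    import gc

    inner = (1, "a")       # atomic contents, but tracked when created
    outer = {1: inner}     # tracked, because inner may still be tracked

    gc.collect()           # one pass can untrack inner ...
    gc.collect()           # ... the next pass can then untrack outer

    assert not gc.is_tracked(inner)
    assert not gc.is_tracked(outer)
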
index 414e17aa8fc1c18b4c2f78cc051b4c2cebb348b0..2262b367faac4976143d6b22f9c672b07c200953 100644 (file)
@@ -415,6 +415,33 @@ class GCTests(unittest.TestCase):
 
         self.assertEqual(gc.get_referents(1, 'a', 4j), [])
 
+    def test_is_tracked(self):
+        # Atomic built-in types are not tracked, user-defined objects and
+        # mutable containers are.
+        # NOTE: types with special optimizations (e.g. tuple) have tests
+        # in their own test files instead.
+        self.assertFalse(gc.is_tracked(None))
+        self.assertFalse(gc.is_tracked(1))
+        self.assertFalse(gc.is_tracked(1.0))
+        self.assertFalse(gc.is_tracked(1.0 + 5.0j))
+        self.assertFalse(gc.is_tracked(True))
+        self.assertFalse(gc.is_tracked(False))
+        self.assertFalse(gc.is_tracked(b"a"))
+        self.assertFalse(gc.is_tracked("a"))
+        self.assertFalse(gc.is_tracked(bytearray(b"a")))
+        self.assertFalse(gc.is_tracked(type))
+        self.assertFalse(gc.is_tracked(int))
+        self.assertFalse(gc.is_tracked(object))
+        self.assertFalse(gc.is_tracked(object()))
+
+        class UserClass:
+            pass
+        self.assertTrue(gc.is_tracked(gc))
+        self.assertTrue(gc.is_tracked(UserClass))
+        self.assertTrue(gc.is_tracked(UserClass()))
+        self.assertTrue(gc.is_tracked([]))
+        self.assertTrue(gc.is_tracked(set()))
+
     def test_bug1055820b(self):
         # Corresponds to temp2b.py in the bug report.
 
index de081314db01ec71a943095cf2da72d765c5aab5..f82af31449589e04c30c98a3b3400541bf2f6345 100644 (file)
@@ -1,5 +1,7 @@
 from test import support, seq_tests
 
+import gc
+
 class TupleTest(seq_tests.CommonTest):
     type2test = tuple
 
@@ -82,6 +84,69 @@ class TupleTest(seq_tests.CommonTest):
         self.assertEqual(repr(a0), "()")
         self.assertEqual(repr(a2), "(0, 1, 2)")
 
+    def _not_tracked(self, t):
+        # Nested tuples can take several collections to untrack
+        gc.collect()
+        gc.collect()
+        self.assertFalse(gc.is_tracked(t), t)
+
+    def _tracked(self, t):
+        self.assertTrue(gc.is_tracked(t), t)
+        gc.collect()
+        gc.collect()
+        self.assertTrue(gc.is_tracked(t), t)
+
+    def test_track_literals(self):
+        # Test GC-optimization of tuple literals
+        x, y, z = 1.5, "a", []
+
+        self._not_tracked(())
+        self._not_tracked((1,))
+        self._not_tracked((1, 2))
+        self._not_tracked((1, 2, "a"))
+        self._not_tracked((1, 2, (None, True, False, ()), int))
+        self._not_tracked((object(),))
+        self._not_tracked(((1, x), y, (2, 3)))
+
+        # Tuples with mutable elements are always tracked, even if those
+        # elements are not tracked right now.
+        self._tracked(([],))
+        self._tracked(([1],))
+        self._tracked(({},))
+        self._tracked((set(),))
+        self._tracked((x, y, z))
+
+    def check_track_dynamic(self, tp, always_track):
+        x, y, z = 1.5, "a", []
+
+        check = self._tracked if always_track else self._not_tracked
+        check(tp())
+        check(tp([]))
+        check(tp(set()))
+        check(tp([1, x, y]))
+        check(tp(obj for obj in [1, x, y]))
+        check(tp(set([1, x, y])))
+        check(tp(tuple([obj]) for obj in [1, x, y]))
+        check(tuple(tp([obj]) for obj in [1, x, y]))
+
+        self._tracked(tp([z]))
+        self._tracked(tp([[x, y]]))
+        self._tracked(tp([{x: y}]))
+        self._tracked(tp(obj for obj in [x, y, z]))
+        self._tracked(tp(tuple([obj]) for obj in [x, y, z]))
+        self._tracked(tuple(tp([obj]) for obj in [x, y, z]))
+
+    def test_track_dynamic(self):
+        # Test GC-optimization of dynamically constructed tuples.
+        self.check_track_dynamic(tuple, False)
+
+    def test_track_subtypes(self):
+        # Tuple subtypes must always be tracked
+        class MyTuple(tuple):
+            pass
+        self.check_track_dynamic(MyTuple, True)
+
+
 def test_main():
     support.run_unittest(TupleTest)
 
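Both test files pin down the same boundary: the heuristic only ever untracks exact tuples and dicts (see the PyTuple_CheckExact and PyDict_CheckExact guards in the C changes below), so subclass instances stay tracked regardless of their contents. An illustrative check, assuming an interpreter with this patch:

    import gc

    class MyTuple(tuple):
        pass

    class MyDict(dict):
        pass

    t, d = MyTuple((1, 2)), MyDict(a=1)
    gc.collect()
    gc.collect()
    # Only exact tuples and dicts are candidates for untracking; subclass
    # instances always remain tracked, whatever they contain.
    assert gc.is_tracked(t)
    assert gc.is_tracked(d)
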
index 8c752a7a7d17ba231a5d067bc0af2a7d93478c25..c690deaeae60af58cf92a3c300dcc2d70dc6014f 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,11 @@ What's New in Python 3.1 alpha 2?
 Core and Builtins
 -----------------
 
+- Issue #4688: Add a heuristic so that tuples and dicts containing only
+  untrackable objects are not tracked by the garbage collector. This can
+  reduce the size of collections and therefore the garbage collection overhead
+  on long-running programs, depending on their particular use of datatypes.
+
 - Issue #5512: Rewrite PyLong long division algorithm (x_divrem) to
   improve its performance.  Long divisions and remainder operations
   are now between 50% and 150% faster.
index 2474721eeb6379285ac25555a88b5198ccb6db1c..d4a0900fcb2eb8d7109463586d32f490d35396f2 100644 (file)
@@ -433,7 +433,13 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
                         (void) traverse(op,
                                         (visitproc)visit_reachable,
                                         (void *)young);
-                        next = gc->gc.gc_next;
+                       next = gc->gc.gc_next;
+                       if (PyTuple_CheckExact(op)) {
+                               _PyTuple_MaybeUntrack(op);
+                       }
+                       else if (PyDict_CheckExact(op)) {
+                               _PyDict_MaybeUntrack(op);
+                       }
                }
                else {
                        /* This *may* be unreachable.  To make progress,
@@ -1229,6 +1235,26 @@ gc_get_objects(PyObject *self, PyObject *noargs)
        return result;
 }
 
+PyDoc_STRVAR(gc_is_tracked__doc__,
+"is_tracked(obj) -> bool\n"
+"\n"
+"Returns true if the object is tracked by the garbage collector.\n"
+"Simple atomic objects will return false.\n"
+);
+
+static PyObject *
+gc_is_tracked(PyObject *self, PyObject *obj)
+{
+       PyObject *result;
+       
+       if (PyObject_IS_GC(obj) && IS_TRACKED(obj))
+               result = Py_True;
+       else
+               result = Py_False;
+       Py_INCREF(result);
+       return result;
+}
+
 
 PyDoc_STRVAR(gc__doc__,
 "This module provides access to the garbage collector for reference cycles.\n"
@@ -1243,6 +1269,7 @@ PyDoc_STRVAR(gc__doc__,
 "set_threshold() -- Set the collection thresholds.\n"
 "get_threshold() -- Return the current the collection thresholds.\n"
 "get_objects() -- Return a list of all objects tracked by the collector.\n"
+"is_tracked() -- Returns true if a given object is tracked.\n"
 "get_referrers() -- Return the list of objects that refer to an object.\n"
 "get_referents() -- Return the list of objects that an object refers to.\n");
 
@@ -1258,6 +1285,7 @@ static PyMethodDef GcMethods[] = {
        {"collect",        (PyCFunction)gc_collect,
                METH_VARARGS | METH_KEYWORDS,           gc_collect__doc__},
        {"get_objects",    gc_get_objects,METH_NOARGS,  gc_get_objects__doc__},
+       {"is_tracked",     gc_is_tracked, METH_O,       gc_is_tracked__doc__},
        {"get_referrers",  gc_get_referrers, METH_VARARGS,
                gc_get_referrers__doc__},
        {"get_referents",  gc_get_referents, METH_VARARGS,
index e235993e9a6e92e4d8ea4a11f32706bb47dbc1be..e005f8ea5ca4e83412cd64c44880a3b95f5a0154 100644 (file)
@@ -181,6 +181,24 @@ show_alloc(void)
 }
 #endif
 
+/* Debug statistic to count GC tracking of dicts */
+#ifdef SHOW_TRACK_COUNT
+static Py_ssize_t count_untracked = 0;
+static Py_ssize_t count_tracked = 0;
+
+static void
+show_track(void)
+{
+       fprintf(stderr, "Dicts created: %" PY_FORMAT_SIZE_T "d\n",
+               count_tracked + count_untracked);
+       fprintf(stderr, "Dicts tracked by the GC: %" PY_FORMAT_SIZE_T
+               "d\n", count_tracked);
+       fprintf(stderr, "%.2f%% dict tracking rate\n\n",
+               (100.0*count_tracked/(count_untracked+count_tracked)));
+}
+#endif
+
+
 /* Initialization macros.
    There are two ways to create a dict:  PyDict_New() is the main C API
    function, and the tp_new slot maps to dict_new().  In the latter case we
@@ -233,6 +251,9 @@ PyDict_New(void)
 #endif
 #ifdef SHOW_ALLOC_COUNT
                Py_AtExit(show_alloc);
+#endif
+#ifdef SHOW_TRACK_COUNT
+               Py_AtExit(show_track);
 #endif
        }
        if (numfree) {
@@ -263,10 +284,12 @@ PyDict_New(void)
 #endif
        }
        mp->ma_lookup = lookdict_unicode;
+#ifdef SHOW_TRACK_COUNT
+       count_untracked++;
+#endif
 #ifdef SHOW_CONVERSION_COUNTS
        ++created;
 #endif
-       _PyObject_GC_TRACK(mp);
        return (PyObject *)mp;
 }
 
@@ -435,6 +458,52 @@ lookdict_unicode(PyDictObject *mp, PyObject *key, register long hash)
        return 0;
 }
 
+#ifdef SHOW_TRACK_COUNT
+#define INCREASE_TRACK_COUNT \
+       (count_tracked++, count_untracked--);
+#define DECREASE_TRACK_COUNT \
+       (count_tracked--, count_untracked++);
+#else
+#define INCREASE_TRACK_COUNT
+#define DECREASE_TRACK_COUNT
+#endif
+
+#define MAINTAIN_TRACKING(mp, key, value) \
+       do { \
+               if (!_PyObject_GC_IS_TRACKED(mp)) { \
+                       if (_PyObject_GC_MAY_BE_TRACKED(key) || \
+                               _PyObject_GC_MAY_BE_TRACKED(value)) { \
+                               _PyObject_GC_TRACK(mp); \
+                               INCREASE_TRACK_COUNT \
+                       } \
+               } \
+       } while(0)
+
+void
+_PyDict_MaybeUntrack(PyObject *op)
+{
+       PyDictObject *mp;
+       PyObject *value;
+       Py_ssize_t mask, i;
+       PyDictEntry *ep;
+
+       if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
+               return;
+       
+       mp = (PyDictObject *) op;
+       ep = mp->ma_table;
+       mask = mp->ma_mask;
+       for (i = 0; i <= mask; i++) {
+               if ((value = ep[i].me_value) == NULL)
+                       continue;
+               if (_PyObject_GC_MAY_BE_TRACKED(value) ||
+                       _PyObject_GC_MAY_BE_TRACKED(ep[i].me_key))
+                       return;
+       }
+       _PyObject_GC_UNTRACK(op);
+}
+
+
 /*
 Internal routine to insert a new item into the table.
 Used both by the internal resize routine and by the public insert routine.
@@ -455,6 +524,7 @@ insertdict(register PyDictObject *mp, PyObject *key, long hash, PyObject *value)
                Py_DECREF(value);
                return -1;
        }
+       MAINTAIN_TRACKING(mp, key, value);
        if (ep->me_value != NULL) {
                old_value = ep->me_value;
                ep->me_value = value;
@@ -494,6 +564,7 @@ insertdict_clean(register PyDictObject *mp, PyObject *key, long hash,
        PyDictEntry *ep0 = mp->ma_table;
        register PyDictEntry *ep;
 
+       MAINTAIN_TRACKING(mp, key, value);
        i = hash & mask;
        ep = &ep0[i];
        for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
@@ -1993,8 +2064,17 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
                assert(d->ma_table == NULL && d->ma_fill == 0 && d->ma_used == 0);
                INIT_NONZERO_DICT_SLOTS(d);
                d->ma_lookup = lookdict_unicode;
+               /* The object has been implicitly tracked by tp_alloc */
+               if (type == &PyDict_Type)
+                       _PyObject_GC_UNTRACK(d);
 #ifdef SHOW_CONVERSION_COUNTS
                ++created;
+#endif
+#ifdef SHOW_TRACK_COUNT
+               if (_PyObject_GC_IS_TRACKED(d))
+                       count_tracked++;
+               else
+                       count_untracked++;
 #endif
        }
        return self;
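
Taken together, MAINTAIN_TRACKING and _PyDict_MaybeUntrack give exact dicts a reversible tracking state: inserting a trackable key or value turns tracking on immediately, while turning it off again is deferred until the collector next looks at the dict. A small sketch of that round trip, mirroring test_track_dynamic above:

    import gc

    d = {}
    d["x"] = 1.5        # atomic key and value: MAINTAIN_TRACKING leaves d untracked
    assert not gc.is_tracked(d)

    d["y"] = []         # a trackable value: d becomes tracked immediately
    assert gc.is_tracked(d)

    d["y"] = None       # contents are atomic again, but untracking is deferred
    gc.collect()        # ... until the collector next examines d
    assert not gc.is_tracked(d)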
index 8828a2d5e6b1bcaa56ef86b824885c20af237c0b..11be0e19dc62d217cfdfa8d5c742eda304d05377 100644 (file)
@@ -23,11 +23,36 @@ Py_ssize_t fast_tuple_allocs;
 Py_ssize_t tuple_zero_allocs;
 #endif
 
+/* Debug statistic to count GC tracking of tuples.
+   Please note that tuples are only untracked when considered by the GC, and
+   many of them will be dead before that happens. Therefore, a tracking rate
+   close to 100% does not necessarily prove that the heuristic is inefficient.
+*/
+#ifdef SHOW_TRACK_COUNT
+static Py_ssize_t count_untracked = 0;
+static Py_ssize_t count_tracked = 0;
+
+static void
+show_track(void)
+{
+       fprintf(stderr, "Tuples created: %" PY_FORMAT_SIZE_T "d\n",
+               count_tracked + count_untracked);
+       fprintf(stderr, "Tuples tracked by the GC: %" PY_FORMAT_SIZE_T
+               "d\n", count_tracked);
+       fprintf(stderr, "%.2f%% tuple tracking rate\n\n",
+               (100.0*count_tracked/(count_untracked+count_tracked)));
+}
+#endif
+
+
 PyObject *
 PyTuple_New(register Py_ssize_t size)
 {
        register PyTupleObject *op;
        Py_ssize_t i;
+#ifdef SHOW_TRACK_COUNT
+       count_tracked++;
+#endif
        if (size < 0) {
                PyErr_BadInternalCall();
                return NULL;
@@ -131,6 +156,32 @@ PyTuple_SetItem(register PyObject *op, register Py_ssize_t i, PyObject *newitem)
        return 0;
 }
 
+void
+_PyTuple_MaybeUntrack(PyObject *op)
+{
+       PyTupleObject *t;
+       Py_ssize_t i, n;
+       
+       if (!PyTuple_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
+               return;
+       t = (PyTupleObject *) op;
+       n = Py_SIZE(t);
+       for (i = 0; i < n; i++) {
+               PyObject *elt = PyTuple_GET_ITEM(t, i);
+               /* Tuples with NULL elements aren't
+                  fully constructed; don't untrack
+                  them yet. */
+               if (!elt ||
+                       _PyObject_GC_MAY_BE_TRACKED(elt))
+                       return;
+       }
+#ifdef SHOW_TRACK_COUNT
+       count_tracked--;
+       count_untracked++;
+#endif
+       _PyObject_GC_UNTRACK(op);
+}
+
 PyObject *
 PyTuple_Pack(Py_ssize_t n, ...)
 {
@@ -855,6 +906,9 @@ PyTuple_Fini(void)
 
        (void)PyTuple_ClearFreeList();
 #endif
+#ifdef SHOW_TRACK_COUNT
+       show_track();
+#endif
 }
 
 /*********************** Tuple Iterator **************************/
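
The SHOW_TRACK_COUNT counters are a compile-time debugging aid, but a coarse view of the same effect is available from pure Python, since gc.get_objects() only returns objects the collector is currently tracking. A rough, illustrative measurement (numbers will vary with the workload):

    import gc

    gc.collect()               # give the heuristic a chance to untrack what it can
    objs = gc.get_objects()    # only currently tracked objects are returned
    print("tracked objects:      ", len(objs))
    print("tracked exact tuples: ", sum(1 for o in objs if type(o) is tuple))
    print("tracked exact dicts:  ", sum(1 for o in objs if type(o) is dict))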