From 466326dcdf038b948d94302c315be407c73e60d1 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sun, 13 Oct 2019 16:48:59 +0100 Subject: [PATCH] bpo-38379: Don't block collection of unreachable objects when some objects resurrect (GH-16687) Currently if any finalizer invoked during garbage collection resurrects any object, the gc gives up and aborts the collection. Although finalizers are assured to only run once per object, this behaviour of the gc can lead to an ever-increasing memory situation if new resurrecting objects are allocated in every new gc collection. To avoid this, recompute what objects among the unreachable set need to be resurrected and what objects can be safely collected. In this way, resurrecting objects will not block the collection of other objects in the unreachable set. --- Lib/test/test_gc.py | 98 ++++++++-- .../2019-10-10-01-41-02.bpo-38379._q4dhn.rst | 4 + Modules/gcmodule.c | 182 ++++++++++++------ 3 files changed, 208 insertions(+), 76 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-10-10-01-41-02.bpo-38379._q4dhn.rst diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index f52db1eab1..fe9d6c2f76 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -822,7 +822,78 @@ class GCTests(unittest.TestCase): self.assertRaises(TypeError, gc.get_objects, "1") self.assertRaises(TypeError, gc.get_objects, 1.234) - def test_38379(self): + def test_resurrection_only_happens_once_per_object(self): + class A: # simple self-loop + def __init__(self): + self.me = self + + class Lazarus(A): + resurrected = 0 + resurrected_instances = [] + + def __del__(self): + Lazarus.resurrected += 1 + Lazarus.resurrected_instances.append(self) + + gc.collect() + gc.disable() + + # We start with 0 resurrections + laz = Lazarus() + self.assertEqual(Lazarus.resurrected, 0) + + # Deleting the instance and triggering a collection + # resurrects the object + del laz + gc.collect() + self.assertEqual(Lazarus.resurrected, 1) + self.assertEqual(len(Lazarus.resurrected_instances), 1) + + # Clearing the references and forcing a collection + # should not resurrect the object again. + Lazarus.resurrected_instances.clear() + self.assertEqual(Lazarus.resurrected, 1) + gc.collect() + self.assertEqual(Lazarus.resurrected, 1) + + gc.enable() + + def test_resurrection_is_transitive(self): + class Cargo: + def __init__(self): + self.me = self + + class Lazarus: + resurrected_instances = [] + + def __del__(self): + Lazarus.resurrected_instances.append(self) + + gc.collect() + gc.disable() + + laz = Lazarus() + cargo = Cargo() + cargo_id = id(cargo) + + # Create a cycle between cargo and laz + laz.cargo = cargo + cargo.laz = laz + + # Drop the references, force a collection and check that + # everything was resurrected. + del laz, cargo + gc.collect() + self.assertEqual(len(Lazarus.resurrected_instances), 1) + instance = Lazarus.resurrected_instances.pop() + self.assertTrue(hasattr(instance, "cargo")) + self.assertEqual(id(instance.cargo), cargo_id) + + gc.collect() + gc.enable() + + def test_resurrection_does_not_block_cleanup_of_other_objects(self): + # When a finalizer resurrects objects, stats were reporting them as # having been collected. This affected both collect()'s return # value and the dicts returned by get_stats(). @@ -861,34 +932,29 @@ class GCTests(unittest.TestCase): # Nothing is collected - Z() is merely resurrected. t = gc.collect() c, nc = getstats() - #self.assertEqual(t, 2) # before - self.assertEqual(t, 0) # after - #self.assertEqual(c - oldc, 2) # before - self.assertEqual(c - oldc, 0) # after + self.assertEqual(t, 0) + self.assertEqual(c - oldc, 0) self.assertEqual(nc - oldnc, 0) - # Unfortunately, a Z() prevents _anything_ from being collected. - # It should be possible to collect the A instances anyway, but - # that will require non-trivial code changes. + # Z() should not prevent anything else from being collected. oldc, oldnc = c, nc for i in range(N): A() Z() - # Z() prevents anything from being collected. t = gc.collect() c, nc = getstats() - #self.assertEqual(t, 2*N + 2) # before - self.assertEqual(t, 0) # after - #self.assertEqual(c - oldc, 2*N + 2) # before - self.assertEqual(c - oldc, 0) # after + self.assertEqual(t, 2*N) + self.assertEqual(c - oldc, 2*N) self.assertEqual(nc - oldnc, 0) - # But the A() trash is reclaimed on the next run. + # The A() trash should have been reclaimed already but the + # 2 copies of Z are still in zs (and the associated dicts). oldc, oldnc = c, nc + zs.clear() t = gc.collect() c, nc = getstats() - self.assertEqual(t, 2*N) - self.assertEqual(c - oldc, 2*N) + self.assertEqual(t, 4) + self.assertEqual(c - oldc, 4) self.assertEqual(nc - oldnc, 0) gc.enable() diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-10-10-01-41-02.bpo-38379._q4dhn.rst b/Misc/NEWS.d/next/Core and Builtins/2019-10-10-01-41-02.bpo-38379._q4dhn.rst new file mode 100644 index 0000000000..86f908b677 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-10-10-01-41-02.bpo-38379._q4dhn.rst @@ -0,0 +1,4 @@ +When the garbage collector makes a collection in which some objects +resurrect (they are reachable from outside the isolated cycles after the +finalizers have been executed), do not block the collection of all objects +that are still unreachable. Patch by Pablo Galindo and Tim Peters. diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index a1cb323bd2..0fb9ac2ac1 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -301,8 +301,18 @@ gc_list_size(PyGC_Head *list) return n; } +/* Walk the list and mark all objects as non-collecting */ +static inline void +gc_list_clear_collecting(PyGC_Head *collectable) +{ + PyGC_Head *gc; + for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) { + gc_clear_collecting(gc); + } +} + /* Append objects in a GC list to a Python list. - * Return 0 if all OK, < 0 if error (out of memory for list). + * Return 0 if all OK, < 0 if error (out of memory for list) */ static int append_objects(PyObject *py_list, PyGC_Head *gc_list) @@ -613,6 +623,22 @@ move_legacy_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) } } +static inline void +clear_unreachable_mask(PyGC_Head *unreachable) +{ + /* Check that the list head does not have the unreachable bit set */ + assert(((uintptr_t)unreachable & NEXT_MASK_UNREACHABLE) == 0); + + PyGC_Head *gc, *next; + unreachable->_gc_next &= ~NEXT_MASK_UNREACHABLE; + for (gc = GC_NEXT(unreachable); gc != unreachable; gc = next) { + _PyObject_ASSERT((PyObject*)FROM_GC(gc), gc->_gc_next & NEXT_MASK_UNREACHABLE); + gc->_gc_next &= ~NEXT_MASK_UNREACHABLE; + next = (PyGC_Head*)gc->_gc_next; + } + validate_list(unreachable, PREV_MASK_COLLECTING); +} + /* A traversal callback for move_legacy_finalizer_reachable. */ static int visit_move(PyObject *op, PyGC_Head *tolist) @@ -891,36 +917,6 @@ finalize_garbage(PyGC_Head *collectable) gc_list_merge(&seen, collectable); } -/* Walk the collectable list and check that they are really unreachable - from the outside (some objects could have been resurrected by a - finalizer). */ -static int -check_garbage(PyGC_Head *collectable) -{ - int ret = 0; - PyGC_Head *gc; - for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) { - // Use gc_refs and break gc_prev again. - gc_set_refs(gc, Py_REFCNT(FROM_GC(gc))); - _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0); - } - subtract_refs(collectable); - PyGC_Head *prev = collectable; - for (gc = GC_NEXT(collectable); gc != collectable; gc = GC_NEXT(gc)) { - _PyObject_ASSERT_WITH_MSG(FROM_GC(gc), - gc_get_refs(gc) >= 0, - "refcount is too small"); - if (gc_get_refs(gc) != 0) { - ret = -1; - } - // Restore gc_prev here. - _PyGCHead_SET_PREV(gc, prev); - gc_clear_collecting(gc); - prev = gc; - } - return ret; -} - /* Break reference cycles by clearing the containers involved. This is * tricky business as the lists can be changing and we don't know which * objects may be freed. It is possible I screwed something up here. @@ -958,6 +954,7 @@ delete_garbage(struct _gc_runtime_state *state, } if (GC_NEXT(collectable) == gc) { /* object is still alive, move it, it may die later */ + gc_clear_collecting(gc); gc_list_move(gc, old); } } @@ -1003,6 +1000,87 @@ show_stats_each_generations(struct _gc_runtime_state *state) buf, gc_list_size(&state->permanent_generation.head)); } +/* Deduce wich objects among "base" are unreachable from outside the list + and move them to 'unreachable'. The process consist in the following steps: + +1. Copy all reference counts to a different field (gc_prev is used to hold + this copy to save memory). +2. Traverse all objects in "base" and visit all referred objects using + "tp_traverse" and for every visited object, substract 1 to the reference + count (the one that we copied in the previous step). After this step, all + objects that can be reached directly from outside must have strictly positive + reference count, while all unreachable objects must have a count of exactly 0. +3. Indentify all unreachable objects (the ones with 0 reference count) and move + them to the "unreachable" list. This step also needs to move back to "base" all + objects that were initially marked as unreachable but are referred transitively + by the reachable objects (the ones with strictly positive reference count). + +Contracts: + + * The "base" has to be a valid list with no mask set. + + * The "unreachable" list must be uninitialized (this function calls + gc_list_init over 'unreachable'). + +IMPORTANT: This function leaves 'unreachable' with the NEXT_MASK_UNREACHABLE +flag set but it does not clear it to skip unnecessary iteration. Before the +flag is cleared (for example, by using 'clear_unreachable_mask' function or +by a call to 'move_legacy_finalizers'), the 'unreachable' list is not a normal +list and we can not use most gc_list_* functions for it. */ +static inline void +deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) { + validate_list(base, 0); + /* Using ob_refcnt and gc_refs, calculate which objects in the + * container set are reachable from outside the set (i.e., have a + * refcount greater than 0 when all the references within the + * set are taken into account). + */ + update_refs(base); // gc_prev is used for gc_refs + subtract_refs(base); + + /* Leave everything reachable from outside base in base, and move + * everything else (in base) to unreachable. + * NOTE: This used to move the reachable objects into a reachable + * set instead. But most things usually turn out to be reachable, + * so it's more efficient to move the unreachable things. + */ + gc_list_init(unreachable); + move_unreachable(base, unreachable); // gc_prev is pointer again + validate_list(base, 0); +} + +/* Handle objects that may have resurrected after a call to 'finalize_garbage', moving + them to 'old_generation' and placing the rest on 'still_unreachable'. + + Contracts: + * After this function 'unreachable' must not be used anymore and 'still_unreachable' + will contain the objects that did not resurrect. + + * The "still_unreachable" list must be uninitialized (this function calls + gc_list_init over 'still_unreachable'). + +IMPORTANT: After a call to this function, the 'still_unreachable' set will have the +PREV_MARK_COLLECTING set, but the objects in this set are going to be removed so +we can skip the expense of clearing the flag to avoid extra iteration. */ +static inline void +handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, + PyGC_Head *old_generation) +{ + // Remove the PREV_MASK_COLLECTING from unreachable + // to prepare it for a new call to 'deduce_unreachable' + gc_list_clear_collecting(unreachable); + + // After the call to deduce_unreachable, the 'still_unreachable' set will + // have the PREV_MARK_COLLECTING set, but the objects are going to be + // removed so we can skip the expense of clearing the flag. + PyGC_Head* resurrected = unreachable; + deduce_unreachable(resurrected, still_unreachable); + clear_unreachable_mask(still_unreachable); + + // Move the resurrected objects to the old generation for future collection. + gc_list_merge(resurrected, old_generation); +} + /* This is the main function. Read this to understand how the * collection process works. */ static Py_ssize_t @@ -1045,26 +1123,9 @@ collect(struct _gc_runtime_state *state, int generation, old = GEN_HEAD(state, generation+1); else old = young; - - validate_list(young, 0); validate_list(old, 0); - /* Using ob_refcnt and gc_refs, calculate which objects in the - * container set are reachable from outside the set (i.e., have a - * refcount greater than 0 when all the references within the - * set are taken into account). - */ - update_refs(young); // gc_prev is used for gc_refs - subtract_refs(young); - /* Leave everything reachable from outside young in young, and move - * everything else (in young) to unreachable. - * NOTE: This used to move the reachable objects into a reachable - * set instead. But most things usually turn out to be reachable, - * so it's more efficient to move the unreachable things. - */ - gc_list_init(&unreachable); - move_unreachable(young, &unreachable); // gc_prev is pointer again - validate_list(young, 0); + deduce_unreachable(young, &unreachable); untrack_tuples(young); /* Move reachable objects to next generation. */ @@ -1114,17 +1175,18 @@ collect(struct _gc_runtime_state *state, int generation, /* Call tp_finalize on objects which have one. */ finalize_garbage(&unreachable); - if (check_garbage(&unreachable)) { // clear PREV_MASK_COLLECTING here - gc_list_merge(&unreachable, old); - } - else { - /* Call tp_clear on objects in the unreachable set. This will cause - * the reference cycles to be broken. It may also cause some objects - * in finalizers to be freed. - */ - m += gc_list_size(&unreachable); - delete_garbage(state, &unreachable, old); - } + /* Handle any objects that may have resurrected after the call + * to 'finalize_garbage' and continue the collection with the + * objects that are still unreachable */ + PyGC_Head final_unreachable; + handle_resurrected_objects(&unreachable, &final_unreachable, old); + + /* Call tp_clear on objects in the final_unreachable set. This will cause + * the reference cycles to be broken. It may also cause some objects + * in finalizers to be freed. + */ + m += gc_list_size(&final_unreachable); + delete_garbage(state, &final_unreachable, old); /* Collect statistics on uncollectable objects found and print * debugging information. */ -- 2.40.0