diff --git a/Doc/library/gc.rst b/Doc/library/gc.rst index 1065ec30802..480a9dec7f1 100644 --- a/Doc/library/gc.rst +++ b/Doc/library/gc.rst @@ -204,8 +204,6 @@ The :mod:`gc` module provides the following functions: >>> gc.is_tracked({}) False >>> gc.is_tracked({"a": 1}) - False - >>> gc.is_tracked({"a": []}) True .. versionadded:: 3.1 diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index c5399ad8e04..f7d747cc6c6 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -43,8 +43,6 @@ extern int _PyDict_Next( extern int _PyDict_HasOnlyStringKeys(PyObject *mp); -extern void _PyDict_MaybeUntrack(PyObject *mp); - // Export for '_ctypes' shared extension PyAPI_FUNC(Py_ssize_t) _PyDict_SizeOf(PyDictObject *); diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 377a846428a..5de4aa05398 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -532,8 +532,8 @@ currently in. Instead, when that's needed, ad hoc tricks (like the `NEXT_MASK_UNREACHABLE` flag) are employed. -Optimization: delay tracking containers -======================================= +Optimization: delayed untracking containers +=========================================== Certain types of containers cannot participate in a reference cycle, and so do not need to be tracked by the garbage collector. Untracking these objects @@ -546,26 +546,17 @@ 2. When the container is examined by the garbage collector. As a general rule, instances of atomic types aren't tracked and instances of -non-atomic types (containers, user-defined objects...) are. However, some -type-specific optimizations can be present in order to suppress the garbage -collector footprint of simple instances. Some examples of native types that -benefit from delayed tracking: +non-atomic types (containers, user-defined objects...) are. -- Tuples containing only immutable objects (integers, strings etc, - and recursively, tuples of immutable objects) do not need to be tracked. The - interpreter creates a large number of tuples, many of which will not survive - until garbage collection. It is therefore not worthwhile to untrack eligible - tuples at creation time. Instead, all tuples except the empty tuple are tracked - when created. During garbage collection it is determined whether any surviving - tuples can be untracked. A tuple can be untracked if all of its contents are - already not tracked. Tuples are examined for untracking in all garbage collection - cycles. It may take more than one cycle to untrack a tuple. - -- Dictionaries containing only immutable objects also do not need to be tracked. - Dictionaries are untracked when created. If a tracked item is inserted into a - dictionary (either as a key or value), the dictionary becomes tracked. During a - full garbage collection (all generations), the collector will untrack any dictionaries - whose contents are not tracked. +Tuples containing only immutable objects (integers, strings etc, +and recursively, tuples of immutable objects) do not need to be tracked. The +interpreter creates a large number of tuples, many of which will not survive +until garbage collection. It is therefore not worthwhile to untrack eligible +tuples at creation time. Instead, all tuples except the empty tuple are tracked +when created. During garbage collection it is determined whether any surviving +tuples can be untracked. A tuple can be untracked if all of its contents are +already not tracked. Tuples are examined for untracking in all garbage collection +cycles. The garbage collector module provides the Python function `is_tracked(obj)`, which returns the current tracking status of the object. Subsequent garbage collections may change the @@ -578,11 +569,11 @@ False >>> gc.is_tracked([]) True + >>> gc.is_tracked(()) + False >>> gc.is_tracked({}) - False + True >>> gc.is_tracked({"a": 1}) - False - >>> gc.is_tracked({"a": []}) True ``` diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 4030716efb5..c94dc2df4f0 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -880,115 +880,6 @@ class C(object): gc.collect() self.assertIs(ref(), None, "Cycle was not collected") - def _not_tracked(self, t): - # Nested containers can take several collections to untrack - gc.collect() - gc.collect() - self.assertFalse(gc.is_tracked(t), t) - - def _tracked(self, t): - self.assertTrue(gc.is_tracked(t), t) - gc.collect() - gc.collect() - self.assertTrue(gc.is_tracked(t), t) - - def test_string_keys_can_track_values(self): - # Test that this doesn't leak. - for i in range(10): - d = {} - for j in range(10): - d[str(j)] = j - d["foo"] = d - - @support.cpython_only - def test_track_literals(self): - # Test GC-optimization of dict literals - x, y, z, w = 1.5, "a", (1, None), [] - - self._not_tracked({}) - self._not_tracked({x:(), y:x, z:1}) - self._not_tracked({1: "a", "b": 2}) - self._not_tracked({1: 2, (None, True, False, ()): int}) - self._not_tracked({1: object()}) - - # Dicts with mutable elements are always tracked, even if those - # elements are not tracked right now. - self._tracked({1: []}) - self._tracked({1: ([],)}) - self._tracked({1: {}}) - self._tracked({1: set()}) - - @support.cpython_only - def test_track_dynamic(self): - # Test GC-optimization of dynamically-created dicts - class MyObject(object): - pass - x, y, z, w, o = 1.5, "a", (1, object()), [], MyObject() - - d = dict() - self._not_tracked(d) - d[1] = "a" - self._not_tracked(d) - d[y] = 2 - self._not_tracked(d) - d[z] = 3 - self._not_tracked(d) - self._not_tracked(d.copy()) - d[4] = w - self._tracked(d) - self._tracked(d.copy()) - d[4] = None - self._not_tracked(d) - self._not_tracked(d.copy()) - - # dd isn't tracked right now, but it may mutate and therefore d - # which contains it must be tracked. - d = dict() - dd = dict() - d[1] = dd - self._not_tracked(dd) - self._tracked(d) - dd[1] = d - self._tracked(dd) - - d = dict.fromkeys([x, y, z]) - self._not_tracked(d) - dd = dict() - dd.update(d) - self._not_tracked(dd) - d = dict.fromkeys([x, y, z, o]) - self._tracked(d) - dd = dict() - dd.update(d) - self._tracked(dd) - - d = dict(x=x, y=y, z=z) - self._not_tracked(d) - d = dict(x=x, y=y, z=z, w=w) - self._tracked(d) - d = dict() - d.update(x=x, y=y, z=z) - self._not_tracked(d) - d.update(w=w) - self._tracked(d) - - d = dict([(x, y), (z, 1)]) - self._not_tracked(d) - d = dict([(x, y), (z, w)]) - self._tracked(d) - d = dict() - d.update([(x, y), (z, 1)]) - self._not_tracked(d) - d.update([(x, y), (z, w)]) - self._tracked(d) - - @support.cpython_only - def test_track_subtypes(self): - # Dict subtypes are always tracked - class MyDict(dict): - pass - self._tracked(MyDict()) - def make_shared_key_dict(self, n): class C: pass diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-19-17-17-32.gh-issue-127010.9Cl4bb.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-19-17-17-32.gh-issue-127010.9Cl4bb.rst new file mode 100644 index 00000000000..36e379c88ab --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-19-17-17-32.gh-issue-127010.9Cl4bb.rst @@ -0,0 +1,4 @@ +Simplify GC tracking of dictionaries. All dictionaries are tracked when +created, rather than being lazily tracked when a trackable object was added +to them. This simplifies the code considerably and results in a slight +speedup. diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 2090008055b..23616b3918b 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -883,6 +883,7 @@ new_dict(PyInterpreterState *interp, mp->ma_used = used; mp->_ma_watcher_tag = 0; ASSERT_CONSISTENT(mp); + _PyObject_GC_TRACK(mp); return (PyObject *)mp; } @@ -1578,64 +1579,6 @@ _PyDict_HasOnlyStringKeys(PyObject *dict) return 1; } -#define MAINTAIN_TRACKING(mp, key, value) \ - do { \ - if (!_PyObject_GC_IS_TRACKED(mp)) { \ - if (_PyObject_GC_MAY_BE_TRACKED(key) || \ - _PyObject_GC_MAY_BE_TRACKED(value)) { \ - _PyObject_GC_TRACK(mp); \ - } \ - } \ - } while(0) - -void -_PyDict_MaybeUntrack(PyObject *op) -{ - PyDictObject *mp; - PyObject *value; - Py_ssize_t i, numentries; - - ASSERT_WORLD_STOPPED_OR_DICT_LOCKED(op); - - if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op)) - return; - - mp = (PyDictObject *) op; - ASSERT_CONSISTENT(mp); - numentries = mp->ma_keys->dk_nentries; - if (_PyDict_HasSplitTable(mp)) { - for (i = 0; i < numentries; i++) { - if ((value = mp->ma_values->values[i]) == NULL) - continue; - if (_PyObject_GC_MAY_BE_TRACKED(value)) { - return; - } - } - } - else { - if (DK_IS_UNICODE(mp->ma_keys)) { - PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(mp->ma_keys); - for (i = 0; i < numentries; i++) { - if ((value = ep0[i].me_value) == NULL) - continue; - if (_PyObject_GC_MAY_BE_TRACKED(value)) - return; - } - } - else { - PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys); - for (i = 0; i < numentries; i++) { - if ((value = ep0[i].me_value) == NULL) - continue; - if (_PyObject_GC_MAY_BE_TRACKED(value) || - _PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key)) - return; - } - } - } - _PyObject_GC_UNTRACK(op); -} - void _PyDict_EnablePerThreadRefcounting(PyObject *op) { @@ -1761,7 +1704,6 @@ insert_split_value(PyInterpreterState *interp, PyDictObject *mp, PyObject *key, { assert(PyUnicode_CheckExact(key)); ASSERT_DICT_LOCKED(mp); - MAINTAIN_TRACKING(mp, key, value); PyObject *old_value = mp->ma_values->values[ix]; if (old_value == NULL) { _PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value); @@ -1818,8 +1760,6 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, if (ix == DKIX_ERROR) goto Fail; - MAINTAIN_TRACKING(mp, key, value); - if (ix == DKIX_EMPTY) { assert(!_PyDict_HasSplitTable(mp)); /* Insert into new slot. */ @@ -1878,8 +1818,6 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp, /* We don't decref Py_EMPTY_KEYS here because it is immortal. */ assert(mp->ma_values == NULL); - MAINTAIN_TRACKING(mp, key, value); - size_t hashpos = (size_t)hash & (PyDict_MINSIZE-1); dictkeys_set_index(newkeys, hashpos, 0); if (unicode) { @@ -4024,8 +3962,7 @@ copy_lock_held(PyObject *o) split_copy->ma_used = mp->ma_used; split_copy->_ma_watcher_tag = 0; dictkeys_incref(mp->ma_keys); - if (_PyObject_GC_IS_TRACKED(mp)) - _PyObject_GC_TRACK(split_copy); + _PyObject_GC_TRACK(split_copy); return (PyObject *)split_copy; } @@ -4060,11 +3997,6 @@ copy_lock_held(PyObject *o) new->ma_used = mp->ma_used; ASSERT_CONSISTENT(new); - if (_PyObject_GC_IS_TRACKED(mp)) { - /* Maintain tracking. */ - _PyObject_GC_TRACK(new); - } - return (PyObject *)new; } @@ -4351,7 +4283,6 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu } } - MAINTAIN_TRACKING(mp, key, value); STORE_USED(mp, mp->ma_used + 1); assert(mp->ma_keys->dk_usable >= 0); ASSERT_CONSISTENT(mp); @@ -4800,16 +4731,8 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds) d->ma_keys = Py_EMPTY_KEYS; d->ma_values = NULL; ASSERT_CONSISTENT(d); - - if (type != &PyDict_Type) { - // Don't track if a subclass tp_alloc is PyType_GenericAlloc() - if (!_PyObject_GC_IS_TRACKED(d)) { - _PyObject_GC_TRACK(d); - } - } - else { - // _PyType_AllocNoTrack() does not track the created object - assert(!_PyObject_GC_IS_TRACKED(d)); + if (!_PyObject_GC_IS_TRACKED(d)) { + _PyObject_GC_TRACK(d); } return self; } @@ -6746,19 +6669,14 @@ make_dict_from_instance_attributes(PyInterpreterState *interp, { dictkeys_incref(keys); Py_ssize_t used = 0; - Py_ssize_t track = 0; size_t size = shared_keys_usable_size(keys); for (size_t i = 0; i < size; i++) { PyObject *val = values->values[i]; if (val != NULL) { used += 1; - track += _PyObject_GC_MAY_BE_TRACKED(val); } } PyDictObject *res = (PyDictObject *)new_dict(interp, keys, values, used, 0); - if (track && res) { - _PyObject_GC_TRACK(res); - } return res; } @@ -7204,6 +7122,7 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict) // since we locked it. dict = _PyObject_ManagedDictPointer(obj)->dict; err = _PyDict_DetachFromObject(dict, obj); + assert(err == 0 || new_dict == NULL); if (err == 0) { FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict, (PyDictObject *)Py_XNewRef(new_dict)); @@ -7236,7 +7155,21 @@ void PyObject_ClearManagedDict(PyObject *obj) { if (_PyObject_SetManagedDict(obj, NULL) < 0) { + /* Must be out of memory */ + assert(PyErr_Occurred() == PyExc_MemoryError); PyErr_WriteUnraisable(NULL); + /* Clear the dict */ + PyDictObject *dict = _PyObject_GetManagedDict(obj); + Py_BEGIN_CRITICAL_SECTION2(dict, obj); + dict = _PyObject_ManagedDictPointer(obj)->dict; + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyDictKeysObject *oldkeys = dict->ma_keys; + set_keys(dict, Py_EMPTY_KEYS); + dict->ma_values = NULL; + dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(dict)); + STORE_USED(dict, 0); + set_dict_inline_values(obj, NULL); + Py_END_CRITICAL_SECTION2(); } } @@ -7261,12 +7194,6 @@ _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj) PyDictValues *values = copy_values(mp->ma_values); if (values == NULL) { - /* Out of memory. Clear the dict */ - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyDictKeysObject *oldkeys = mp->ma_keys; - set_keys(mp, Py_EMPTY_KEYS); - dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(mp)); - STORE_USED(mp, 0); PyErr_NoMemory(); return -1; } diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index 535b0d068f0..a8d64c9aefa 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -107,8 +107,6 @@ static void track_module(PyModuleObject *m) { _PyDict_EnablePerThreadRefcounting(m->md_dict); - PyObject_GC_Track(m->md_dict); - _PyObject_SetDeferredRefcount((PyObject *)m); PyObject_GC_Track(m); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 029bdb2d9f5..7ffe2f5b940 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2340,10 +2340,6 @@ dummy_func( DEOPT_IF(ep->me_key != name); PyObject *old_value = ep->me_value; DEOPT_IF(old_value == NULL); - /* Ensure dict is GC tracked if it needs to be */ - if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) { - _PyObject_GC_TRACK(dict); - } _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); ep->me_value = PyStackRef_AsPyObjectSteal(value); // old_value should be DECREFed after GC track checking is done, if not, it could raise a segmentation fault, diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index e9c9ed2b5da..976a3429b2e 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2914,10 +2914,6 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - /* Ensure dict is GC tracked if it needs to be */ - if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) { - _PyObject_GC_TRACK(dict); - } _PyFrame_SetStackPointer(frame, stack_pointer); _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); diff --git a/Python/gc.c b/Python/gc.c index a774183c49d..63adecf0e05 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -5,7 +5,7 @@ #include "Python.h" #include "pycore_ceval.h" // _Py_set_eval_breaker_bit() #include "pycore_context.h" -#include "pycore_dict.h" // _PyDict_MaybeUntrack() +#include "pycore_dict.h" // _PyInlineValuesSize() #include "pycore_initconfig.h" #include "pycore_interp.h" // PyInterpreterState.gc #include "pycore_object.h" @@ -747,21 +747,6 @@ untrack_tuples(PyGC_Head *head) } } -/* Try to untrack all currently tracked dictionaries */ -static void -untrack_dicts(PyGC_Head *head) -{ - PyGC_Head *next, *gc = GC_NEXT(head); - while (gc != head) { - PyObject *op = FROM_GC(gc); - next = GC_NEXT(gc); - if (PyDict_CheckExact(op)) { - _PyDict_MaybeUntrack(op); - } - gc = next; - } -} - /* Return true if object has a pre-PEP 442 finalization method. */ static int has_legacy_finalizer(PyObject *op) @@ -1258,15 +1243,10 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, gc_list_merge(resurrected, old_generation); } - -#define UNTRACK_TUPLES 1 -#define UNTRACK_DICTS 2 - static void gc_collect_region(PyThreadState *tstate, PyGC_Head *from, PyGC_Head *to, - int untrack, struct gc_collection_stats *stats); static inline Py_ssize_t @@ -1328,7 +1308,7 @@ gc_collect_young(PyThreadState *tstate, PyGC_Head survivors; gc_list_init(&survivors); - gc_collect_region(tstate, young, &survivors, UNTRACK_TUPLES, stats); + gc_collect_region(tstate, young, &survivors, stats); Py_ssize_t survivor_count = 0; if (gcstate->visited_space) { /* objects in visited space have bit set, so we set it here */ @@ -1471,7 +1451,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_list_validate_space(&increment, gcstate->visited_space); PyGC_Head survivors; gc_list_init(&survivors); - gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats); + gc_collect_region(tstate, &increment, &survivors, stats); gc_list_validate_space(&survivors, gcstate->visited_space); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); @@ -1504,7 +1484,6 @@ gc_collect_full(PyThreadState *tstate, gc_list_merge(pending, visited); gc_collect_region(tstate, visited, visited, - UNTRACK_TUPLES | UNTRACK_DICTS, stats); gcstate->young.count = 0; gcstate->old[0].count = 0; @@ -1522,7 +1501,6 @@ static void gc_collect_region(PyThreadState *tstate, PyGC_Head *from, PyGC_Head *to, - int untrack, struct gc_collection_stats *stats) { PyGC_Head unreachable; /* non-problematic unreachable trash */ @@ -1536,12 +1514,7 @@ gc_collect_region(PyThreadState *tstate, gc_list_init(&unreachable); deduce_unreachable(from, &unreachable); validate_consistent_old_space(from); - if (untrack & UNTRACK_TUPLES) { - untrack_tuples(from); - } - if (untrack & UNTRACK_DICTS) { - untrack_dicts(from); - } + untrack_tuples(from); validate_consistent_old_space(to); if (from != to) { gc_list_merge(from, to); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 499ee51fdb2..a6e0022340b 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -3,7 +3,7 @@ #include "pycore_brc.h" // struct _brc_thread_state #include "pycore_ceval.h" // _Py_set_eval_breaker_bit() #include "pycore_context.h" -#include "pycore_dict.h" // _PyDict_MaybeUntrack() +#include "pycore_dict.h" // _PyInlineValuesSize() #include "pycore_freelist.h" // _PyObject_ClearFreeLists() #include "pycore_initconfig.h" #include "pycore_interp.h" // PyInterpreterState.gc @@ -493,13 +493,6 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, return true; } } - else if (PyDict_CheckExact(op)) { - _PyDict_MaybeUntrack(op); - if (!_PyObject_GC_IS_TRACKED(op)) { - gc_restore_refs(op); - return true; - } - } } // We repurpose ob_tid to compute "gc_refs", the number of external diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 4d89753bae7..f3db2f9abc7 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7435,10 +7435,6 @@ DEOPT_IF(ep->me_key != name, STORE_ATTR); PyObject *old_value = ep->me_value; DEOPT_IF(old_value == NULL, STORE_ATTR); - /* Ensure dict is GC tracked if it needs to be */ - if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) { - _PyObject_GC_TRACK(dict); - } _PyFrame_SetStackPointer(frame, stack_pointer); _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame);