GH-127010: Don't lazily track and untrack dicts (GH-127027)

This commit is contained in:
Mark Shannon 2024-11-20 16:41:20 +00:00 committed by GitHub
parent 7191b7662e
commit aea0c586d1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 43 additions and 282 deletions

View File

@ -204,8 +204,6 @@ The :mod:`gc` module provides the following functions:
>>> gc.is_tracked({})
False
>>> gc.is_tracked({"a": 1})
False
>>> gc.is_tracked({"a": []})
True
.. versionadded:: 3.1

View File

@ -43,8 +43,6 @@ extern int _PyDict_Next(
extern int _PyDict_HasOnlyStringKeys(PyObject *mp);
extern void _PyDict_MaybeUntrack(PyObject *mp);
// Export for '_ctypes' shared extension
PyAPI_FUNC(Py_ssize_t) _PyDict_SizeOf(PyDictObject *);

View File

@ -532,8 +532,8 @@
currently in. Instead, when that's needed, ad hoc tricks (like the
`NEXT_MASK_UNREACHABLE` flag) are employed.
Optimization: delay tracking containers
=======================================
Optimization: delayed untracking containers
===========================================
Certain types of containers cannot participate in a reference cycle, and so do
not need to be tracked by the garbage collector. Untracking these objects
@ -546,26 +546,17 @@
2. When the container is examined by the garbage collector.
As a general rule, instances of atomic types aren't tracked and instances of
non-atomic types (containers, user-defined objects...) are. However, some
type-specific optimizations can be present in order to suppress the garbage
collector footprint of simple instances. Some examples of native types that
benefit from delayed tracking:
non-atomic types (containers, user-defined objects...) are.
- Tuples containing only immutable objects (integers, strings etc,
and recursively, tuples of immutable objects) do not need to be tracked. The
interpreter creates a large number of tuples, many of which will not survive
until garbage collection. It is therefore not worthwhile to untrack eligible
tuples at creation time. Instead, all tuples except the empty tuple are tracked
when created. During garbage collection it is determined whether any surviving
tuples can be untracked. A tuple can be untracked if all of its contents are
already not tracked. Tuples are examined for untracking in all garbage collection
cycles. It may take more than one cycle to untrack a tuple.
- Dictionaries containing only immutable objects also do not need to be tracked.
Dictionaries are untracked when created. If a tracked item is inserted into a
dictionary (either as a key or value), the dictionary becomes tracked. During a
full garbage collection (all generations), the collector will untrack any dictionaries
whose contents are not tracked.
Tuples containing only immutable objects (integers, strings etc,
and recursively, tuples of immutable objects) do not need to be tracked. The
interpreter creates a large number of tuples, many of which will not survive
until garbage collection. It is therefore not worthwhile to untrack eligible
tuples at creation time. Instead, all tuples except the empty tuple are tracked
when created. During garbage collection it is determined whether any surviving
tuples can be untracked. A tuple can be untracked if all of its contents are
already not tracked. Tuples are examined for untracking in all garbage collection
cycles.
The garbage collector module provides the Python function `is_tracked(obj)`, which returns
the current tracking status of the object. Subsequent garbage collections may change the
@ -578,11 +569,11 @@
False
>>> gc.is_tracked([])
True
>>> gc.is_tracked(())
False
>>> gc.is_tracked({})
False
True
>>> gc.is_tracked({"a": 1})
False
>>> gc.is_tracked({"a": []})
True
```

View File

@ -880,115 +880,6 @@ class C(object):
gc.collect()
self.assertIs(ref(), None, "Cycle was not collected")
def _not_tracked(self, t):
# Nested containers can take several collections to untrack
gc.collect()
gc.collect()
self.assertFalse(gc.is_tracked(t), t)
def _tracked(self, t):
self.assertTrue(gc.is_tracked(t), t)
gc.collect()
gc.collect()
self.assertTrue(gc.is_tracked(t), t)
def test_string_keys_can_track_values(self):
# Test that this doesn't leak.
for i in range(10):
d = {}
for j in range(10):
d[str(j)] = j
d["foo"] = d
@support.cpython_only
def test_track_literals(self):
# Test GC-optimization of dict literals
x, y, z, w = 1.5, "a", (1, None), []
self._not_tracked({})
self._not_tracked({x:(), y:x, z:1})
self._not_tracked({1: "a", "b": 2})
self._not_tracked({1: 2, (None, True, False, ()): int})
self._not_tracked({1: object()})
# Dicts with mutable elements are always tracked, even if those
# elements are not tracked right now.
self._tracked({1: []})
self._tracked({1: ([],)})
self._tracked({1: {}})
self._tracked({1: set()})
@support.cpython_only
def test_track_dynamic(self):
# Test GC-optimization of dynamically-created dicts
class MyObject(object):
pass
x, y, z, w, o = 1.5, "a", (1, object()), [], MyObject()
d = dict()
self._not_tracked(d)
d[1] = "a"
self._not_tracked(d)
d[y] = 2
self._not_tracked(d)
d[z] = 3
self._not_tracked(d)
self._not_tracked(d.copy())
d[4] = w
self._tracked(d)
self._tracked(d.copy())
d[4] = None
self._not_tracked(d)
self._not_tracked(d.copy())
# dd isn't tracked right now, but it may mutate and therefore d
# which contains it must be tracked.
d = dict()
dd = dict()
d[1] = dd
self._not_tracked(dd)
self._tracked(d)
dd[1] = d
self._tracked(dd)
d = dict.fromkeys([x, y, z])
self._not_tracked(d)
dd = dict()
dd.update(d)
self._not_tracked(dd)
d = dict.fromkeys([x, y, z, o])
self._tracked(d)
dd = dict()
dd.update(d)
self._tracked(dd)
d = dict(x=x, y=y, z=z)
self._not_tracked(d)
d = dict(x=x, y=y, z=z, w=w)
self._tracked(d)
d = dict()
d.update(x=x, y=y, z=z)
self._not_tracked(d)
d.update(w=w)
self._tracked(d)
d = dict([(x, y), (z, 1)])
self._not_tracked(d)
d = dict([(x, y), (z, w)])
self._tracked(d)
d = dict()
d.update([(x, y), (z, 1)])
self._not_tracked(d)
d.update([(x, y), (z, w)])
self._tracked(d)
@support.cpython_only
def test_track_subtypes(self):
# Dict subtypes are always tracked
class MyDict(dict):
pass
self._tracked(MyDict())
def make_shared_key_dict(self, n):
class C:
pass

View File

@ -0,0 +1,4 @@
Simplify GC tracking of dictionaries. All dictionaries are tracked when
created, rather than being lazily tracked when a trackable object was added
to them. This simplifies the code considerably and results in a slight
speedup.

View File

@ -883,6 +883,7 @@ new_dict(PyInterpreterState *interp,
mp->ma_used = used;
mp->_ma_watcher_tag = 0;
ASSERT_CONSISTENT(mp);
_PyObject_GC_TRACK(mp);
return (PyObject *)mp;
}
@ -1578,64 +1579,6 @@ _PyDict_HasOnlyStringKeys(PyObject *dict)
return 1;
}
#define MAINTAIN_TRACKING(mp, key, value) \
do { \
if (!_PyObject_GC_IS_TRACKED(mp)) { \
if (_PyObject_GC_MAY_BE_TRACKED(key) || \
_PyObject_GC_MAY_BE_TRACKED(value)) { \
_PyObject_GC_TRACK(mp); \
} \
} \
} while(0)
void
_PyDict_MaybeUntrack(PyObject *op)
{
PyDictObject *mp;
PyObject *value;
Py_ssize_t i, numentries;
ASSERT_WORLD_STOPPED_OR_DICT_LOCKED(op);
if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op))
return;
mp = (PyDictObject *) op;
ASSERT_CONSISTENT(mp);
numentries = mp->ma_keys->dk_nentries;
if (_PyDict_HasSplitTable(mp)) {
for (i = 0; i < numentries; i++) {
if ((value = mp->ma_values->values[i]) == NULL)
continue;
if (_PyObject_GC_MAY_BE_TRACKED(value)) {
return;
}
}
}
else {
if (DK_IS_UNICODE(mp->ma_keys)) {
PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(mp->ma_keys);
for (i = 0; i < numentries; i++) {
if ((value = ep0[i].me_value) == NULL)
continue;
if (_PyObject_GC_MAY_BE_TRACKED(value))
return;
}
}
else {
PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
for (i = 0; i < numentries; i++) {
if ((value = ep0[i].me_value) == NULL)
continue;
if (_PyObject_GC_MAY_BE_TRACKED(value) ||
_PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key))
return;
}
}
}
_PyObject_GC_UNTRACK(op);
}
void
_PyDict_EnablePerThreadRefcounting(PyObject *op)
{
@ -1761,7 +1704,6 @@ insert_split_value(PyInterpreterState *interp, PyDictObject *mp, PyObject *key,
{
assert(PyUnicode_CheckExact(key));
ASSERT_DICT_LOCKED(mp);
MAINTAIN_TRACKING(mp, key, value);
PyObject *old_value = mp->ma_values->values[ix];
if (old_value == NULL) {
_PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value);
@ -1818,8 +1760,6 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp,
if (ix == DKIX_ERROR)
goto Fail;
MAINTAIN_TRACKING(mp, key, value);
if (ix == DKIX_EMPTY) {
assert(!_PyDict_HasSplitTable(mp));
/* Insert into new slot. */
@ -1878,8 +1818,6 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp,
/* We don't decref Py_EMPTY_KEYS here because it is immortal. */
assert(mp->ma_values == NULL);
MAINTAIN_TRACKING(mp, key, value);
size_t hashpos = (size_t)hash & (PyDict_MINSIZE-1);
dictkeys_set_index(newkeys, hashpos, 0);
if (unicode) {
@ -4024,8 +3962,7 @@ copy_lock_held(PyObject *o)
split_copy->ma_used = mp->ma_used;
split_copy->_ma_watcher_tag = 0;
dictkeys_incref(mp->ma_keys);
if (_PyObject_GC_IS_TRACKED(mp))
_PyObject_GC_TRACK(split_copy);
_PyObject_GC_TRACK(split_copy);
return (PyObject *)split_copy;
}
@ -4060,11 +3997,6 @@ copy_lock_held(PyObject *o)
new->ma_used = mp->ma_used;
ASSERT_CONSISTENT(new);
if (_PyObject_GC_IS_TRACKED(mp)) {
/* Maintain tracking. */
_PyObject_GC_TRACK(new);
}
return (PyObject *)new;
}
@ -4351,7 +4283,6 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu
}
}
MAINTAIN_TRACKING(mp, key, value);
STORE_USED(mp, mp->ma_used + 1);
assert(mp->ma_keys->dk_usable >= 0);
ASSERT_CONSISTENT(mp);
@ -4800,16 +4731,8 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
d->ma_keys = Py_EMPTY_KEYS;
d->ma_values = NULL;
ASSERT_CONSISTENT(d);
if (type != &PyDict_Type) {
// Don't track if a subclass tp_alloc is PyType_GenericAlloc()
if (!_PyObject_GC_IS_TRACKED(d)) {
_PyObject_GC_TRACK(d);
}
}
else {
// _PyType_AllocNoTrack() does not track the created object
assert(!_PyObject_GC_IS_TRACKED(d));
if (!_PyObject_GC_IS_TRACKED(d)) {
_PyObject_GC_TRACK(d);
}
return self;
}
@ -6746,19 +6669,14 @@ make_dict_from_instance_attributes(PyInterpreterState *interp,
{
dictkeys_incref(keys);
Py_ssize_t used = 0;
Py_ssize_t track = 0;
size_t size = shared_keys_usable_size(keys);
for (size_t i = 0; i < size; i++) {
PyObject *val = values->values[i];
if (val != NULL) {
used += 1;
track += _PyObject_GC_MAY_BE_TRACKED(val);
}
}
PyDictObject *res = (PyDictObject *)new_dict(interp, keys, values, used, 0);
if (track && res) {
_PyObject_GC_TRACK(res);
}
return res;
}
@ -7204,6 +7122,7 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict)
// since we locked it.
dict = _PyObject_ManagedDictPointer(obj)->dict;
err = _PyDict_DetachFromObject(dict, obj);
assert(err == 0 || new_dict == NULL);
if (err == 0) {
FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict,
(PyDictObject *)Py_XNewRef(new_dict));
@ -7236,7 +7155,21 @@ void
PyObject_ClearManagedDict(PyObject *obj)
{
if (_PyObject_SetManagedDict(obj, NULL) < 0) {
/* Must be out of memory */
assert(PyErr_Occurred() == PyExc_MemoryError);
PyErr_WriteUnraisable(NULL);
/* Clear the dict */
PyDictObject *dict = _PyObject_GetManagedDict(obj);
Py_BEGIN_CRITICAL_SECTION2(dict, obj);
dict = _PyObject_ManagedDictPointer(obj)->dict;
PyInterpreterState *interp = _PyInterpreterState_GET();
PyDictKeysObject *oldkeys = dict->ma_keys;
set_keys(dict, Py_EMPTY_KEYS);
dict->ma_values = NULL;
dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(dict));
STORE_USED(dict, 0);
set_dict_inline_values(obj, NULL);
Py_END_CRITICAL_SECTION2();
}
}
@ -7261,12 +7194,6 @@ _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj)
PyDictValues *values = copy_values(mp->ma_values);
if (values == NULL) {
/* Out of memory. Clear the dict */
PyInterpreterState *interp = _PyInterpreterState_GET();
PyDictKeysObject *oldkeys = mp->ma_keys;
set_keys(mp, Py_EMPTY_KEYS);
dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(mp));
STORE_USED(mp, 0);
PyErr_NoMemory();
return -1;
}

View File

@ -107,8 +107,6 @@ static void
track_module(PyModuleObject *m)
{
_PyDict_EnablePerThreadRefcounting(m->md_dict);
PyObject_GC_Track(m->md_dict);
_PyObject_SetDeferredRefcount((PyObject *)m);
PyObject_GC_Track(m);
}

View File

@ -2340,10 +2340,6 @@ dummy_func(
DEOPT_IF(ep->me_key != name);
PyObject *old_value = ep->me_value;
DEOPT_IF(old_value == NULL);
/* Ensure dict is GC tracked if it needs to be */
if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) {
_PyObject_GC_TRACK(dict);
}
_PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value));
ep->me_value = PyStackRef_AsPyObjectSteal(value);
// old_value should be DECREFed after GC track checking is done, if not, it could raise a segmentation fault,

View File

@ -2914,10 +2914,6 @@
UOP_STAT_INC(uopcode, miss);
JUMP_TO_JUMP_TARGET();
}
/* Ensure dict is GC tracked if it needs to be */
if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) {
_PyObject_GC_TRACK(dict);
}
_PyFrame_SetStackPointer(frame, stack_pointer);
_PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value));
stack_pointer = _PyFrame_GetStackPointer(frame);

View File

@ -5,7 +5,7 @@
#include "Python.h"
#include "pycore_ceval.h" // _Py_set_eval_breaker_bit()
#include "pycore_context.h"
#include "pycore_dict.h" // _PyDict_MaybeUntrack()
#include "pycore_dict.h" // _PyInlineValuesSize()
#include "pycore_initconfig.h"
#include "pycore_interp.h" // PyInterpreterState.gc
#include "pycore_object.h"
@ -747,21 +747,6 @@ untrack_tuples(PyGC_Head *head)
}
}
/* Try to untrack all currently tracked dictionaries */
static void
untrack_dicts(PyGC_Head *head)
{
PyGC_Head *next, *gc = GC_NEXT(head);
while (gc != head) {
PyObject *op = FROM_GC(gc);
next = GC_NEXT(gc);
if (PyDict_CheckExact(op)) {
_PyDict_MaybeUntrack(op);
}
gc = next;
}
}
/* Return true if object has a pre-PEP 442 finalization method. */
static int
has_legacy_finalizer(PyObject *op)
@ -1258,15 +1243,10 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable,
gc_list_merge(resurrected, old_generation);
}
#define UNTRACK_TUPLES 1
#define UNTRACK_DICTS 2
static void
gc_collect_region(PyThreadState *tstate,
PyGC_Head *from,
PyGC_Head *to,
int untrack,
struct gc_collection_stats *stats);
static inline Py_ssize_t
@ -1328,7 +1308,7 @@ gc_collect_young(PyThreadState *tstate,
PyGC_Head survivors;
gc_list_init(&survivors);
gc_collect_region(tstate, young, &survivors, UNTRACK_TUPLES, stats);
gc_collect_region(tstate, young, &survivors, stats);
Py_ssize_t survivor_count = 0;
if (gcstate->visited_space) {
/* objects in visited space have bit set, so we set it here */
@ -1471,7 +1451,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats)
gc_list_validate_space(&increment, gcstate->visited_space);
PyGC_Head survivors;
gc_list_init(&survivors);
gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats);
gc_collect_region(tstate, &increment, &survivors, stats);
gc_list_validate_space(&survivors, gcstate->visited_space);
gc_list_merge(&survivors, visited);
assert(gc_list_is_empty(&increment));
@ -1504,7 +1484,6 @@ gc_collect_full(PyThreadState *tstate,
gc_list_merge(pending, visited);
gc_collect_region(tstate, visited, visited,
UNTRACK_TUPLES | UNTRACK_DICTS,
stats);
gcstate->young.count = 0;
gcstate->old[0].count = 0;
@ -1522,7 +1501,6 @@ static void
gc_collect_region(PyThreadState *tstate,
PyGC_Head *from,
PyGC_Head *to,
int untrack,
struct gc_collection_stats *stats)
{
PyGC_Head unreachable; /* non-problematic unreachable trash */
@ -1536,12 +1514,7 @@ gc_collect_region(PyThreadState *tstate,
gc_list_init(&unreachable);
deduce_unreachable(from, &unreachable);
validate_consistent_old_space(from);
if (untrack & UNTRACK_TUPLES) {
untrack_tuples(from);
}
if (untrack & UNTRACK_DICTS) {
untrack_dicts(from);
}
untrack_tuples(from);
validate_consistent_old_space(to);
if (from != to) {
gc_list_merge(from, to);

View File

@ -3,7 +3,7 @@
#include "pycore_brc.h" // struct _brc_thread_state
#include "pycore_ceval.h" // _Py_set_eval_breaker_bit()
#include "pycore_context.h"
#include "pycore_dict.h" // _PyDict_MaybeUntrack()
#include "pycore_dict.h" // _PyInlineValuesSize()
#include "pycore_freelist.h" // _PyObject_ClearFreeLists()
#include "pycore_initconfig.h"
#include "pycore_interp.h" // PyInterpreterState.gc
@ -493,13 +493,6 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area,
return true;
}
}
else if (PyDict_CheckExact(op)) {
_PyDict_MaybeUntrack(op);
if (!_PyObject_GC_IS_TRACKED(op)) {
gc_restore_refs(op);
return true;
}
}
}
// We repurpose ob_tid to compute "gc_refs", the number of external

View File

@ -7435,10 +7435,6 @@
DEOPT_IF(ep->me_key != name, STORE_ATTR);
PyObject *old_value = ep->me_value;
DEOPT_IF(old_value == NULL, STORE_ATTR);
/* Ensure dict is GC tracked if it needs to be */
if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) {
_PyObject_GC_TRACK(dict);
}
_PyFrame_SetStackPointer(frame, stack_pointer);
_PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value));
stack_pointer = _PyFrame_GetStackPointer(frame);