diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index 4df1b95bca7..d009f57e47e 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -6,6 +6,7 @@ import collections import decimal import fractions +import gc import io import locale import os @@ -1606,6 +1607,18 @@ def __iter__(self): self.assertIs(cm.exception, exception) + @support.cpython_only + def test_zip_result_gc(self): + # bpo-42536: zip's tuple-reuse speed trick breaks the GC's assumptions + # about what can be untracked. Make sure we re-track result tuples + # whenever we reuse them. + it = zip([[]]) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which is initialized to (None,). Make sure it's re-tracked when + # it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) + def test_format(self): # Test the basic machinery of the format() builtin. Don't test # the specifics of the various formatters diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 6b8596fff6a..b10c07534dd 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -1422,6 +1422,25 @@ def items(self): d = CustomReversedDict(pairs) self.assertEqual(pairs[::-1], list(dict(d).items())) + @support.cpython_only + def test_dict_items_result_gc(self): + # bpo-42536: dict.items's tuple-reuse speed trick breaks the GC's + # assumptions about what can be untracked. Make sure we re-track result + # tuples whenever we reuse them. + it = iter({None: []}.items()) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which is initialized to (None, None). Make sure it's re-tracked + # when it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_dict_items_result_gc(self): + # Same as test_dict_items_result_gc above, but reversed. + it = reversed({None: []}.items()) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + class CAPITest(unittest.TestCase): diff --git a/Lib/test/test_enumerate.py b/Lib/test/test_enumerate.py index 5785cb46492..906bfc21a26 100644 --- a/Lib/test/test_enumerate.py +++ b/Lib/test/test_enumerate.py @@ -2,6 +2,7 @@ import operator import sys import pickle +import gc from test import support @@ -134,6 +135,18 @@ def test_tuple_reuse(self): self.assertEqual(len(set(map(id, list(enumerate(self.seq))))), len(self.seq)) self.assertEqual(len(set(map(id, enumerate(self.seq)))), min(1,len(self.seq))) + @support.cpython_only + def test_enumerate_result_gc(self): + # bpo-42536: enumerate's tuple-reuse speed trick breaks the GC's + # assumptions about what can be untracked. Make sure we re-track result + # tuples whenever we reuse them. + it = self.enum([[]]) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which is initialized to (None, None). Make sure it's re-tracked + # when it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) + class MyEnum(enumerate): pass diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index 702cf082031..71012642849 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -12,6 +12,8 @@ import sys import struct import threading +import gc + maxsize = support.MAX_Py_ssize_t minsize = -maxsize-1 @@ -1554,6 +1556,51 @@ def test_StopIteration(self): self.assertRaises(StopIteration, next, f(lambda x:x, [])) self.assertRaises(StopIteration, next, f(lambda x:x, StopNow())) + @support.cpython_only + def test_combinations_result_gc(self): + # bpo-42536: combinations's tuple-reuse speed trick breaks the GC's + # assumptions about what can be untracked. Make sure we re-track result + # tuples whenever we reuse them. + it = combinations([None, []], 1) + next(it) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which has the value (None,). Make sure it's re-tracked when + # it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_combinations_with_replacement_result_gc(self): + # Ditto for combinations_with_replacement. + it = combinations_with_replacement([None, []], 1) + next(it) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_permutations_result_gc(self): + # Ditto for permutations. + it = permutations([None, []], 1) + next(it) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_product_result_gc(self): + # Ditto for product. + it = product([None, []]) + next(it) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + + @support.cpython_only + def test_zip_longest_result_gc(self): + # Ditto for zip_longest. + it = zip_longest([[]]) + gc.collect() + self.assertTrue(gc.is_tracked(next(it))) + + class TestExamples(unittest.TestCase): def test_accumulate(self): diff --git a/Lib/test/test_ordered_dict.py b/Lib/test/test_ordered_dict.py index fdea44e4d85..9df4efbfffa 100644 --- a/Lib/test/test_ordered_dict.py +++ b/Lib/test/test_ordered_dict.py @@ -697,6 +697,17 @@ def test_merge_operator(self): with self.assertRaises(ValueError): a |= "BAD" + @support.cpython_only + def test_ordered_dict_items_result_gc(self): + # bpo-42536: OrderedDict.items's tuple-reuse speed trick breaks the GC's + # assumptions about what can be untracked. Make sure we re-track result + # tuples whenever we reuse them. + it = iter(self.OrderedDict({None: []}).items()) + gc.collect() + # That GC collection probably untracked the recycled internal result + # tuple, which is initialized to (None, None). Make sure it's re-tracked + # when it's mutated and returned from __next__: + self.assertTrue(gc.is_tracked(next(it))) class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst b/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst new file mode 100644 index 00000000000..6ccacab1f64 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2020-12-02-20-23-31.bpo-42536.Kx3ZOu.rst @@ -0,0 +1,26 @@ +Several built-in and standard library types now ensure that their internal +result tuples are always tracked by the :term:`garbage collector +`: + +- :meth:`collections.OrderedDict.items() ` + +- :meth:`dict.items` + +- :func:`enumerate` + +- :func:`functools.reduce` + +- :func:`itertools.combinations` + +- :func:`itertools.combinations_with_replacement` + +- :func:`itertools.permutations` + +- :func:`itertools.product` + +- :func:`itertools.zip_longest` + +- :func:`zip` + +Previously, they could have become untracked by a prior garbage collection. +Patch by Brandt Bucher. diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index d158d3bae15..42764a181d2 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -1,4 +1,5 @@ #include "Python.h" +#include "pycore_object.h" // _PyObject_GC_TRACK #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_tupleobject.h" #include "structmember.h" // PyMemberDef @@ -673,6 +674,11 @@ functools_reduce(PyObject *self, PyObject *args) if ((result = PyObject_Call(func, args, NULL)) == NULL) { goto Fail; } + // bpo-42536: The GC may have untracked this args tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(args)) { + _PyObject_GC_TRACK(args); + } } } diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 18fcebdf25b..95ef8d79a16 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -2,6 +2,7 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" #include "pycore_tupleobject.h" +#include "pycore_object.h" // _PyObject_GC_TRACK() #include // offsetof() /* Itertools module written and maintained @@ -2245,6 +2246,11 @@ product_next(productobject *lz) lz->result = result; Py_DECREF(old_result); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + else if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } /* Now, we've got the only copy so we can update it in-place */ assert (npools==0 || Py_REFCNT(result) == 1); @@ -2568,6 +2574,11 @@ combinations_next(combinationsobject *co) co->result = result; Py_DECREF(old_result); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + else if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } /* Now, we've got the only copy so we can update it in-place * CPython's empty tuple is a singleton and cached in * PyTuple's freelist. @@ -2902,6 +2913,11 @@ cwr_next(cwrobject *co) co->result = result; Py_DECREF(old_result); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + else if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } /* Now, we've got the only copy so we can update it in-place CPython's empty tuple is a singleton and cached in PyTuple's freelist. */ assert(r == 0 || Py_REFCNT(result) == 1); @@ -3246,6 +3262,11 @@ permutations_next(permutationsobject *po) po->result = result; Py_DECREF(old_result); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + else if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } /* Now, we've got the only copy so we can update it in-place */ assert(r == 0 || Py_REFCNT(result) == 1); @@ -4515,6 +4536,11 @@ zip_longest_next(ziplongestobject *lz) PyTuple_SET_ITEM(result, i, item); Py_DECREF(olditem); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(tuplesize); if (result == NULL) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index faee6bc901e..8a056530a45 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3861,6 +3861,11 @@ dictiter_iternextitem(dictiterobject *di) Py_INCREF(result); Py_DECREF(oldkey); Py_DECREF(oldvalue); + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(2); @@ -3976,6 +3981,11 @@ dictreviter_iternext(dictiterobject *di) Py_INCREF(result); Py_DECREF(oldkey); Py_DECREF(oldvalue); + // bpo-42536: The GC may have untracked this result tuple. Since + // we're recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(2); diff --git a/Objects/enumobject.c b/Objects/enumobject.c index 4a83bb45aa6..bdd0ea5f397 100644 --- a/Objects/enumobject.c +++ b/Objects/enumobject.c @@ -1,6 +1,7 @@ /* enumerate object */ #include "Python.h" +#include "pycore_object.h" // _PyObject_GC_TRACK() #include "clinic/enumobject.c.h" @@ -130,6 +131,11 @@ enum_next_long(enumobject *en, PyObject* next_item) PyTuple_SET_ITEM(result, 1, next_item); Py_DECREF(old_index); Py_DECREF(old_item); + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } return result; } result = PyTuple_New(2); @@ -175,6 +181,11 @@ enum_next(enumobject *en) PyTuple_SET_ITEM(result, 1, next_item); Py_DECREF(old_index); Py_DECREF(old_item); + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } return result; } result = PyTuple_New(2); diff --git a/Objects/odictobject.c b/Objects/odictobject.c index d5bf499575d..b9f2169a72a 100644 --- a/Objects/odictobject.c +++ b/Objects/odictobject.c @@ -1817,6 +1817,11 @@ odictiter_iternext(odictiterobject *di) Py_INCREF(result); Py_DECREF(PyTuple_GET_ITEM(result, 0)); /* borrowed */ Py_DECREF(PyTuple_GET_ITEM(result, 1)); /* borrowed */ + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(2); diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 199b09c4d8c..614012df9b1 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2619,6 +2619,11 @@ zip_next(zipobject *lz) PyTuple_SET_ITEM(result, i, item); Py_DECREF(olditem); } + // bpo-42536: The GC may have untracked this result tuple. Since we're + // recycling it, make sure it's tracked again: + if (!_PyObject_GC_IS_TRACKED(result)) { + _PyObject_GC_TRACK(result); + } } else { result = PyTuple_New(tuplesize); if (result == NULL)