diff --git a/conftest.py b/conftest.py index 9104aef0d..16c2c7583 100644 --- a/conftest.py +++ b/conftest.py @@ -318,7 +318,7 @@ class ChromeWrapper(SeleniumWrapper): options = Options() options.add_argument("--headless") options.add_argument("--no-sandbox") - + options.add_argument("--js-flags=--expose-gc") return Chrome(options=options) diff --git a/docs/project/changelog.md b/docs/project/changelog.md index 5c9fa0b49..4d2524aac 100644 --- a/docs/project/changelog.md +++ b/docs/project/changelog.md @@ -66,6 +66,10 @@ substitutions: [#1407](https://github.com/pyodide/pyodide/pull/1407) - {{ API }} Added {any}`pyodide.isPyProxy` to test if an object is a `PyProxy`. [#1456](https://github.com/pyodide/pyodide/pull/1456) +- {{ Enhancement }} `PyProxy` and `PyBuffer` objects are now garbage collected + if the browser supports `FinalizationRegistry`. + [#1306](https://github.com/pyodide/pyodide/pull/1306) + ### Fixed - {{ Fix }} getattr and dir on JsProxy now report consistent results and include all diff --git a/docs/usage/faq.md b/docs/usage/faq.md index 9653ab419..27c65deac 100644 --- a/docs/usage/faq.md +++ b/docs/usage/faq.md @@ -167,3 +167,32 @@ Unpickling data is similar to `eval`. On any public-facing server it is a really bad idea to unpickle any data sent from the client. For sending data from client to server, try some other serialization format like JSON. ``` + +## How can I use a Python function as an event handler and then remove it later? + +Note that the most straight forward way of doing this will not work: +```py +from js import document +def f(*args): + document.querySelector("h1").innerHTML += "(>.<)" + +document.body.addEventListener('click', f) +document.body.removeEventListener('click', f) +``` +This leaks `f` and does not remove the event listener (instead +`removeEventListener` will silently do nothing). + +To do this correctly use :func:`pyodide.create_proxy` as follows: +```py +from js import document +from pyodide import create_proxy +def f(*args): + document.querySelector("h1").innerHTML += "(>.<)" + +proxy_f = create_proxy(f) +document.body.addEventListener('click', proxy_f) +# Store proxy_f in Python then later: +document.body.removeEventListener('click', proxy_f) +proxy_f.destroy() +``` +This also avoids memory leaks. diff --git a/src/core/pyproxy.c b/src/core/pyproxy.c index a1300bf6e..d0cfd0199 100644 --- a/src/core/pyproxy.c +++ b/src/core/pyproxy.c @@ -785,11 +785,10 @@ EM_JS_REF(JsRef, create_once_callable, (PyObject * obj), { if (alreadyCalled) { throw new Error("OnceProxy can only be called once"); } - alreadyCalled = true; try { return Module.callPyObject(obj, ... args); } finally { - _Py_DecRef(obj); + wrapper.destroy(); } } wrapper.destroy = function() @@ -798,8 +797,10 @@ EM_JS_REF(JsRef, create_once_callable, (PyObject * obj), { throw new Error("OnceProxy has already been destroyed"); } alreadyCalled = true; + Module.finalizationRegistry.unregister(wrapper); _Py_DecRef(obj); }; + Module.finalizationRegistry.register(wrapper, obj, wrapper); return Module.hiwire.new_value(wrapper); }); @@ -813,6 +814,9 @@ create_once_callable_py(PyObject* _mod, PyObject* obj) } // clang-format off + +// At some point it would be nice to use FinalizationRegistry with these, but +// it's a bit tricky. EM_JS_REF(JsRef, create_promise_handles, ( PyObject* handle_result, PyObject* handle_exception ), { diff --git a/src/core/pyproxy.js b/src/core/pyproxy.js index d976f2415..b9fe4c105 100644 --- a/src/core/pyproxy.js +++ b/src/core/pyproxy.js @@ -18,6 +18,33 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */ Module.PyProxies = {}; // clang-format on + if (globalThis.FinalizationRegistry) { + Module.finalizationRegistry = new FinalizationRegistry((ptr) => { + try { + _Py_DecRef(ptr); + } catch (e) { + // I'm not really sure what happens if an error occurs inside of a + // finalizer... + Module.fatal_error(e); + } + }); + // For some unclear reason this code screws up selenium FirefoxDriver. Works + // fine in chrome and when I test it in browser. It seems to be sensitive to + // changes that don't make a difference to the semantics. + // TODO: after v0.17.0 release, fix selenium issues with this code. + // Module.bufferFinalizationRegistry = new FinalizationRegistry((ptr) => { + // try { + // _PyBuffer_Release(ptr); + // _PyMem_Free(ptr); + // } catch (e) { + // Module.fatal_error(e); + // } + // }); + } else { + Module.finalizationRegistry = {register() {}, unregister() {}}; + // Module.bufferFinalizationRegistry = finalizationRegistry; + } + /** * In the case that the Python object is callable, PyProxyClass inherits from * Function so that PyProxy objects can be callable. @@ -39,14 +66,6 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */ * @private */ Module.pyproxy_new = function(ptrobj) { - // Technically, this leaks memory, since we're holding on to a reference - // to the proxy forever. But we have that problem anyway since we don't - // have a destructor in Javascript to free the Python object. - // _pyproxy_destroy, which is a way for users to manually delete the proxy, - // also deletes the proxy from this set. - if (Module.PyProxies.hasOwnProperty(ptrobj)) { - return Module.PyProxies[ptrobj]; - } let flags = _pyproxy_getflags(ptrobj); let cls = Module.getPyProxyClass(flags); // Reflect.construct calls the constructor of Module.PyProxyClass but sets @@ -58,7 +77,7 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */ // To make a callable proxy, we must call the Function constructor. // In this case we are effectively subclassing Function. target = Reflect.construct(Function, [], cls); - // Remove undesireable properties added by Function constructor. Note: we + // Remove undesirable properties added by Function constructor. Note: we // can't remove "arguments" or "caller" because they are not configurable // and not writable delete target.length; @@ -72,7 +91,7 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */ {value : {ptr : ptrobj, type : 'PyProxy'}}); _Py_IncRef(ptrobj); let proxy = new Proxy(target, Module.PyProxyHandlers); - Module.PyProxies[ptrobj] = proxy; + Module.finalizationRegistry.register(proxy, ptrobj, proxy); return proxy; }; @@ -193,11 +212,11 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */ */ destroy() { let ptrobj = _getPtr(this); - // Maybe the destructor will calls Javascript code that will somehow try + Module.finalizationRegistry.unregister(this); + // Maybe the destructor will call Javascript code that will somehow try // to use this proxy. Mark it deleted before decrementing reference count // just in case! this.$$.ptr = null; - delete Module.PyProxies[ptrobj]; try { _Py_DecRef(ptrobj); } catch (e) { @@ -216,9 +235,10 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */ * @returns The Javascript object resulting from the conversion. */ toJs(depth = -1) { + let ptrobj = _getPtr(this); let idresult; try { - idresult = _python2js_with_depth(_getPtr(this), depth); + idresult = _python2js_with_depth(ptrobj, depth); } catch (e) { Module.fatal_error(e); } @@ -373,6 +393,47 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */ }; class TempError extends Error {}; + + /** + * A helper for [Symbol.iterator]. + * + * Because "it is possible for a generator to be garbage collected without + * ever running its finally block", we take extra care to try to ensure that + * we don't leak the iterator. We register it with the finalizationRegistry, + * but if the finally block is executed, we decref the pointer and unregister. + * + * In order to do this, we create the generator with this inner method, + * register the finalizer, and then return it. + * + * Quote from: + * https://hacks.mozilla.org/2015/07/es6-in-depth-generators-continued/ + * + * @private + */ + function* iter_helper(iterptr, token) { + try { + if (iterptr === 0) { + throw new TempError(); + } + let item; + while ((item = __pyproxy_iter_next(iterptr))) { + yield Module.hiwire.pop_value(item); + } + if (_PyErr_Occurred()) { + throw new TempError(); + } + } catch (e) { + if (e instanceof TempError) { + _pythonexc2js(); + } else { + Module.fatal_error(e); + } + } finally { + Module.finalizationRegistry.unregister(token); + _Py_DecRef(iterptr); + } + } + // Controlled by IS_ITERABLE, appears for any object with __iter__ or tp_iter, // unless they are iterators. See: https://docs.python.org/3/c-api/iter.html // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Iteration_protocols @@ -390,28 +451,20 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */ * * @returns {Iterator} An iterator for the proxied Python object. */ - [Symbol.iterator] : function*() { + [Symbol.iterator] : function() { + let ptrobj = _getPtr(this); + let token = {}; + let iterptr; try { - let iterptr = _PyObject_GetIter(_getPtr(this)); - if (iterptr === 0) { - throw new TempError(); - } - let item; - while ((item = __pyproxy_iter_next(iterptr))) { - yield Module.hiwire.pop_value(item); - } - _Py_DecRef(iterptr); - if (_PyErr_Occurred()) { - throw new TempError(); - } + iterptr = _PyObject_GetIter(ptrobj); } catch (e) { - if (e instanceof TempError) { - _pythonexc2js(); - } else { - Module.fatal_error(e); - } + Module.fatal_error(e); } - } + + let result = iter_helper(iterptr, token); + Module.finalizationRegistry.register(result, iterptr, token); + return result; + }, }; // Controlled by IS_ITERATOR, appears for any object with a __next__ or @@ -875,7 +928,7 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */ success = true; // clang-format off - return Object.create(Module.PyBuffer.prototype, + let result = Object.create(Module.PyBuffer.prototype, Object.getOwnPropertyDescriptors({ offset, readonly, @@ -892,6 +945,8 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */ _released : false }) ); + // Module.bufferFinalizationRegistry.register(result, view_ptr, result); + return result; // clang-format on } finally { if (!success) { @@ -1072,6 +1127,7 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */ if (this._released) { return; } + // Module.bufferFinalizationRegistry.unregister(this); try { _PyBuffer_Release(this._view_ptr); _PyMem_Free(this._view_ptr); diff --git a/src/tests/test_pyodide.py b/src/tests/test_pyodide.py index b603e2c93..c29c05e7f 100644 --- a/src/tests/test_pyodide.py +++ b/src/tests/test_pyodide.py @@ -366,7 +366,7 @@ def test_create_proxy(selenium): assert sys.getrefcount(f) == 3 assert testCallListener() == 7 assert sys.getrefcount(f) == 3 - assert testRemoveListener(f) + assert testRemoveListener(proxy) assert sys.getrefcount(f) == 3 proxy.destroy() assert sys.getrefcount(f) == 2 diff --git a/src/tests/test_pyproxy.py b/src/tests/test_pyproxy.py index 8326f3675..990ef0ee5 100644 --- a/src/tests/test_pyproxy.py +++ b/src/tests/test_pyproxy.py @@ -95,8 +95,8 @@ def test_pyproxy_refcount(selenium): result.push([getRefCount(), 3]) pyodide.runPython(` - window.jsfunc(pyfunc) # re-used existing PyProxy - window.jsfunc(pyfunc) # re-used existing PyProxy + window.jsfunc(pyfunc) # create new PyProxy + window.jsfunc(pyfunc) # create new PyProxy `) // the refcount should be 3 because: @@ -104,7 +104,7 @@ def test_pyproxy_refcount(selenium): // 1. pyfunc exists // 2. one reference from PyProxy to pyfunc is alive // 3. pyfunc is referenced from the sys.getrefcount()-test - result.push([getRefCount(), 3]); + result.push([getRefCount(), 5]); return result; """ ) @@ -461,6 +461,142 @@ def test_pyproxy_mixins2(selenium): ) +def test_pyproxy_gc(selenium): + if selenium.browser != "chrome": + pytest.skip("No gc exposed") + + # Two ways to trigger garbage collection in Chrome: + # 1. options.add_argument("--js-flags=--expose-gc") in conftest, and use + # gc() in javascript. + # 2. selenium.driver.execute_cdp_cmd("HeapProfiler.collectGarbage", {}) + # + # Unclear how to trigger gc in Firefox. Possible to do this by navigating to + # "about:memory" and then triggering a button press to the "GC" button, but + # that seems too annoying. + + selenium.run_js( + """ + window.x = new FinalizationRegistry((val) => { window.val = val; }); + x.register({}, 77); + gc(); + """ + ) + assert selenium.run_js("return window.val;") == 77 + + selenium.run_js( + """ + window.res = new Map(); + + let d = pyodide.runPython(` + from js import res + def get_ref_count(x): + res[x] = sys.getrefcount(d) + return res[x] + + import sys + class Test: + def __del__(self): + res["destructor_ran"] = True + + def get(self): + return 7 + + d = Test() + get_ref_count(0) + d + `); + let get_ref_count = pyodide.globals.get("get_ref_count"); + get_ref_count(1); + d.get(); + get_ref_count(2); + d.get(); + """ + ) + selenium.driver.execute_cdp_cmd("HeapProfiler.collectGarbage", {}) + + selenium.run( + """ + get_ref_count(3) + del d + """ + ) + selenium.driver.execute_cdp_cmd("HeapProfiler.collectGarbage", {}) + a = selenium.run_js("return Array.from(res.entries());") + assert dict(a) == {0: 2, 1: 3, 2: 4, 3: 2, "destructor_ran": True} + + +def test_pyproxy_gc_destroy(selenium): + if selenium.browser != "chrome": + pytest.skip("No gc exposed") + + selenium.run_js( + """ + window.res = new Map(); + let d = pyodide.runPython(` + from js import res + def get_ref_count(x): + res[x] = sys.getrefcount(d) + return res[x] + import sys + class Test: + def __del__(self): + res["destructor_ran"] = True + + def get(self): + return 7 + + d = Test() + get_ref_count(0) + d + `); + let get_ref_count = pyodide.globals.get("get_ref_count"); + get_ref_count(1); + d.get(); + get_ref_count(2); + d.get(); + get_ref_count(3); + d.destroy(); + get_ref_count(4); + gc(); + get_ref_count(5); + """ + ) + selenium.driver.execute_cdp_cmd("HeapProfiler.collectGarbage", {}) + selenium.run( + """ + get_ref_count(6) + del d + """ + ) + a = selenium.run_js("return Array.from(res.entries());") + assert dict(a) == { + 0: 2, + 1: 3, + 2: 4, + 3: 5, + 4: 4, + 5: 4, + 6: 2, + "destructor_ran": True, + } + + +def test_pyproxy_copy(selenium): + result = selenium.run_js( + """ + let result = []; + let a = pyodide.runPython(`d = { 1 : 2}; d`); + let b = pyodide.runPython(`d`); + result.push(a.get(1)); + a.destroy(); + result.push(b.get(1)); + return result; + """ + ) + assert result[0] == 2 + assert result[1] == 2 + + def test_errors(selenium): selenium.run_js( """ diff --git a/src/tests/test_python.py b/src/tests/test_python.py index ac4fc24eb..e8afa4f6f 100644 --- a/src/tests/test_python.py +++ b/src/tests/test_python.py @@ -42,14 +42,6 @@ def test_globals_get_multiple(selenium): selenium.run_js("pyodide.globals.get('v')") -def test_globals_get_same(selenium): - """See #382""" - selenium.run("def func(): return 42") - assert selenium.run_js( - "return pyodide.globals.get('func') == pyodide.globals.get('func')" - ) - - def test_open_url(selenium, httpserver): httpserver.expect_request("/data").respond_with_data( b"HELLO", content_type="text/text", headers={"Access-Control-Allow-Origin": "*"}