Use FinalizationRegistry for garbage collection when available (#1306)

This commit is contained in:
Hood Chatham 2021-04-18 02:23:18 -04:00 committed by GitHub
parent 47ea3d1a68
commit 62829f45db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 269 additions and 48 deletions

View File

@ -318,7 +318,7 @@ class ChromeWrapper(SeleniumWrapper):
options = Options()
options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--js-flags=--expose-gc")
return Chrome(options=options)

View File

@ -66,6 +66,10 @@ substitutions:
[#1407](https://github.com/pyodide/pyodide/pull/1407)
- {{ API }} Added {any}`pyodide.isPyProxy` to test if an object is a `PyProxy`.
[#1456](https://github.com/pyodide/pyodide/pull/1456)
- {{ Enhancement }} `PyProxy` and `PyBuffer` objects are now garbage collected
if the browser supports `FinalizationRegistry`.
[#1306](https://github.com/pyodide/pyodide/pull/1306)
### Fixed
- {{ Fix }} getattr and dir on JsProxy now report consistent results and include all

View File

@ -167,3 +167,32 @@ Unpickling data is similar to `eval`. On any public-facing server it is a really
bad idea to unpickle any data sent from the client. For sending data from client
to server, try some other serialization format like JSON.
```
## How can I use a Python function as an event handler and then remove it later?
Note that the most straight forward way of doing this will not work:
```py
from js import document
def f(*args):
document.querySelector("h1").innerHTML += "(>.<)"
document.body.addEventListener('click', f)
document.body.removeEventListener('click', f)
```
This leaks `f` and does not remove the event listener (instead
`removeEventListener` will silently do nothing).
To do this correctly use :func:`pyodide.create_proxy` as follows:
```py
from js import document
from pyodide import create_proxy
def f(*args):
document.querySelector("h1").innerHTML += "(>.<)"
proxy_f = create_proxy(f)
document.body.addEventListener('click', proxy_f)
# Store proxy_f in Python then later:
document.body.removeEventListener('click', proxy_f)
proxy_f.destroy()
```
This also avoids memory leaks.

View File

@ -785,11 +785,10 @@ EM_JS_REF(JsRef, create_once_callable, (PyObject * obj), {
if (alreadyCalled) {
throw new Error("OnceProxy can only be called once");
}
alreadyCalled = true;
try {
return Module.callPyObject(obj, ... args);
} finally {
_Py_DecRef(obj);
wrapper.destroy();
}
}
wrapper.destroy = function()
@ -798,8 +797,10 @@ EM_JS_REF(JsRef, create_once_callable, (PyObject * obj), {
throw new Error("OnceProxy has already been destroyed");
}
alreadyCalled = true;
Module.finalizationRegistry.unregister(wrapper);
_Py_DecRef(obj);
};
Module.finalizationRegistry.register(wrapper, obj, wrapper);
return Module.hiwire.new_value(wrapper);
});
@ -813,6 +814,9 @@ create_once_callable_py(PyObject* _mod, PyObject* obj)
}
// clang-format off
// At some point it would be nice to use FinalizationRegistry with these, but
// it's a bit tricky.
EM_JS_REF(JsRef, create_promise_handles, (
PyObject* handle_result, PyObject* handle_exception
), {

View File

@ -18,6 +18,33 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */
Module.PyProxies = {};
// clang-format on
if (globalThis.FinalizationRegistry) {
Module.finalizationRegistry = new FinalizationRegistry((ptr) => {
try {
_Py_DecRef(ptr);
} catch (e) {
// I'm not really sure what happens if an error occurs inside of a
// finalizer...
Module.fatal_error(e);
}
});
// For some unclear reason this code screws up selenium FirefoxDriver. Works
// fine in chrome and when I test it in browser. It seems to be sensitive to
// changes that don't make a difference to the semantics.
// TODO: after v0.17.0 release, fix selenium issues with this code.
// Module.bufferFinalizationRegistry = new FinalizationRegistry((ptr) => {
// try {
// _PyBuffer_Release(ptr);
// _PyMem_Free(ptr);
// } catch (e) {
// Module.fatal_error(e);
// }
// });
} else {
Module.finalizationRegistry = {register() {}, unregister() {}};
// Module.bufferFinalizationRegistry = finalizationRegistry;
}
/**
* In the case that the Python object is callable, PyProxyClass inherits from
* Function so that PyProxy objects can be callable.
@ -39,14 +66,6 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */
* @private
*/
Module.pyproxy_new = function(ptrobj) {
// Technically, this leaks memory, since we're holding on to a reference
// to the proxy forever. But we have that problem anyway since we don't
// have a destructor in Javascript to free the Python object.
// _pyproxy_destroy, which is a way for users to manually delete the proxy,
// also deletes the proxy from this set.
if (Module.PyProxies.hasOwnProperty(ptrobj)) {
return Module.PyProxies[ptrobj];
}
let flags = _pyproxy_getflags(ptrobj);
let cls = Module.getPyProxyClass(flags);
// Reflect.construct calls the constructor of Module.PyProxyClass but sets
@ -58,7 +77,7 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */
// To make a callable proxy, we must call the Function constructor.
// In this case we are effectively subclassing Function.
target = Reflect.construct(Function, [], cls);
// Remove undesireable properties added by Function constructor. Note: we
// Remove undesirable properties added by Function constructor. Note: we
// can't remove "arguments" or "caller" because they are not configurable
// and not writable
delete target.length;
@ -72,7 +91,7 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */
{value : {ptr : ptrobj, type : 'PyProxy'}});
_Py_IncRef(ptrobj);
let proxy = new Proxy(target, Module.PyProxyHandlers);
Module.PyProxies[ptrobj] = proxy;
Module.finalizationRegistry.register(proxy, ptrobj, proxy);
return proxy;
};
@ -193,11 +212,11 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */
*/
destroy() {
let ptrobj = _getPtr(this);
// Maybe the destructor will calls Javascript code that will somehow try
Module.finalizationRegistry.unregister(this);
// Maybe the destructor will call Javascript code that will somehow try
// to use this proxy. Mark it deleted before decrementing reference count
// just in case!
this.$$.ptr = null;
delete Module.PyProxies[ptrobj];
try {
_Py_DecRef(ptrobj);
} catch (e) {
@ -216,9 +235,10 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */
* @returns The Javascript object resulting from the conversion.
*/
toJs(depth = -1) {
let ptrobj = _getPtr(this);
let idresult;
try {
idresult = _python2js_with_depth(_getPtr(this), depth);
idresult = _python2js_with_depth(ptrobj, depth);
} catch (e) {
Module.fatal_error(e);
}
@ -373,6 +393,47 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */
};
class TempError extends Error {};
/**
* A helper for [Symbol.iterator].
*
* Because "it is possible for a generator to be garbage collected without
* ever running its finally block", we take extra care to try to ensure that
* we don't leak the iterator. We register it with the finalizationRegistry,
* but if the finally block is executed, we decref the pointer and unregister.
*
* In order to do this, we create the generator with this inner method,
* register the finalizer, and then return it.
*
* Quote from:
* https://hacks.mozilla.org/2015/07/es6-in-depth-generators-continued/
*
* @private
*/
function* iter_helper(iterptr, token) {
try {
if (iterptr === 0) {
throw new TempError();
}
let item;
while ((item = __pyproxy_iter_next(iterptr))) {
yield Module.hiwire.pop_value(item);
}
if (_PyErr_Occurred()) {
throw new TempError();
}
} catch (e) {
if (e instanceof TempError) {
_pythonexc2js();
} else {
Module.fatal_error(e);
}
} finally {
Module.finalizationRegistry.unregister(token);
_Py_DecRef(iterptr);
}
}
// Controlled by IS_ITERABLE, appears for any object with __iter__ or tp_iter,
// unless they are iterators. See: https://docs.python.org/3/c-api/iter.html
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Iteration_protocols
@ -390,28 +451,20 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */
*
* @returns {Iterator} An iterator for the proxied Python object.
*/
[Symbol.iterator] : function*() {
[Symbol.iterator] : function() {
let ptrobj = _getPtr(this);
let token = {};
let iterptr;
try {
let iterptr = _PyObject_GetIter(_getPtr(this));
if (iterptr === 0) {
throw new TempError();
}
let item;
while ((item = __pyproxy_iter_next(iterptr))) {
yield Module.hiwire.pop_value(item);
}
_Py_DecRef(iterptr);
if (_PyErr_Occurred()) {
throw new TempError();
}
iterptr = _PyObject_GetIter(ptrobj);
} catch (e) {
if (e instanceof TempError) {
_pythonexc2js();
} else {
Module.fatal_error(e);
}
}
}
let result = iter_helper(iterptr, token);
Module.finalizationRegistry.register(result, iterptr, token);
return result;
},
};
// Controlled by IS_ITERATOR, appears for any object with a __next__ or
@ -875,7 +928,7 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */
success = true;
// clang-format off
return Object.create(Module.PyBuffer.prototype,
let result = Object.create(Module.PyBuffer.prototype,
Object.getOwnPropertyDescriptors({
offset,
readonly,
@ -892,6 +945,8 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */
_released : false
})
);
// Module.bufferFinalizationRegistry.register(result, view_ptr, result);
return result;
// clang-format on
} finally {
if (!success) {
@ -1072,6 +1127,7 @@ JS_FILE(pyproxy_init_js, () => {0,0; /* Magic, see include_js_file.h */
if (this._released) {
return;
}
// Module.bufferFinalizationRegistry.unregister(this);
try {
_PyBuffer_Release(this._view_ptr);
_PyMem_Free(this._view_ptr);

View File

@ -366,7 +366,7 @@ def test_create_proxy(selenium):
assert sys.getrefcount(f) == 3
assert testCallListener() == 7
assert sys.getrefcount(f) == 3
assert testRemoveListener(f)
assert testRemoveListener(proxy)
assert sys.getrefcount(f) == 3
proxy.destroy()
assert sys.getrefcount(f) == 2

View File

@ -95,8 +95,8 @@ def test_pyproxy_refcount(selenium):
result.push([getRefCount(), 3])
pyodide.runPython(`
window.jsfunc(pyfunc) # re-used existing PyProxy
window.jsfunc(pyfunc) # re-used existing PyProxy
window.jsfunc(pyfunc) # create new PyProxy
window.jsfunc(pyfunc) # create new PyProxy
`)
// the refcount should be 3 because:
@ -104,7 +104,7 @@ def test_pyproxy_refcount(selenium):
// 1. pyfunc exists
// 2. one reference from PyProxy to pyfunc is alive
// 3. pyfunc is referenced from the sys.getrefcount()-test
result.push([getRefCount(), 3]);
result.push([getRefCount(), 5]);
return result;
"""
)
@ -461,6 +461,142 @@ def test_pyproxy_mixins2(selenium):
)
def test_pyproxy_gc(selenium):
if selenium.browser != "chrome":
pytest.skip("No gc exposed")
# Two ways to trigger garbage collection in Chrome:
# 1. options.add_argument("--js-flags=--expose-gc") in conftest, and use
# gc() in javascript.
# 2. selenium.driver.execute_cdp_cmd("HeapProfiler.collectGarbage", {})
#
# Unclear how to trigger gc in Firefox. Possible to do this by navigating to
# "about:memory" and then triggering a button press to the "GC" button, but
# that seems too annoying.
selenium.run_js(
"""
window.x = new FinalizationRegistry((val) => { window.val = val; });
x.register({}, 77);
gc();
"""
)
assert selenium.run_js("return window.val;") == 77
selenium.run_js(
"""
window.res = new Map();
let d = pyodide.runPython(`
from js import res
def get_ref_count(x):
res[x] = sys.getrefcount(d)
return res[x]
import sys
class Test:
def __del__(self):
res["destructor_ran"] = True
def get(self):
return 7
d = Test()
get_ref_count(0)
d
`);
let get_ref_count = pyodide.globals.get("get_ref_count");
get_ref_count(1);
d.get();
get_ref_count(2);
d.get();
"""
)
selenium.driver.execute_cdp_cmd("HeapProfiler.collectGarbage", {})
selenium.run(
"""
get_ref_count(3)
del d
"""
)
selenium.driver.execute_cdp_cmd("HeapProfiler.collectGarbage", {})
a = selenium.run_js("return Array.from(res.entries());")
assert dict(a) == {0: 2, 1: 3, 2: 4, 3: 2, "destructor_ran": True}
def test_pyproxy_gc_destroy(selenium):
if selenium.browser != "chrome":
pytest.skip("No gc exposed")
selenium.run_js(
"""
window.res = new Map();
let d = pyodide.runPython(`
from js import res
def get_ref_count(x):
res[x] = sys.getrefcount(d)
return res[x]
import sys
class Test:
def __del__(self):
res["destructor_ran"] = True
def get(self):
return 7
d = Test()
get_ref_count(0)
d
`);
let get_ref_count = pyodide.globals.get("get_ref_count");
get_ref_count(1);
d.get();
get_ref_count(2);
d.get();
get_ref_count(3);
d.destroy();
get_ref_count(4);
gc();
get_ref_count(5);
"""
)
selenium.driver.execute_cdp_cmd("HeapProfiler.collectGarbage", {})
selenium.run(
"""
get_ref_count(6)
del d
"""
)
a = selenium.run_js("return Array.from(res.entries());")
assert dict(a) == {
0: 2,
1: 3,
2: 4,
3: 5,
4: 4,
5: 4,
6: 2,
"destructor_ran": True,
}
def test_pyproxy_copy(selenium):
result = selenium.run_js(
"""
let result = [];
let a = pyodide.runPython(`d = { 1 : 2}; d`);
let b = pyodide.runPython(`d`);
result.push(a.get(1));
a.destroy();
result.push(b.get(1));
return result;
"""
)
assert result[0] == 2
assert result[1] == 2
def test_errors(selenium):
selenium.run_js(
"""

View File

@ -42,14 +42,6 @@ def test_globals_get_multiple(selenium):
selenium.run_js("pyodide.globals.get('v')")
def test_globals_get_same(selenium):
"""See #382"""
selenium.run("def func(): return 42")
assert selenium.run_js(
"return pyodide.globals.get('func') == pyodide.globals.get('func')"
)
def test_open_url(selenium, httpserver):
httpserver.expect_request("/data").respond_with_data(
b"HELLO", content_type="text/text", headers={"Access-Control-Allow-Origin": "*"}