From 2d2732b55d57008545e818fc1b5ef4dca5625b9e Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 22 Oct 2024 15:16:03 +0200 Subject: [PATCH 5/8] Use wasm-gc based call adaptor if available Part of the ongoing quest to support JSPI. The JSPI spec removed its dependence on JS type reflection, and now the plan is for runtimes to ship JSPI while keeping type reflection in stage 3. So we need an alternative way to count the number of parameters of a function. It is possible to count them by repeatedly trying to instantiate a webassembly module with the function as an import of a different type signature. But this is pretty inefficient. Since WebAssembly gc is now stage 4, there is a new option. WebAssembly gc added the `ref.test` instruction which can ask if a funcref has a given type. It's a bit difficult to apply because even our usual assembler the wasm binary toolkit doesn't support this instruction yet. But all JS engines that support JSPI support it. We just have to do some manual work to produce the binary. This code also has to be written carefully to interact properly with memory snapshots. Importantly, no JS initialization code can be called from the C initialization code. For this reason, we make a C function pointer to fill from JS and fill it in a preRun function. Upstream PR: https://github.com/python/cpython/pull/128628 --- .../internal/pycore_emscripten_trampoline.h | 25 +- Include/internal/pycore_runtime.h | 11 +- Python/emscripten_trampoline.c | 231 ++++++++++++++---- Python/pylifecycle.c | 1 - Python/pystate.c | 7 +- 5 files changed, 204 insertions(+), 71 deletions(-) diff --git a/Include/internal/pycore_emscripten_trampoline.h b/Include/internal/pycore_emscripten_trampoline.h index 900d527e48d..5479c9d2605 100644 --- a/Include/internal/pycore_emscripten_trampoline.h +++ b/Include/internal/pycore_emscripten_trampoline.h @@ -27,25 +27,14 @@ #if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) -void _Py_EmscriptenTrampoline_Init(_PyRuntimeState *runtime); +void +_Py_EmscriptenTrampoline_Init(_PyRuntimeState *runtime); PyObject* -_PyEM_TrampolineCall_JS(PyCFunctionWithKeywords func, - PyObject* self, - PyObject* args, - PyObject* kw); - -PyObject* -_PyEM_TrampolineCall_Reflection(PyCFunctionWithKeywords func, - PyObject* self, - PyObject* args, - PyObject* kw); - -#define _PyEM_TrampolineCall(meth, self, args, kw) \ - ((_PyRuntime.wasm_type_reflection_available) ? \ - (_PyEM_TrampolineCall_Reflection(meth, self, args, kw)) : \ - (_PyEM_TrampolineCall_JS(meth, self, args, kw))) - +_PyEM_TrampolineCall(PyCFunctionWithKeywords func, + PyObject* self, + PyObject* args, + PyObject* kw); #define _PyCFunction_TrampolineCall(meth, self, args) \ _PyEM_TrampolineCall( \ @@ -56,8 +45,6 @@ _PyEM_TrampolineCall_Reflection(PyCFunctionWithKeywords func, #else // defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) -#define _Py_EmscriptenTrampoline_Init(runtime) - #define _PyCFunction_TrampolineCall(meth, self, args) \ (meth)((self), (args)) #define _PyCFunctionWithKeywords_TrampolineCall(meth, self, args, kw) \ diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index 8ceb6b72ec1..7195ff2f932 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -77,11 +77,6 @@ typedef struct pyruntimestate { /* Is Python fully initialized? Set to 1 by Py_Initialize() */ int initialized; -#if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) - /* Choose between trampoline based on type reflection vs based on EM_JS */ - int wasm_type_reflection_available; -#endif - /* Set by Py_FinalizeEx(). Only reset to NULL if Py_Initialize() is called again. @@ -161,6 +156,12 @@ typedef struct pyruntimestate { struct _Py_unicode_runtime_state unicode_state; struct _types_runtime_state types; +#if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) + // Used in "Python/emscripten_trampoline.c" to choose between type + // reflection trampoline and EM_JS trampoline. + int (*emscripten_count_args_function)(PyCFunctionWithKeywords func); +#endif + /* All the objects that are shared by the runtime's interpreters. */ struct _Py_static_objects static_objects; struct _Py_cached_objects cached_objects; diff --git a/Python/emscripten_trampoline.c b/Python/emscripten_trampoline.c index 8d29393bd87..e78a94e5e99 100644 --- a/Python/emscripten_trampoline.c +++ b/Python/emscripten_trampoline.c @@ -4,71 +4,214 @@ #include #include "pycore_runtime.h" // _PyRuntime +typedef int (*CountArgsFunc)(PyCFunctionWithKeywords func); -/** - * This is the GoogleChromeLabs approved way to feature detect type-reflection: - * https://github.com/GoogleChromeLabs/wasm-feature-detect/blob/main/src/detectors/type-reflection/index.js - */ -EM_JS(int, _PyEM_detect_type_reflection, (), { - return "Function" in WebAssembly; +// Offset of emscripten_count_args_function in _PyRuntimeState. There's a couple +// of alternatives: +// 1. Just make emscripten_count_args_function a real C global variable instead +// of a field of _PyRuntimeState. This would violate our rule against mutable +// globals. +// 2. #define a preprocessor constant equal to a hard coded number and make a +// _Static_assert(offsetof(_PyRuntimeState, emscripten_count_args_function) +// == OURCONSTANT) This has the disadvantage that we have to update the hard +// coded constant when _PyRuntimeState changes +// +// So putting the mutable constant in _PyRuntime and using a immutable global to +// record the offset so we can access it from JS is probably the best way. +EMSCRIPTEN_KEEPALIVE const int _PyEM_EMSCRIPTEN_COUNT_ARGS_OFFSET = offsetof(_PyRuntimeState, emscripten_count_args_function); + +EM_JS(CountArgsFunc, _PyEM_GetCountArgsPtr, (), { + return Module._PyEM_CountArgsPtr; // initialized below +} +// Binary module for the checks. It has to be done in web assembly because +// clang/llvm have no support yet for the reference types yet. In fact, the wasm +// binary toolkit doesn't yet support the ref.test instruction either. To +// convert the following module to the binary, my approach is to find and +// replace "ref.test $type" -> "drop i32.const n" on the source text. This +// results in the bytes "0x1a, 0x41, n" where we need the bytes "0xfb, 0x14, n" +// so doing a find and replace on the output from "0x1a, 0x41" -> "0xfb, 0x14" +// gets us the output we need. +// +// (module +// (type $type0 (func (param) (result i32))) +// (type $type1 (func (param i32) (result i32))) +// (type $type2 (func (param i32 i32) (result i32))) +// (type $type3 (func (param i32 i32 i32) (result i32))) +// (type $blocktype (func (param i32) (result))) +// (table $funcs (import "e" "t") 0 funcref) +// (export "f" (func $f)) +// (func $f (param $fptr i32) (result i32) +// (local $fref funcref) +// local.get $fptr +// table.get $funcs +// local.tee $fref +// ref.test $type3 +// (block $b (type $blocktype) +// i32.eqz +// br_if $b +// i32.const 3 +// return +// ) +// local.get $fref +// ref.test $type2 +// (block $b (type $blocktype) +// i32.eqz +// br_if $b +// i32.const 2 +// return +// ) +// local.get $fref +// ref.test $type1 +// (block $b (type $blocktype) +// i32.eqz +// br_if $b +// i32.const 1 +// return +// ) +// local.get $fref +// ref.test $type0 +// (block $b (type $blocktype) +// i32.eqz +// br_if $b +// i32.const 0 +// return +// ) +// i32.const -1 +// ) +// ) +addOnPreRun(() => { + // Try to initialize countArgsFunc + const code = new Uint8Array([ + 0x00, 0x61, 0x73, 0x6d, // \0asm magic number + 0x01, 0x00, 0x00, 0x00, // version 1 + 0x01, 0x1b, // Type section, body is 0x1b bytes + 0x05, // 6 entries + 0x60, 0x00, 0x01, 0x7f, // (type $type0 (func (param) (result i32))) + 0x60, 0x01, 0x7f, 0x01, 0x7f, // (type $type1 (func (param i32) (result i32))) + 0x60, 0x02, 0x7f, 0x7f, 0x01, 0x7f, // (type $type2 (func (param i32 i32) (result i32))) + 0x60, 0x03, 0x7f, 0x7f, 0x7f, 0x01, 0x7f, // (type $type3 (func (param i32 i32 i32) (result i32))) + 0x60, 0x01, 0x7f, 0x00, // (type $blocktype (func (param i32) (result))) + 0x02, 0x09, // Import section, 0x9 byte body + 0x01, // 1 import (table $funcs (import "e" "t") 0 funcref) + 0x01, 0x65, // "e" + 0x01, 0x74, // "t" + 0x01, // importing a table + 0x70, // of entry type funcref + 0x00, 0x00, // table limits: no max, min of 0 + 0x03, 0x02, // Function section + 0x01, 0x01, // We're going to define one function of type 1 (func (param i32) (result i32)) + 0x07, 0x05, // export section + 0x01, // 1 export + 0x01, 0x66, // called "f" + 0x00, // a function + 0x00, // at index 0 + + 0x0a, 0x44, // Code section, + 0x01, 0x42, // one entry of length 50 + 0x01, 0x01, 0x70, // one local of type funcref + // Body of the function + 0x20, 0x00, // local.get $fptr + 0x25, 0x00, // table.get $funcs + 0x22, 0x01, // local.tee $fref + 0xfb, 0x14, 0x03, // ref.test $type3 + 0x02, 0x04, // block $b (type $blocktype) + 0x45, // i32.eqz + 0x0d, 0x00, // br_if $b + 0x41, 0x03, // i32.const 3 + 0x0f, // return + 0x0b, // end block + + 0x20, 0x01, // local.get $fref + 0xfb, 0x14, 0x02, // ref.test $type2 + 0x02, 0x04, // block $b (type $blocktype) + 0x45, // i32.eqz + 0x0d, 0x00, // br_if $b + 0x41, 0x02, // i32.const 2 + 0x0f, // return + 0x0b, // end block + + 0x20, 0x01, // local.get $fref + 0xfb, 0x14, 0x01, // ref.test $type1 + 0x02, 0x04, // block $b (type $blocktype) + 0x45, // i32.eqz + 0x0d, 0x00, // br_if $b + 0x41, 0x01, // i32.const 1 + 0x0f, // return + 0x0b, // end block + + 0x20, 0x01, // local.get $fref + 0xfb, 0x14, 0x00, // ref.test $type0 + 0x02, 0x04, // block $b (type $blocktype) + 0x45, // i32.eqz + 0x0d, 0x00, // br_if $b + 0x41, 0x00, // i32.const 0 + 0x0f, // return + 0x0b, // end block + + 0x41, 0x7f, // i32.const -1 + 0x0b // end function + ]); + let ptr = 0; + try { + const mod = new WebAssembly.Module(code); + const inst = new WebAssembly.Instance(mod, { e: { t: wasmTable } }); + ptr = addFunction(inst.exports.f); + } catch (e) { + // If something goes wrong, we'll null out _PyEM_CountFuncParams and fall + // back to the JS trampoline. + } + Module._PyEM_CountArgsPtr = ptr; + const offset = HEAP32[__PyEM_EMSCRIPTEN_COUNT_ARGS_OFFSET / 4]; + HEAP32[(__PyRuntime + offset) / 4] = ptr; }); +); void _Py_EmscriptenTrampoline_Init(_PyRuntimeState *runtime) { - runtime->wasm_type_reflection_available = _PyEM_detect_type_reflection(); + runtime->emscripten_count_args_function = _PyEM_GetCountArgsPtr(); } +// We have to be careful to work correctly with memory snapshots. Even if we are +// loading a memory snapshot, we need to perform the JS initialization work. +// That means we can't call the initialization code from C. Instead, we export +// this function pointer to JS and then fill it in a preRun function which runs +// unconditionally. /** * Backwards compatible trampoline works with all JS runtimes */ EM_JS(PyObject*, _PyEM_TrampolineCall_JS, (PyCFunctionWithKeywords func, PyObject *arg1, PyObject *arg2, PyObject *arg3), { return wasmTable.get(func)(arg1, arg2, arg3); -} -); - -/** - * In runtimes with WebAssembly type reflection, count the number of parameters - * and cast to the appropriate signature - */ -EM_JS(int, _PyEM_CountFuncParams, (PyCFunctionWithKeywords func), { - let n = _PyEM_CountFuncParams.cache.get(func); - if (n !== undefined) { - return n; - } - n = wasmFunctionType(wasmTable.get(func)).parameters.length; - _PyEM_CountFuncParams.cache.set(func, n); - return n; -} -_PyEM_CountFuncParams.cache = new Map(); -) - +}); typedef PyObject* (*zero_arg)(void); typedef PyObject* (*one_arg)(PyObject*); typedef PyObject* (*two_arg)(PyObject*, PyObject*); typedef PyObject* (*three_arg)(PyObject*, PyObject*, PyObject*); - PyObject* -_PyEM_TrampolineCall_Reflection(PyCFunctionWithKeywords func, - PyObject* self, - PyObject* args, - PyObject* kw) +_PyEM_TrampolineCall(PyCFunctionWithKeywords func, + PyObject* self, + PyObject* args, + PyObject* kw) { - switch (_PyEM_CountFuncParams(func)) { - case 0: - return ((zero_arg)func)(); - case 1: - return ((one_arg)func)(self); - case 2: - return ((two_arg)func)(self, args); - case 3: - return ((three_arg)func)(self, args, kw); - default: - PyErr_SetString(PyExc_SystemError, "Handler takes too many arguments"); - return NULL; - } + CountArgsFunc count_args = _PyRuntime.emscripten_count_args_function; + if (count_args == 0) { + return _PyEM_TrampolineCall_JS(func, self, args, kw); + } + switch (count_args(func)) { + case 0: + return ((zero_arg)func)(); + case 1: + return ((one_arg)func)(self); + case 2: + return ((two_arg)func)(self, args); + case 3: + return ((three_arg)func)(self, args, kw); + default: + PyErr_SetString(PyExc_SystemError, "Handler takes too many arguments"); + return NULL; + } } #endif diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index f1510f32790..51eab2a8fee 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -4,7 +4,6 @@ #include "pycore_ceval.h" // _PyEval_FiniGIL() #include "pycore_context.h" // _PyContext_Init() -#include "pycore_emscripten_trampoline.h" // _Py_EmscriptenTrampoline_Init() #include "pycore_exceptions.h" // _PyExc_InitTypes() #include "pycore_dict.h" // _PyDict_Fini() #include "pycore_fileutils.h" // _Py_ResetForceASCII() diff --git a/Python/pystate.c b/Python/pystate.c index a7981bc4877..ab03729266c 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2,9 +2,9 @@ /* Thread and interpreter state structures and their interfaces */ #include "Python.h" -#include "pycore_emscripten_trampoline.h" // _Py_EmscriptenTrampoline_Init() #include "pycore_ceval.h" #include "pycore_code.h" // stats +#include "pycore_emscripten_trampoline.h" // _Py_EmscriptenTrampoline_Init() #include "pycore_dtoa.h" // _dtoa_state_INIT() #include "pycore_frame.h" #include "pycore_initconfig.h" @@ -451,8 +451,11 @@ init_runtime(_PyRuntimeState *runtime, runtime->unicode_state.ids.next_index = unicode_next_index; - runtime->_initialized = 1; +#if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) _Py_EmscriptenTrampoline_Init(runtime); +#endif + + runtime->_initialized = 1; } PyStatus -- 2.34.1