diff --git a/docs/project/changelog.md b/docs/project/changelog.md index 4e89e27e7..9ce5445e1 100644 --- a/docs/project/changelog.md +++ b/docs/project/changelog.md @@ -32,6 +32,10 @@ substitutions: arguments and return values are automatically destroyed when the function is finished. {pr}`1573` +- {{Enhancement}} Added {any}`JsProxy.to_string`, {any}`JsProxy.to_bytes`, and + {any}`JsProxy.to_memoryview` to allow for conversion of `TypedArray` to + standard Python types without unneeded copies. {pr}`1864` + - {{Fix}} It is now possible to destroy borrowed attribute `PyProxy` of a `PyProxy` (as introduced by {pr}`1636`) before destroying the root `PyProxy`. {pr}`1854` diff --git a/src/core/docstring.c b/src/core/docstring.c index f0582a98c..c5362b8fe 100644 --- a/src/core/docstring.c +++ b/src/core/docstring.c @@ -14,7 +14,14 @@ set_method_docstring(PyMethodDef* method, PyObject* parent) PyObject* py_result = NULL; py_method = PyObject_GetAttrString(parent, method->ml_name); - FAIL_IF_NULL(py_method); + if (py_method == NULL) { + PyErr_Format(PyExc_AttributeError, + "set_method_docstring failed for method %s, documentation " + "stub '%.50s' has no such attribute.", + method->ml_name, + Py_TYPE(parent)->tp_name); + FAIL(); + } py_result = _PyObject_CallMethodIdOneArg( py_docstring_mod, &PyId_get_cmeth_docstring, py_method); diff --git a/src/core/hiwire.c b/src/core/hiwire.c index e5b833a80..24774af59 100644 --- a/src/core/hiwire.c +++ b/src/core/hiwire.c @@ -772,12 +772,14 @@ EM_JS_NUM(errcode, hiwire_assign_from_ptr, (JsRef idobj, void* ptr), { // clang-format off EM_JS_NUM( errcode, -hiwire_get_buffer_datatype, -(JsRef idobj, char** format_ptr, Py_ssize_t* size_ptr, bool* checked_ptr), +hiwire_get_buffer_info, +(JsRef idobj, Py_ssize_t* byteLength_ptr, char** format_ptr, Py_ssize_t* size_ptr, bool* checked_ptr), { let jsobj = Module.hiwire.get_value(idobj); + let byteLength = jsobj.byteLength; let [format_utf8, size, checked] = Module.get_buffer_datatype(jsobj); // 
Store results into arguments + DEREF_U32(byteLength_ptr, 0) = byteLength; DEREF_U32(format_ptr, 0) = format_utf8; DEREF_U32(size_ptr, 0) = size; DEREF_U8(checked_ptr, 0) = checked; diff --git a/src/core/hiwire.h b/src/core/hiwire.h index f57805e4f..f3f0f8857 100644 --- a/src/core/hiwire.h +++ b/src/core/hiwire.h @@ -562,10 +562,11 @@ hiwire_assign_from_ptr(JsRef idobj, void* ptr); * Get a data type identifier for a given typedarray. */ errcode -hiwire_get_buffer_datatype(JsRef idobj, - char** format_ptr, - Py_ssize_t* size_ptr, - bool* check_assignments); +hiwire_get_buffer_info(JsRef idobj, + Py_ssize_t* byteLength_ptr, + char** format_ptr, + Py_ssize_t* size_ptr, + bool* check_assignments); /** * Get a subarray from a TypedArray diff --git a/src/core/js2python.c b/src/core/js2python.c index f1b27cc21..2fdb1c5e0 100644 --- a/src/core/js2python.c +++ b/src/core/js2python.c @@ -346,7 +346,7 @@ EM_JS_NUM(errcode, js2python_init, (), { } if (toStringTag === "[object ArrayBuffer]" || ArrayBuffer.isView(value)){ let [format_utf8, itemsize] = Module.get_buffer_datatype(value); - return _JsBuffer_CloneIntoPython(id, value.byteLength, format_utf8, itemsize); + return _JsBuffer_CopyIntoMemoryView(id, value.byteLength, format_utf8, itemsize); } // clang-format on return _JsProxy_create(id); diff --git a/src/core/jsproxy.c b/src/core/jsproxy.c index 1e59ba171..2297a2209 100644 --- a/src/core/jsproxy.c +++ b/src/core/jsproxy.c @@ -338,7 +338,7 @@ JsProxy_object_entries(PyObject* o, PyObject* _args) return result; } -PyMethodDef JsProxy_object_entries_MethodDef = { +static PyMethodDef JsProxy_object_entries_MethodDef = { "object_entries", (PyCFunction)JsProxy_object_entries, METH_NOARGS, @@ -361,7 +361,7 @@ JsProxy_object_keys(PyObject* o, PyObject* _args) return result; } -PyMethodDef JsProxy_object_keys_MethodDef = { +static PyMethodDef JsProxy_object_keys_MethodDef = { "object_keys", (PyCFunction)JsProxy_object_keys, METH_NOARGS, @@ -384,7 +384,7 @@ 
JsProxy_object_values(PyObject* o, PyObject* _args) return result; } -PyMethodDef JsProxy_object_values_MethodDef = { +static PyMethodDef JsProxy_object_values_MethodDef = { "object_values", (PyCFunction)JsProxy_object_values, METH_NOARGS, @@ -682,7 +682,7 @@ finally: return result; } -PyMethodDef JsProxy_Dir_MethodDef = { +static PyMethodDef JsProxy_Dir_MethodDef = { "__dir__", (PyCFunction)JsProxy_Dir, METH_NOARGS, @@ -705,7 +705,7 @@ JsProxy_toPy(PyObject* self, return js2python_convert(GET_JSREF(self), depth); } -PyMethodDef JsProxy_toPy_MethodDef = { +static PyMethodDef JsProxy_toPy_MethodDef = { "to_py", (PyCFunction)JsProxy_toPy, METH_FASTCALL | METH_KEYWORDS, @@ -857,7 +857,7 @@ finally: return result; } -PyMethodDef JsProxy_then_MethodDef = { +static PyMethodDef JsProxy_then_MethodDef = { "then", (PyCFunction)JsProxy_then, METH_VARARGS | METH_KEYWORDS, @@ -894,7 +894,7 @@ finally: return result; } -PyMethodDef JsProxy_catch_MethodDef = { +static PyMethodDef JsProxy_catch_MethodDef = { "catch", (PyCFunction)JsProxy_catch, METH_O, @@ -936,7 +936,7 @@ finally: return result; } -PyMethodDef JsProxy_finally_MethodDef = { +static PyMethodDef JsProxy_finally_MethodDef = { "finally_", (PyCFunction)JsProxy_finally, METH_O, @@ -1285,7 +1285,7 @@ finally: } // clang-format off -PyMethodDef JsMethod_Construct_MethodDef = { +static PyMethodDef JsMethod_Construct_MethodDef = { "new", (PyCFunction)JsMethod_Construct, METH_FASTCALL | METH_KEYWORDS @@ -1386,8 +1386,8 @@ static PyTypeObject BufferType = { }; /** - * This is a helper function to do error checking for JsBuffer_AssignToPyBuffer - * and JsBuffer_AssignPyBuffer. + * This is a helper function to do error checking for JsBuffer_assign_to + * and JsBuffer_assign. * * self -- The JavaScript buffer involved * view -- The Py_buffer view involved @@ -1438,7 +1438,7 @@ check_buffer_compatibility(JsProxy* self, Py_buffer view, bool safe, bool dir) * buffer -- A PyObject whcih supports the buffer protocol and is writable. 
*/ static PyObject* -JsBuffer_AssignToPyBuffer(PyObject* obj, PyObject* target) +JsBuffer_assign_to(PyObject* obj, PyObject* target) { JsProxy* self = (JsProxy*)obj; bool success = false; @@ -1460,13 +1460,19 @@ finally: return NULL; } +static PyMethodDef JsBuffer_assign_to_MethodDef = { + "assign_to", + (PyCFunction)JsBuffer_assign_to, + METH_O, +}; + /** * Assign from a py buffer to a js buffer * obj -- A JsBuffer (meaning a PyProxy of an ArrayBuffer or an ArrayBufferView) * buffer -- A PyObject which supports the buffer protocol (can be read only) */ static PyObject* -JsBuffer_AssignPyBuffer(PyObject* obj, PyObject* source) +JsBuffer_assign(PyObject* obj, PyObject* source) { JsProxy* self = (JsProxy*)obj; bool success = false; @@ -1487,9 +1493,15 @@ finally: return NULL; } +static PyMethodDef JsBuffer_assign_MethodDef = { + "assign", + (PyCFunction)JsBuffer_assign, + METH_O, +}; + /** - * Used from js2python for to_py. Make a new Python buffer with the same data as - * jsbuffer. + * Used from js2python for to_py and by to_memoryview. Make a new Python buffer + * with the same data as jsbuffer. * * All other arguments are calculated from jsbuffer, but it's more convenient to * calculate them in JavaScript and pass them as arguments than to acquire them @@ -1501,10 +1513,10 @@ finally: * itemsize - the appropriate itemsize for jsbuffer, from get_buffer_datatype */ PyObject* -JsBuffer_CloneIntoPython(JsRef jsbuffer, - Py_ssize_t byteLength, - char* format, - Py_ssize_t itemsize) +JsBuffer_CopyIntoMemoryView(JsRef jsbuffer, + Py_ssize_t byteLength, + char* format, + Py_ssize_t itemsize) { bool success = false; Buffer* buffer = NULL; @@ -1526,19 +1538,146 @@ finally: return result; } +/** + * Used by to_bytes. Make a new bytes object and copy the data from the + * ArrayBuffer into it. 
+ */ +PyObject* +JsBuffer_CopyIntoBytes(JsRef jsbuffer, Py_ssize_t byteLength) +{ + bool success = false; + + PyObject* result = PyBytes_FromStringAndSize(NULL, byteLength); + FAIL_IF_NULL(result); + char* data = PyBytes_AS_STRING(result); + FAIL_IF_MINUS_ONE(hiwire_assign_to_ptr(jsbuffer, data)); + success = true; +finally: + if (!success) { + Py_CLEAR(result); + } + return result; +} + +/** + * Used by JsBuffer_ToString. Decode the ArrayBuffer into a Javascript string + * using a TextDecoder with the given encoding. I have found no evidence that + * the encoding argument ever matters... + * + * If a decoding error occurs, return 0 without setting error flag so the + * caller (JsBuffer_ToString) can raise a ValueError instead. + */ +// clang-format off +EM_JS_REF(JsRef, +JsBuffer_DecodeString_js, +(JsRef jsbuffer_id, char* encoding), +{ + let buffer = Module.hiwire.get_value(jsbuffer_id); + let encoding_js; + if (encoding) { + encoding_js = UTF8ToString(encoding); + } + let decoder = new TextDecoder(encoding_js, {fatal : true}); + let res; + try { + res = decoder.decode(buffer); + } catch(e){ + if(e instanceof TypeError) { + // Decoding error + return 0; + } + throw e; + } + return Module.hiwire.new_value(res); +}) +// clang-format on + +/** + * Decode the ArrayBuffer into a PyUnicode object. + */ +PyObject* +JsBuffer_ToString(JsRef jsbuffer, char* encoding) +{ + JsRef jsresult = NULL; + PyObject* result = NULL; + + jsresult = JsBuffer_DecodeString_js(jsbuffer, encoding); + if (jsresult == NULL && !PyErr_Occurred()) { + PyErr_Format(PyExc_ValueError, + "Failed to decode Javascript TypedArray as %s", + encoding ? 
encoding : "utf8"); + } + FAIL_IF_NULL(jsresult); + result = js2python(jsresult); + FAIL_IF_NULL(result); + +finally: + hiwire_CLEAR(jsresult); + return result; +} + +static PyObject* +JsBuffer_tomemoryview(PyObject* buffer) +{ + JsProxy* self = (JsProxy*)buffer; + return JsBuffer_CopyIntoMemoryView( + self->js, self->byteLength, self->format, self->itemsize); +} + +static PyMethodDef JsBuffer_tomemoryview_MethodDef = { + "to_memoryview", + (PyCFunction)JsBuffer_tomemoryview, + METH_NOARGS, +}; + +static PyObject* +JsBuffer_tobytes(PyObject* buffer) +{ + JsProxy* self = (JsProxy*)buffer; + return JsBuffer_CopyIntoBytes(self->js, self->byteLength); +} + +static PyMethodDef JsBuffer_tobytes_MethodDef = { + "to_bytes", + (PyCFunction)JsBuffer_tobytes, + METH_NOARGS, +}; + +static PyObject* +JsBuffer_tostring(PyObject* self, + PyObject* const* args, + Py_ssize_t nargs, + PyObject* kwnames) +{ + static const char* const _keywords[] = { "encoding", 0 }; + static struct _PyArg_Parser _parser = { "|s:to_string", _keywords, 0 }; + char* encoding = NULL; + if (!_PyArg_ParseStackAndKeywords( + args, nargs, kwnames, &_parser, &encoding)) { + return NULL; + } + return JsBuffer_ToString(JsProxy_REF(self), encoding); +} + +static PyMethodDef JsBuffer_tostring_MethodDef = { + "to_string", + (PyCFunction)JsBuffer_tostring, + METH_FASTCALL | METH_KEYWORDS, +}; + int JsBuffer_cinit(PyObject* obj) { bool success = false; JsProxy* self = (JsProxy*)obj; // TODO: should logic here be any different if we're on wasm heap? - self->byteLength = hiwire_get_byteLength(JsProxy_REF(self)); // format string is borrowed from hiwire_get_buffer_datatype, DO NOT // DEALLOCATE! 
- hiwire_get_buffer_datatype(JsProxy_REF(self), - &self->format, - &self->itemsize, - &self->check_assignments); + hiwire_get_buffer_info(JsProxy_REF(self), + &self->byteLength, + &self->format, + &self->itemsize, + &self->check_assignments); if (self->format == NULL) { char* typename = hiwire_constructor_name(JsProxy_REF(self)); PyErr_Format( @@ -1571,7 +1710,7 @@ JsProxy_create_subtype(int flags) // Make sure these stack allocations are large enough to fit! PyType_Slot slots[20]; int cur_slot = 0; - PyMethodDef methods[10]; + PyMethodDef methods[50]; int cur_method = 0; PyMemberDef members[5]; int cur_member = 0; @@ -1654,18 +1793,11 @@ JsProxy_create_subtype(int flags) .pfunc = (void*)JsProxy_ass_subscript_array }; } if (flags & IS_BUFFER) { - methods[cur_method++] = (PyMethodDef){ - "assign", - (PyCFunction)JsBuffer_AssignPyBuffer, - METH_O, - PyDoc_STR("Copies a buffer into the TypedArray "), - }; - methods[cur_method++] = (PyMethodDef){ - "assign_to", - (PyCFunction)JsBuffer_AssignToPyBuffer, - METH_O, - PyDoc_STR("Copies the TypedArray into a buffer"), - }; + methods[cur_method++] = JsBuffer_assign_MethodDef; + methods[cur_method++] = JsBuffer_assign_to_MethodDef; + methods[cur_method++] = JsBuffer_tomemoryview_MethodDef; + methods[cur_method++] = JsBuffer_tobytes_MethodDef; + methods[cur_method++] = JsBuffer_tostring_MethodDef; } methods[cur_method++] = (PyMethodDef){ 0 }; members[cur_member++] = (PyMemberDef){ 0 }; @@ -1898,6 +2030,11 @@ JsProxy_init(PyObject* core_module) SET_DOCSTRING(JsProxy_catch_MethodDef); SET_DOCSTRING(JsProxy_finally_MethodDef); SET_DOCSTRING(JsMethod_Construct_MethodDef); + SET_DOCSTRING(JsBuffer_assign_MethodDef); + SET_DOCSTRING(JsBuffer_assign_to_MethodDef); + SET_DOCSTRING(JsBuffer_tomemoryview_MethodDef); + SET_DOCSTRING(JsBuffer_tobytes_MethodDef); + SET_DOCSTRING(JsBuffer_tostring_MethodDef); #undef SET_DOCSTRING asyncio_module = PyImport_ImportModule("asyncio"); diff --git a/src/py/_pyodide/_core_docs.py 
b/src/py/_pyodide/_core_docs.py index 41be0bbc7..0c92ed377 100644 --- a/src/py/_pyodide/_core_docs.py +++ b/src/py/_pyodide/_core_docs.py @@ -108,6 +108,37 @@ class JsProxy: an ArrayBuffer view. """ + def to_memoryview(self) -> memoryview: + """Convert the buffer to a memoryview. + + Copies the data once. This currently has the same effect as :any:`to_py`. + Present only if the wrapped Javascript object is an ArrayBuffer or + an ArrayBuffer view. + """ + + def to_bytes(self) -> bytes: + """Convert the buffer to a bytes object. + + Copies the data once. + Present only if the wrapped Javascript object is an ArrayBuffer or + an ArrayBuffer view. + """ + + def to_string(self, encoding=None) -> str: + """Convert the buffer to a string object. + + Copies the data twice. + + The encoding argument will be passed to the Javascript + `TextDecoder <https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder>`_ + constructor. It should be one of the encodings listed in the table here: + https://encoding.spec.whatwg.org/#names-and-labels. The default + encoding is utf8. + + Present only if the wrapped Javascript object is an ArrayBuffer or + an ArrayBuffer view. + """ + # from pyproxy.c diff --git a/src/tests/test_jsproxy.py b/src/tests/test_jsproxy.py index f566496a0..d8baab198 100644 --- a/src/tests/test_jsproxy.py +++ b/src/tests/test_jsproxy.py @@ -1031,6 +1031,68 @@ def test_buffer_assign_back(selenium): assert result == [1, 20, 3, 77, 5, 9] +def test_buffer_conversions(selenium): + selenium.run_js( + f""" + self.s = "abcဴ"; + self.jsbytes = new TextEncoder().encode(s); + pyodide.runPython(` + from js import s, jsbytes + memoryview_conversion = jsbytes.to_memoryview() + bytes_conversion = jsbytes.to_bytes() + + assert bytes_conversion.decode() == s + assert bytes(memoryview_conversion) == bytes_conversion + del jsbytes + `); + """ + ) + + +def test_tostring_encoding(selenium): + selenium.run_js( + """ + // windows-1251 encoded "Привет, мир!" 
which is Russian for "Hello, world!" + self.bytes = new Uint8Array([207, 240, 232, 226, 229, 242, 44, 32, 236, 232, 240, 33]); + pyodide.runPython(` + from js import bytes + assert bytes.to_string('windows-1251') == "Привет, мир!" + `); + """ + ) + + +def test_tostring_error(selenium): + selenium.run_js( + """ + // windows-1251 encoded "Привет, мир!" which is Russian for "Hello, world!" + self.bytes = new Uint8Array([207, 240, 232, 226, 229, 242, 44, 32, 236, 232, 240, 33]); + pyodide.runPython(` + from js import bytes + from unittest import TestCase + raises = TestCase().assertRaises + with raises(ValueError): + bytes.to_string() + `); + """ + ) + + +def test_duck_buffer_method_presence(selenium): + selenium.run_js( + """ + self.bytes = new Uint8Array([207, 240, 232, 226, 229, 242, 44, 32, 236, 232, 240, 33]); + self.other = {}; + pyodide.runPython(` + from js import bytes, other + buffer_methods = {"assign", "assign_to", "to_string", "to_memoryview", "to_bytes"} + assert buffer_methods < set(dir(bytes)) + assert not set(dir(other)).intersection(buffer_methods) + `); + """ + ) + + def test_memory_leaks(selenium): # refcounts are tested automatically in conftest by default selenium.run_js(