Added TypedArray methods to_string and to_bytes to help minimize copies (#1864)

This commit is contained in:
Hood Chatham 2021-10-08 11:04:08 -07:00 committed by GitHub
parent 3d45d25f24
commit 579777dbb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 289 additions and 45 deletions

View File

@ -32,6 +32,10 @@ substitutions:
arguments and return values are automatically destroyed when the function is
finished. {pr}`1573`
- {{Enhancement}} Added {any}`JsProxy.to_string`, {any}`JsProxy.to_bytes`, and
{any}`JsProxy.to_memoryview` to allow for conversion of `TypedArray` to
standard Python types without unneeded copies. {pr}`1864`
- {{Fix}} It is now possible to destroy borrowed attribute `PyProxy` of a
`PyProxy` (as introduced by {pr}`1636`) before destroying the root `PyProxy`.
{pr}`1854`

View File

@ -14,7 +14,14 @@ set_method_docstring(PyMethodDef* method, PyObject* parent)
PyObject* py_result = NULL;
py_method = PyObject_GetAttrString(parent, method->ml_name);
FAIL_IF_NULL(py_method);
if (py_method == NULL) {
PyErr_Format(PyExc_AttributeError,
"set_method_docstring failed for method %s, documentation "
"stub '%.50s' has no such attribute.",
method->ml_name,
Py_TYPE(parent)->tp_name);
FAIL();
}
py_result = _PyObject_CallMethodIdOneArg(
py_docstring_mod, &PyId_get_cmeth_docstring, py_method);

View File

@ -772,12 +772,14 @@ EM_JS_NUM(errcode, hiwire_assign_from_ptr, (JsRef idobj, void* ptr), {
// clang-format off
EM_JS_NUM(
errcode,
hiwire_get_buffer_datatype,
(JsRef idobj, char** format_ptr, Py_ssize_t* size_ptr, bool* checked_ptr),
hiwire_get_buffer_info,
(JsRef idobj, Py_ssize_t* byteLength_ptr, char** format_ptr, Py_ssize_t* size_ptr, bool* checked_ptr),
{
let jsobj = Module.hiwire.get_value(idobj);
let byteLength = jsobj.byteLength;
let [format_utf8, size, checked] = Module.get_buffer_datatype(jsobj);
// Store results into arguments
DEREF_U32(byteLength_ptr, 0) = byteLength;
DEREF_U32(format_ptr, 0) = format_utf8;
DEREF_U32(size_ptr, 0) = size;
DEREF_U8(checked_ptr, 0) = checked;

View File

@ -562,10 +562,11 @@ hiwire_assign_from_ptr(JsRef idobj, void* ptr);
* Get a data type identifier for a given typedarray.
*/
errcode
hiwire_get_buffer_datatype(JsRef idobj,
char** format_ptr,
Py_ssize_t* size_ptr,
bool* check_assignments);
hiwire_get_buffer_info(JsRef idobj,
Py_ssize_t* byteLength_ptr,
char** format_ptr,
Py_ssize_t* size_ptr,
bool* check_assignments);
/**
* Get a subarray from a TypedArray

View File

@ -346,7 +346,7 @@ EM_JS_NUM(errcode, js2python_init, (), {
}
if (toStringTag === "[object ArrayBuffer]" || ArrayBuffer.isView(value)){
let [format_utf8, itemsize] = Module.get_buffer_datatype(value);
return _JsBuffer_CloneIntoPython(id, value.byteLength, format_utf8, itemsize);
return _JsBuffer_CopyIntoMemoryView(id, value.byteLength, format_utf8, itemsize);
}
// clang-format on
return _JsProxy_create(id);

View File

@ -338,7 +338,7 @@ JsProxy_object_entries(PyObject* o, PyObject* _args)
return result;
}
PyMethodDef JsProxy_object_entries_MethodDef = {
static PyMethodDef JsProxy_object_entries_MethodDef = {
"object_entries",
(PyCFunction)JsProxy_object_entries,
METH_NOARGS,
@ -361,7 +361,7 @@ JsProxy_object_keys(PyObject* o, PyObject* _args)
return result;
}
PyMethodDef JsProxy_object_keys_MethodDef = {
static PyMethodDef JsProxy_object_keys_MethodDef = {
"object_keys",
(PyCFunction)JsProxy_object_keys,
METH_NOARGS,
@ -384,7 +384,7 @@ JsProxy_object_values(PyObject* o, PyObject* _args)
return result;
}
PyMethodDef JsProxy_object_values_MethodDef = {
static PyMethodDef JsProxy_object_values_MethodDef = {
"object_values",
(PyCFunction)JsProxy_object_values,
METH_NOARGS,
@ -682,7 +682,7 @@ finally:
return result;
}
PyMethodDef JsProxy_Dir_MethodDef = {
static PyMethodDef JsProxy_Dir_MethodDef = {
"__dir__",
(PyCFunction)JsProxy_Dir,
METH_NOARGS,
@ -705,7 +705,7 @@ JsProxy_toPy(PyObject* self,
return js2python_convert(GET_JSREF(self), depth);
}
PyMethodDef JsProxy_toPy_MethodDef = {
static PyMethodDef JsProxy_toPy_MethodDef = {
"to_py",
(PyCFunction)JsProxy_toPy,
METH_FASTCALL | METH_KEYWORDS,
@ -857,7 +857,7 @@ finally:
return result;
}
PyMethodDef JsProxy_then_MethodDef = {
static PyMethodDef JsProxy_then_MethodDef = {
"then",
(PyCFunction)JsProxy_then,
METH_VARARGS | METH_KEYWORDS,
@ -894,7 +894,7 @@ finally:
return result;
}
PyMethodDef JsProxy_catch_MethodDef = {
static PyMethodDef JsProxy_catch_MethodDef = {
"catch",
(PyCFunction)JsProxy_catch,
METH_O,
@ -936,7 +936,7 @@ finally:
return result;
}
PyMethodDef JsProxy_finally_MethodDef = {
static PyMethodDef JsProxy_finally_MethodDef = {
"finally_",
(PyCFunction)JsProxy_finally,
METH_O,
@ -1285,7 +1285,7 @@ finally:
}
// clang-format off
PyMethodDef JsMethod_Construct_MethodDef = {
static PyMethodDef JsMethod_Construct_MethodDef = {
"new",
(PyCFunction)JsMethod_Construct,
METH_FASTCALL | METH_KEYWORDS
@ -1386,8 +1386,8 @@ static PyTypeObject BufferType = {
};
/**
* This is a helper function to do error checking for JsBuffer_AssignToPyBuffer
* and JsBuffer_AssignPyBuffer.
* This is a helper function to do error checking for JsBuffer_assign_to
* and JsBuffer_assign.
*
* self -- The JavaScript buffer involved
* view -- The Py_buffer view involved
@ -1438,7 +1438,7 @@ check_buffer_compatibility(JsProxy* self, Py_buffer view, bool safe, bool dir)
* buffer -- A PyObject which supports the buffer protocol and is writable.
*/
static PyObject*
JsBuffer_AssignToPyBuffer(PyObject* obj, PyObject* target)
JsBuffer_assign_to(PyObject* obj, PyObject* target)
{
JsProxy* self = (JsProxy*)obj;
bool success = false;
@ -1460,13 +1460,19 @@ finally:
return NULL;
}
// Method table entry exposing JsBuffer_assign_to to Python as "assign_to".
// METH_O: the method takes exactly one positional object argument (the target).
// No docstring is given here; JsProxy_init passes this entry to SET_DOCSTRING,
// which presumably fills it in from the Python documentation stub.
static PyMethodDef JsBuffer_assign_to_MethodDef = {
  "assign_to",
  (PyCFunction)JsBuffer_assign_to,
  METH_O,
};
/**
* Assign from a py buffer to a js buffer
* obj -- A JsBuffer (meaning a PyProxy of an ArrayBuffer or an ArrayBufferView)
* buffer -- A PyObject which supports the buffer protocol (can be read only)
*/
static PyObject*
JsBuffer_AssignPyBuffer(PyObject* obj, PyObject* source)
JsBuffer_assign(PyObject* obj, PyObject* source)
{
JsProxy* self = (JsProxy*)obj;
bool success = false;
@ -1487,9 +1493,15 @@ finally:
return NULL;
}
// Method table entry exposing JsBuffer_assign to Python as "assign".
// METH_O: the method takes exactly one positional object argument (the source).
// No docstring is given here; JsProxy_init passes this entry to SET_DOCSTRING,
// which presumably fills it in from the Python documentation stub.
static PyMethodDef JsBuffer_assign_MethodDef = {
  "assign",
  (PyCFunction)JsBuffer_assign,
  METH_O,
};
/**
* Used from js2python for to_py. Make a new Python buffer with the same data as
* jsbuffer.
* Used from js2python for to_py and by to_memoryview. Make a new Python buffer
* with the same data as jsbuffer.
*
* All other arguments are calculated from jsbuffer, but it's more convenient to
* calculate them in JavaScript and pass them as arguments than to acquire them
@ -1501,10 +1513,10 @@ finally:
* itemsize - the appropriate itemsize for jsbuffer, from get_buffer_datatype
*/
PyObject*
JsBuffer_CloneIntoPython(JsRef jsbuffer,
Py_ssize_t byteLength,
char* format,
Py_ssize_t itemsize)
JsBuffer_CopyIntoMemoryView(JsRef jsbuffer,
Py_ssize_t byteLength,
char* format,
Py_ssize_t itemsize)
{
bool success = false;
Buffer* buffer = NULL;
@ -1526,19 +1538,146 @@ finally:
return result;
}
/**
 * Helper for to_bytes. Allocates a new bytes object of byteLength bytes and
 * copies the contents of the ArrayBuffer into it.
 *
 * jsbuffer - reference to the JavaScript buffer to copy from
 * byteLength - size in bytes of the bytes object to allocate
 *
 * Returns a new reference to the bytes object, or NULL if either the
 * allocation or the copy fails.
 */
PyObject*
JsBuffer_CopyIntoBytes(JsRef jsbuffer, Py_ssize_t byteLength)
{
  bool copied = false;
  // A NULL source asks for an uninitialized bytes object of the given size;
  // its storage is filled in by the copy below.
  PyObject* bytes = PyBytes_FromStringAndSize(NULL, byteLength);
  FAIL_IF_NULL(bytes);
  FAIL_IF_MINUS_ONE(hiwire_assign_to_ptr(jsbuffer, PyBytes_AS_STRING(bytes)));
  copied = true;
finally:
  if (!copied) {
    // Do not hand a partially filled object back to the caller.
    Py_CLEAR(bytes);
  }
  return bytes;
}
/**
 * Used by JsBuffer_ToString. Decode the ArrayBuffer into a Javascript string
 * using a TextDecoder with the given encoding.
 *
 * jsbuffer_id - reference to the Javascript buffer to decode
 * encoding - encoding label handed to the TextDecoder constructor, or NULL to
 *            construct the TextDecoder without a label (the callers treat
 *            this case as utf8)
 *
 * Returns a new reference to the decoded Javascript string.
 *
 * If a decoding error occurs (decode throws a TypeError), return 0 without
 * setting the error flag so that the caller can raise its own Python
 * exception (JsBuffer_ToString raises a ValueError). Any other exception
 * thrown by decode is rethrown unchanged.
 */
// clang-format off
EM_JS_REF(JsRef,
JsBuffer_DecodeString_js,
(JsRef jsbuffer_id, char* encoding),
{
  let buffer = Module.hiwire.get_value(jsbuffer_id);
  // Left undefined when no encoding was supplied.
  let encoding_js;
  if (encoding) {
    encoding_js = UTF8ToString(encoding);
  }
  // fatal: true makes decode throw on malformed input.
  let decoder = new TextDecoder(encoding_js, {fatal : true});
  let res;
  try {
    res = decoder.decode(buffer);
  } catch(e){
    if(e instanceof TypeError) {
      // Decoding error
      return 0;
    }
    throw e;
  }
  return Module.hiwire.new_value(res);
})
// clang-format on
/**
 * Decode the ArrayBuffer into a PyUnicode object.
 *
 * jsbuffer - reference to the JavaScript buffer to decode
 * encoding - encoding label forwarded to JsBuffer_DecodeString_js, may be NULL
 *
 * Returns the result of converting the decoded JavaScript string with
 * js2python, or NULL on failure. When the decoder returns NULL without a
 * pending Python exception, a ValueError naming the encoding is raised.
 */
PyObject*
JsBuffer_ToString(JsRef jsbuffer, char* encoding)
{
  PyObject* pystr = NULL;
  JsRef decoded = JsBuffer_DecodeString_js(jsbuffer, encoding);
  if (decoded == NULL && !PyErr_Occurred()) {
    // The decoder signalled a decoding error without raising, so raise here.
    const char* encoding_name = encoding ? encoding : "utf8";
    PyErr_Format(PyExc_ValueError,
                 "Failed to decode Javascript TypedArray as %s",
                 encoding_name);
  }
  FAIL_IF_NULL(decoded);
  pystr = js2python(decoded);
  FAIL_IF_NULL(pystr);
finally:
  hiwire_CLEAR(decoded);
  return pystr;
}
/**
 * Implementation of the to_memoryview method of buffer proxies. Forwards the
 * JavaScript reference and the cached byteLength, format and itemsize of the
 * proxy to JsBuffer_CopyIntoMemoryView and returns its result.
 *
 * buffer -- the JsProxy wrapping the ArrayBuffer or ArrayBuffer view
 * _args -- unused. Functions registered with METH_NOARGS are still called
 *          with two arguments, so the C signature has to declare both to
 *          match the PyCFunction type it is invoked through.
 */
static PyObject*
JsBuffer_tomemoryview(PyObject* buffer, PyObject* _args)
{
  JsProxy* self = (JsProxy*)buffer;
  return JsBuffer_CopyIntoMemoryView(
    self->js, self->byteLength, self->format, self->itemsize);
}

// Method table entry exposing JsBuffer_tomemoryview as "to_memoryview".
static PyMethodDef JsBuffer_tomemoryview_MethodDef = {
  "to_memoryview",
  (PyCFunction)JsBuffer_tomemoryview,
  METH_NOARGS,
};
/**
 * Implementation of the to_bytes method of buffer proxies. Forwards the
 * JavaScript reference and the cached byteLength of the proxy to
 * JsBuffer_CopyIntoBytes and returns its result.
 *
 * buffer -- the JsProxy wrapping the ArrayBuffer or ArrayBuffer view
 * _args -- unused. Functions registered with METH_NOARGS are still called
 *          with two arguments, so the C signature has to declare both to
 *          match the PyCFunction type it is invoked through.
 */
static PyObject*
JsBuffer_tobytes(PyObject* buffer, PyObject* _args)
{
  JsProxy* self = (JsProxy*)buffer;
  return JsBuffer_CopyIntoBytes(self->js, self->byteLength);
}

// Method table entry exposing JsBuffer_tobytes as "to_bytes".
static PyMethodDef JsBuffer_tobytes_MethodDef = {
  "to_bytes",
  (PyCFunction)JsBuffer_tobytes,
  METH_NOARGS,
};
/**
 * Implementation of the to_string method of buffer proxies (fastcall with
 * keywords). Parses one optional string argument, "encoding", and forwards it
 * together with the JavaScript reference of self to JsBuffer_ToString.
 *
 * Returns NULL if argument parsing fails, otherwise whatever
 * JsBuffer_ToString returns.
 */
static PyObject*
JsBuffer_tostring(PyObject* self,
                  PyObject* const* args,
                  Py_ssize_t nargs,
                  PyObject* kwnames)
{
  static const char* const _keywords[] = { "encoding", 0 };
  // "|s": a single optional string argument; the text after ":" is the
  // function name used in argument error messages.
  static struct _PyArg_Parser _parser = { "|s:to_string", _keywords, 0 };
  // Stays NULL when the caller does not pass an encoding.
  char* encoding = NULL;
  if (!_PyArg_ParseStackAndKeywords(
        args, nargs, kwnames, &_parser, &encoding)) {
    return NULL;
  }
  return JsBuffer_ToString(JsProxy_REF(self), encoding);
}

// Method table entry exposing JsBuffer_tostring as "to_string"; the flags
// match the fastcall-with-keywords signature above.
static PyMethodDef JsBuffer_tostring_MethodDef = {
  "to_string",
  (PyCFunction)JsBuffer_tostring,
  METH_FASTCALL | METH_KEYWORDS,
};
int
JsBuffer_cinit(PyObject* obj)
{
bool success = false;
JsProxy* self = (JsProxy*)obj;
// TODO: should logic here be any different if we're on wasm heap?
self->byteLength = hiwire_get_byteLength(JsProxy_REF(self));
// format string is borrowed from hiwire_get_buffer_info, DO NOT
// DEALLOCATE!
hiwire_get_buffer_datatype(JsProxy_REF(self),
&self->format,
&self->itemsize,
&self->check_assignments);
hiwire_get_buffer_info(JsProxy_REF(self),
&self->byteLength,
&self->format,
&self->itemsize,
&self->check_assignments);
if (self->format == NULL) {
char* typename = hiwire_constructor_name(JsProxy_REF(self));
PyErr_Format(
@ -1571,7 +1710,7 @@ JsProxy_create_subtype(int flags)
// Make sure these stack allocations are large enough to fit!
PyType_Slot slots[20];
int cur_slot = 0;
PyMethodDef methods[10];
PyMethodDef methods[50];
int cur_method = 0;
PyMemberDef members[5];
int cur_member = 0;
@ -1654,18 +1793,11 @@ JsProxy_create_subtype(int flags)
.pfunc = (void*)JsProxy_ass_subscript_array };
}
if (flags & IS_BUFFER) {
methods[cur_method++] = (PyMethodDef){
"assign",
(PyCFunction)JsBuffer_AssignPyBuffer,
METH_O,
PyDoc_STR("Copies a buffer into the TypedArray "),
};
methods[cur_method++] = (PyMethodDef){
"assign_to",
(PyCFunction)JsBuffer_AssignToPyBuffer,
METH_O,
PyDoc_STR("Copies the TypedArray into a buffer"),
};
methods[cur_method++] = JsBuffer_assign_MethodDef;
methods[cur_method++] = JsBuffer_assign_to_MethodDef;
methods[cur_method++] = JsBuffer_tomemoryview_MethodDef;
methods[cur_method++] = JsBuffer_tobytes_MethodDef;
methods[cur_method++] = JsBuffer_tostring_MethodDef;
}
methods[cur_method++] = (PyMethodDef){ 0 };
members[cur_member++] = (PyMemberDef){ 0 };
@ -1898,6 +2030,11 @@ JsProxy_init(PyObject* core_module)
SET_DOCSTRING(JsProxy_catch_MethodDef);
SET_DOCSTRING(JsProxy_finally_MethodDef);
SET_DOCSTRING(JsMethod_Construct_MethodDef);
SET_DOCSTRING(JsBuffer_assign_MethodDef);
SET_DOCSTRING(JsBuffer_assign_to_MethodDef);
SET_DOCSTRING(JsBuffer_tomemoryview_MethodDef);
SET_DOCSTRING(JsBuffer_tobytes_MethodDef);
SET_DOCSTRING(JsBuffer_tostring_MethodDef);
#undef SET_DOCSTRING
asyncio_module = PyImport_ImportModule("asyncio");

View File

@ -108,6 +108,37 @@ class JsProxy:
an ArrayBuffer view.
"""
def to_memoryview(self) -> memoryview:
"""Convert the buffer to a memoryview.
Copies the data once. This currently has the same effect as :any:`to_py`.
Present only if the wrapped Javascript object is an ArrayBuffer or
an ArrayBuffer view.
"""
def to_bytes(self) -> bytes:
"""Convert the buffer to a bytes object.
Copies the data once.
Present only if the wrapped Javascript object is an ArrayBuffer or
an ArrayBuffer view.
"""
def to_string(self, encoding=None) -> str:
"""Convert the buffer to a string object.
Copies the data twice.
The encoding argument will be passed to the Javascript
`TextDecoder <https://developer.mozilla.org/en-US/docs/Web/API/TextDecoder>`_
constructor. It should be one of the encodings listed in the table here:
`<https://encoding.spec.whatwg.org/#names-and-labels>`_. The default
encoding is utf8.
Present only if the wrapped Javascript object is an ArrayBuffer or
an ArrayBuffer view.
"""
# from pyproxy.c

View File

@ -1031,6 +1031,68 @@ def test_buffer_assign_back(selenium):
assert result == [1, 20, 3, 77, 5, 9]
def test_buffer_conversions(selenium):
    """to_bytes and to_memoryview on a Uint8Array must agree with each other.

    The JavaScript side UTF-8 encodes a string containing a non-ASCII
    character; the Python side checks that ``to_bytes()`` decodes back to the
    original string and that ``to_memoryview()`` holds the same bytes.
    """
    # Plain string, not an f-string: there is nothing to interpolate, and an
    # f-string would misinterpret any braces later added to the JavaScript.
    selenium.run_js(
        """
self.s = "abcဴ";
self.jsbytes = new TextEncoder().encode(s);
pyodide.runPython(`
from js import s, jsbytes
memoryview_conversion = jsbytes.to_memoryview()
bytes_conversion = jsbytes.to_bytes()
assert bytes_conversion.decode() == s
assert bytes(memoryview_conversion) == bytes_conversion
del jsbytes
`);
"""
    )
def test_tostring_encoding(selenium):
    """to_string must honour an explicit encoding.

    A Uint8Array holding windows-1251 encoded Cyrillic text is decoded with
    ``to_string('windows-1251')`` and compared against the expected string.
    """
    selenium.run_js(
        """
// windows-1251 encoded "Привет, мир!" which is Russian for "Hello, world!"
self.bytes = new Uint8Array([207, 240, 232, 226, 229, 242, 44, 32, 236, 232, 240, 33]);
pyodide.runPython(`
from js import bytes
assert bytes.to_string('windows-1251') == "Привет, мир!"
`);
"""
    )
def test_tostring_error(selenium):
    """to_string must raise ValueError when the bytes cannot be decoded.

    The Uint8Array holds windows-1251 encoded text, so calling ``to_string()``
    with no encoding argument is expected to fail.
    """
    # The body of the embedded ``with`` statement has to be indented,
    # otherwise the embedded Python fails with IndentationError before
    # to_string is ever called.
    selenium.run_js(
        """
// windows-1251 encoded "Привет, мир!" which is Russian for "Hello, world!"
self.bytes = new Uint8Array([207, 240, 232, 226, 229, 242, 44, 32, 236, 232, 240, 33]);
pyodide.runPython(`
from js import bytes
from unittest import TestCase
raises = TestCase().assertRaises
with raises(ValueError):
    bytes.to_string()
`);
"""
    )
def test_duck_buffer_method_presence(selenium):
    """Buffer-only methods appear on buffer proxies and nowhere else.

    Checks that ``dir()`` of a proxied Uint8Array contains all of assign,
    assign_to, to_string, to_memoryview and to_bytes, and that ``dir()`` of a
    proxied plain object contains none of them.
    """
    selenium.run_js(
        """
self.bytes = new Uint8Array([207, 240, 232, 226, 229, 242, 44, 32, 236, 232, 240, 33]);
self.other = {};
pyodide.runPython(`
from js import bytes, other
buffer_methods = {"assign", "assign_to", "to_string", "to_memoryview", "to_bytes"}
assert buffer_methods < set(dir(bytes))
assert not set(dir(other)).intersection(buffer_methods)
`);
"""
    )
def test_memory_leaks(selenium):
# refcounts are tested automatically in conftest by default
selenium.run_js(