See #301: Speed up js2python string conversion

This commit is contained in:
Michael Droettboom 2019-01-25 10:09:40 -05:00
parent 4166c00c2c
commit 46f64b5c7c
3 changed files with 62 additions and 9 deletions

View File

@ -60,7 +60,7 @@ EM_JS(int, hiwire_string_ucs4, (int ptr, int len), {
// Builds a JavaScript string from `len` 32-bit values read out of
// Module.HEAPU32 and returns whatever Module.hiwire_new_value() yields
// for that string.
var jsstr = "";
// `ptr` is divided by 4 to turn it into an index into the 32-bit view.
var idx = ptr / 4;
for (var i = 0; i < len; ++i) {
// NOTE(review): this hunk appears to show both sides of a one-line change
// with the +/- markers lost; the fromCharCode line is presumably the removed
// one and the fromCodePoint line its replacement -- confirm against the raw
// diff. String.fromCharCode truncates its argument to 16 bits, so it cannot
// produce characters above U+FFFF, whereas String.fromCodePoint can.
jsstr += String.fromCharCode(Module.HEAPU32[idx + i]);
jsstr += String.fromCodePoint(Module.HEAPU32[idx + i]);
}
return Module.hiwire_new_value(jsstr);
});

View File

@ -9,9 +9,15 @@
// bubble out to Python
int
_js2python_string(char* val)
_js2python_allocate_string(int size, int max_code_point)
{
// NOTE(review): this hunk appears to show both sides of a change with the
// +/- markers lost; _js2python_string / PyUnicode_FromString look like the
// removed lines and _js2python_allocate_string / PyUnicode_New like their
// replacement -- confirm against the raw diff.
//
// The PyUnicode_New variant passes `size` and `max_code_point` straight
// through and returns the resulting object pointer cast to int; the caller
// visible in this commit treats a 0 return as failure.
return (int)PyUnicode_FromString(val);
return (int)PyUnicode_New(size, max_code_point);
}
int
_js2python_get_ptr(int obj)
{
  // `obj` carries a PyObject pointer as an int; hand back the address that
  // PyUnicode_DATA reports for it, again as an int.
  void* data = PyUnicode_DATA((PyObject*)obj);
  return (int)data;
}
int
@ -68,9 +74,46 @@ EM_JS(int, __js2python, (int id), {
var value = Module.hiwire_get_value(id);
var type = typeof value;
if (type === 'string') {
var charptr = allocate(intArrayFromString(value), 'i8', ALLOC_NORMAL);
var result = __js2python_string(charptr);
_free(charptr);
// The general idea here is to allocate a Python string and then
// have Javascript write directly into its buffer. We first need
// to determine if is needs to be a 1-, 2- or 4-byte string, since
// Python handles all 3.
var max_code_point = 0;
for (var i = 0; i < value.length; i++) {
code_point = value.codePointAt(i);
max_code_point = Math.max(max_code_point, code_point);
if (max_code_point > 0xffff) {
// If we're dealing with UTF-16 surrogate pairs, convert the string
// to an array of each of its characters, so we correctly count the
// number of characters.
value = Array.from(value[Symbol.iterator]());
// We can short circuit here -- we already know we need a 4-byte output.
break;
}
}
var result = __js2python_allocate_string(value.length, max_code_point);
if (result == 0) {
return 0;
}
var ptr = __js2python_get_ptr(result);
if (max_code_point > 0xffff) {
ptr = ptr / 4;
for (var i = 0; i < value.length; i++) {
Module.HEAPU32[ptr + i] = value[i].codePointAt(0);
}
} else if (max_code_point > 0xff) {
ptr = ptr / 2;
for (var i = 0; i < value.length; i++) {
Module.HEAPU16[ptr + i] = value.codePointAt(i);
}
} else {
for (var i = 0; i < value.length; i++) {
Module.HEAPU8[ptr + i] = value.codePointAt(i);
}
}
return result;
} else if (type === 'number') {
return __js2python_number(value);

View File

@ -34,6 +34,8 @@ def test_python2js(selenium):
'return pyodide.runPython("\'ιωδιούχο\'") === "ιωδιούχο"')
assert selenium.run_js(
'return pyodide.runPython("\'碘化物\'") === "碘化物"')
assert selenium.run_js(
'return pyodide.runPython("\'🐍\'") === "🐍"')
assert selenium.run_js(
'let x = pyodide.runPython("b\'bytes\'");\n'
'return (x instanceof window.Uint8ClampedArray) && '
@ -156,7 +158,9 @@ def test_pythonexc2js(selenium):
def test_js2python(selenium):
selenium.run_js(
"""
window.jsstring = "碘化物";
window.jsstring_ucs1 = "pyodidé";
window.jsstring_ucs2 = "碘化物";
window.jsstring_ucs4 = "🐍";
window.jsnumber0 = 42;
window.jsnumber1 = 42.5;
window.jsundefined = undefined;
@ -170,8 +174,14 @@ def test_js2python(selenium):
"""
)
assert selenium.run(
'from js import jsstring\n'
'jsstring == "碘化物"')
'from js import jsstring_ucs1\n'
'jsstring_ucs1 == "pyodidé"')
assert selenium.run(
'from js import jsstring_ucs2\n'
'jsstring_ucs2 == "碘化物"')
assert selenium.run(
'from js import jsstring_ucs4\n'
'jsstring_ucs4 == "🐍"')
assert selenium.run(
'from js import jsnumber0\n'
'jsnumber0 == 42')