See #301: Speed-up js2python string conversion

This commit is contained in:
Michael Droettboom 2019-01-25 10:09:40 -05:00
parent 4166c00c2c
commit 46f64b5c7c
3 changed files with 62 additions and 9 deletions

View File

@ -60,7 +60,7 @@ EM_JS(int, hiwire_string_ucs4, (int ptr, int len), {
var jsstr = ""; var jsstr = "";
var idx = ptr / 4; var idx = ptr / 4;
for (var i = 0; i < len; ++i) { for (var i = 0; i < len; ++i) {
jsstr += String.fromCharCode(Module.HEAPU32[idx + i]); jsstr += String.fromCodePoint(Module.HEAPU32[idx + i]);
} }
return Module.hiwire_new_value(jsstr); return Module.hiwire_new_value(jsstr);
}); });

View File

@ -9,9 +9,15 @@
// bubble out to Python // bubble out to Python
int int
_js2python_string(char* val) _js2python_allocate_string(int size, int max_code_point)
{ {
return (int)PyUnicode_FromString(val); return (int)PyUnicode_New(size, max_code_point);
}
int
_js2python_get_ptr(int obj)
{
return (int)PyUnicode_DATA((PyObject*)obj);
} }
int int
@ -68,9 +74,46 @@ EM_JS(int, __js2python, (int id), {
var value = Module.hiwire_get_value(id); var value = Module.hiwire_get_value(id);
var type = typeof value; var type = typeof value;
if (type === 'string') { if (type === 'string') {
var charptr = allocate(intArrayFromString(value), 'i8', ALLOC_NORMAL); // The general idea here is to allocate a Python string and then
var result = __js2python_string(charptr); // have Javascript write directly into its buffer. We first need
_free(charptr); // to determine if it needs to be a 1-, 2- or 4-byte string, since
// Python handles all 3.
// Scan the string for its largest code point; that value is what selects
// the 1-, 2- or 4-byte branch when the buffer is filled in below.
var max_code_point = 0;
for (var i = 0; i < value.length; i++) {
  // Declared with `var`: a bare assignment here would create (or overwrite)
  // a global named `code_point` in sloppy mode and throw in strict mode.
  var code_point = value.codePointAt(i);
  max_code_point = Math.max(max_code_point, code_point);
  if (max_code_point > 0xffff) {
    // If we're dealing with UTF-16 surrogate pairs, convert the string
    // to an array of each of its characters, so we correctly count the
    // number of characters.
    value = Array.from(value[Symbol.iterator]());
    // We can short circuit here -- we already know we need a 4-byte output.
    break;
  }
}
var result = __js2python_allocate_string(value.length, max_code_point);
if (result == 0) {
return 0;
}
var ptr = __js2python_get_ptr(result);
if (max_code_point > 0xffff) {
ptr = ptr / 4;
for (var i = 0; i < value.length; i++) {
Module.HEAPU32[ptr + i] = value[i].codePointAt(0);
}
} else if (max_code_point > 0xff) {
ptr = ptr / 2;
for (var i = 0; i < value.length; i++) {
Module.HEAPU16[ptr + i] = value.codePointAt(i);
}
} else {
for (var i = 0; i < value.length; i++) {
Module.HEAPU8[ptr + i] = value.codePointAt(i);
}
}
return result; return result;
} else if (type === 'number') { } else if (type === 'number') {
return __js2python_number(value); return __js2python_number(value);

View File

@ -34,6 +34,8 @@ def test_python2js(selenium):
'return pyodide.runPython("\'ιωδιούχο\'") === "ιωδιούχο"') 'return pyodide.runPython("\'ιωδιούχο\'") === "ιωδιούχο"')
assert selenium.run_js( assert selenium.run_js(
'return pyodide.runPython("\'碘化物\'") === "碘化物"') 'return pyodide.runPython("\'碘化物\'") === "碘化物"')
assert selenium.run_js(
'return pyodide.runPython("\'🐍\'") === "🐍"')
assert selenium.run_js( assert selenium.run_js(
'let x = pyodide.runPython("b\'bytes\'");\n' 'let x = pyodide.runPython("b\'bytes\'");\n'
'return (x instanceof window.Uint8ClampedArray) && ' 'return (x instanceof window.Uint8ClampedArray) && '
@ -156,7 +158,9 @@ def test_pythonexc2js(selenium):
def test_js2python(selenium): def test_js2python(selenium):
selenium.run_js( selenium.run_js(
""" """
window.jsstring = "碘化物"; window.jsstring_ucs1 = "pyodidé";
window.jsstring_ucs2 = "碘化物";
window.jsstring_ucs4 = "🐍";
window.jsnumber0 = 42; window.jsnumber0 = 42;
window.jsnumber1 = 42.5; window.jsnumber1 = 42.5;
window.jsundefined = undefined; window.jsundefined = undefined;
@ -170,8 +174,14 @@ def test_js2python(selenium):
""" """
) )
assert selenium.run( assert selenium.run(
'from js import jsstring\n' 'from js import jsstring_ucs1\n'
'jsstring == "碘化物"') 'jsstring_ucs1 == "pyodidé"')
assert selenium.run(
'from js import jsstring_ucs2\n'
'jsstring_ucs2 == "碘化物"')
assert selenium.run(
'from js import jsstring_ucs4\n'
'jsstring_ucs4 == "🐍"')
assert selenium.run( assert selenium.run(
'from js import jsnumber0\n' 'from js import jsnumber0\n'
'jsnumber0 == 42') 'jsnumber0 == 42')