js2python: write JavaScript strings directly into Python string buffers

hiwire_string_ucs4 builds its result with String.fromCodePoint instead of
String.fromCharCode. __js2python scans a JS string for its largest code
point, allocates the Python string with PyUnicode_New, and stores the code
points through HEAPU8, HEAPU16 or HEAPU32 at the address returned by
PyUnicode_DATA. Tests add 1-, 2- and 4-byte string cases.

diff --git a/src/hiwire.c b/src/hiwire.c
index ea6684f63..7e7e1238e 100644
--- a/src/hiwire.c
+++ b/src/hiwire.c
@@ -60,7 +60,7 @@ EM_JS(int, hiwire_string_ucs4, (int ptr, int len), {
   var jsstr = "";
   var idx = ptr / 4;
   for (var i = 0; i < len; ++i) {
-    jsstr += String.fromCharCode(Module.HEAPU32[idx + i]);
+    jsstr += String.fromCodePoint(Module.HEAPU32[idx + i]);
   }
   return Module.hiwire_new_value(jsstr);
 });
diff --git a/src/js2python.c b/src/js2python.c
index 606d383b4..859f7a5ff 100644
--- a/src/js2python.c
+++ b/src/js2python.c
@@ -9,9 +9,15 @@
 // bubble out to Python

 int
-_js2python_string(char* val)
+_js2python_allocate_string(int size, int max_code_point)
 {
-  return (int)PyUnicode_FromString(val);
+  return (int)PyUnicode_New(size, max_code_point);
+}
+
+int
+_js2python_get_ptr(int obj)
+{
+  return (int)PyUnicode_DATA((PyObject*)obj);
 }

 int
@@ -68,9 +74,46 @@ EM_JS(int, __js2python, (int id), {
   var value = Module.hiwire_get_value(id);
   var type = typeof value;
   if (type === 'string') {
-    var charptr = allocate(intArrayFromString(value), 'i8', ALLOC_NORMAL);
-    var result = __js2python_string(charptr);
-    _free(charptr);
+    // The general idea here is to allocate a Python string and then
+    // have Javascript write directly into its buffer. We first need
+    // to determine if it needs to be a 1-, 2- or 4-byte string, since
+    // Python handles all 3.
+    var max_code_point = 0;
+    for (var i = 0; i < value.length; i++) {
+      var code_point = value.codePointAt(i);
+      max_code_point = Math.max(max_code_point, code_point);
+      if (max_code_point > 0xffff) {
+        // If we're dealing with UTF-16 surrogate pairs, convert the string
+        // to an array of each of its characters, so we correctly count the
+        // number of characters.
+        value = Array.from(value[Symbol.iterator]());
+        // We can short circuit here -- we already know we need a 4-byte output.
+        break;
+      }
+    }
+
+    var result = __js2python_allocate_string(value.length, max_code_point);
+    if (result === 0) {
+      return 0;
+    }
+
+    var ptr = __js2python_get_ptr(result);
+    if (max_code_point > 0xffff) {
+      ptr = ptr / 4;
+      for (var i = 0; i < value.length; i++) {
+        Module.HEAPU32[ptr + i] = value[i].codePointAt(0);
+      }
+    } else if (max_code_point > 0xff) {
+      ptr = ptr / 2;
+      for (var i = 0; i < value.length; i++) {
+        Module.HEAPU16[ptr + i] = value.codePointAt(i);
+      }
+    } else {
+      for (var i = 0; i < value.length; i++) {
+        Module.HEAPU8[ptr + i] = value.codePointAt(i);
+      }
+    }
+
     return result;
   } else if (type === 'number') {
     return __js2python_number(value);
diff --git a/test/test_python.py b/test/test_python.py
index c0fc64cfa..2dc1dadb5 100644
--- a/test/test_python.py
+++ b/test/test_python.py
@@ -34,6 +34,8 @@ def test_python2js(selenium):
         'return pyodide.runPython("\'ιωδιούχο\'") === "ιωδιούχο"')
     assert selenium.run_js(
         'return pyodide.runPython("\'碘化物\'") === "碘化物"')
+    assert selenium.run_js(
+        'return pyodide.runPython("\'🐍\'") === "🐍"')
     assert selenium.run_js(
         'let x = pyodide.runPython("b\'bytes\'");\n'
         'return (x instanceof window.Uint8ClampedArray) && '
@@ -156,7 +158,9 @@ def test_pythonexc2js(selenium):
 def test_js2python(selenium):
     selenium.run_js(
         """
-        window.jsstring = "碘化物";
+        window.jsstring_ucs1 = "pyodidé";
+        window.jsstring_ucs2 = "碘化物";
+        window.jsstring_ucs4 = "🐍";
         window.jsnumber0 = 42;
         window.jsnumber1 = 42.5;
         window.jsundefined = undefined;
@@ -170,8 +174,14 @@ def test_js2python(selenium):
         """
     )
     assert selenium.run(
-        'from js import jsstring\n'
-        'jsstring == "碘化物"')
+        'from js import jsstring_ucs1\n'
+        'jsstring_ucs1 == "pyodidé"')
+    assert selenium.run(
+        'from js import jsstring_ucs2\n'
+        'jsstring_ucs2 == "碘化物"')
+    assert selenium.run(
+        'from js import jsstring_ucs4\n'
+        'jsstring_ucs4 == "🐍"')
     assert selenium.run(
         'from js import jsnumber0\n'
         'jsnumber0 == 42')