From a07b6f391325047b63a324a16ddcfaae25afccf5 Mon Sep 17 00:00:00 2001
From: Michael Droettboom
Date: Thu, 26 Jul 2018 16:50:12 -0400
Subject: [PATCH] Fix #93 by avoiding use of TextDecoder

Any use of TextDecoder on a part of the Wasm HEAP seems to cause crashes
when using WebAssembly.instantiate later on (in Chrome).

The fix is to:

- Avoid use of TextDecoder in emscripten-generated code by setting
  -s TEXTDECODER=0
- Do the Python-to-JS string conversion a different way:
  - Handle the native format of a Python Unicode string (which is either
    UCS1, UCS2 or UCS4) directly. This has the added advantage of being
    computationally simpler than encoding/decoding to/from UTF8.
  - Build the JS string for UCS4 data with String.fromCodePoint, since
    String.fromCharCode truncates its argument to 16 bits and would
    corrupt characters outside the Basic Multilingual Plane.
---
 Makefile            |  3 ++-
 src/hiwire.c        | 29 +++++++++++++++++++++++++----
 src/hiwire.h        | 24 +++++++++++++++++++++---
 src/python2js.c     | 16 ++++++++++++----
 test/test_python.py | 23 +++++++++++++++++++++++
 5 files changed, 83 insertions(+), 12 deletions(-)

diff --git a/Makefile b/Makefile
index a4fc9e3f9..3ab2f0f07 100644
--- a/Makefile
+++ b/Makefile
@@ -32,7 +32,8 @@ LDFLAGS=\
   -s USE_LIBPNG=1 \
   -std=c++14 \
   -lstdc++ \
-  --memory-init-file 0
+  --memory-init-file 0 \
+  -s TEXTDECODER=0
 
 SIX_ROOT=six/six-1.11.0/build/lib
 SIX_LIBS=$(SIX_ROOT)/six.py
diff --git a/src/hiwire.c b/src/hiwire.c
index 8f35764c0..933125932 100644
--- a/src/hiwire.c
+++ b/src/hiwire.c
@@ -36,10 +36,31 @@ EM_JS(int, hiwire_double, (double val), {
   return Module.hiwire_new_value(val);
 });
 
-EM_JS(int, hiwire_string_utf8_length, (int ptr, int len), {
-  var bytes = new Uint8Array(Module.HEAPU8.buffer, ptr, len);
-  var jsval = new TextDecoder('utf-8').decode(bytes);
-  return Module.hiwire_new_value(jsval);
+EM_JS(int, hiwire_string_ucs4, (int ptr, int len), {
+  var jsstr = "";
+  var idx = ptr / 4;
+  for (var i = 0; i < len; ++i) {
+    jsstr += String.fromCodePoint(Module.HEAPU32[idx + i]);
+  }
+  return Module.hiwire_new_value(jsstr);
+});
+
+EM_JS(int, hiwire_string_ucs2, (int ptr, int len), {
+  var jsstr = "";
+  var idx = ptr / 2;
+  for (var i = 0; i < len; ++i) {
+    jsstr += String.fromCharCode(Module.HEAPU16[idx + i]);
+  }
+  return Module.hiwire_new_value(jsstr);
+});
+
+EM_JS(int, hiwire_string_ucs1, (int ptr, int len), {
+  var jsstr = "";
+  var idx = ptr;
+  for (var i = 0; i < len; ++i) {
+    jsstr += String.fromCharCode(Module.HEAPU8[idx + i]);
+  }
+  return Module.hiwire_new_value(jsstr);
 });
 
 EM_JS(int, hiwire_string_utf8, (int ptr), {
diff --git a/src/hiwire.h b/src/hiwire.h
index 6119d21d5..a7cad62c7 100644
--- a/src/hiwire.h
+++ b/src/hiwire.h
@@ -51,13 +51,31 @@ int
 hiwire_double(double val);
 
 /**
- * Create a new Javascript string, given a pointer to a buffer containing UTF8
- * and a length, in bytes. The string data itself is copied.
+ * Create a new Javascript string, given a pointer to a buffer
+ * containing UCS4 and a length in characters. The string data itself is copied.
  *
  * Returns: New reference
  */
 int
-hiwire_string_utf8_length(int ptr, int len);
+hiwire_string_ucs4(int ptr, int len);
+
+/**
+ * Create a new Javascript string, given a pointer to a buffer
+ * containing UCS2 and a length in characters. The string data itself is copied.
+ *
+ * Returns: New reference
+ */
+int
+hiwire_string_ucs2(int ptr, int len);
+
+/**
+ * Create a new Javascript string, given a pointer to a buffer
+ * containing UCS1 and a length in characters. The string data itself is copied.
+ *
+ * Returns: New reference
+ */
+int
+hiwire_string_ucs1(int ptr, int len);
 
 /**
  * Create a new Javascript string, given a pointer to a null-terminated buffer
diff --git a/src/python2js.c b/src/python2js.c
index bd35ab7d0..b9607fb3b 100644
--- a/src/python2js.c
+++ b/src/python2js.c
@@ -130,12 +130,20 @@ python2js_int(PyObject* x)
     }
     return hiwire_double(x_double);
   } else if (PyUnicode_Check(x)) {
-    Py_ssize_t length;
-    char* chars = PyUnicode_AsUTF8AndSize(x, &length);
-    if (chars == NULL) {
+    int kind = PyUnicode_KIND(x);
+    int data = (int)PyUnicode_DATA(x);
+    int length = (int)PyUnicode_GET_LENGTH(x);
+    switch (kind) {
+    case PyUnicode_1BYTE_KIND:
+      return hiwire_string_ucs1(data, length);
+    case PyUnicode_2BYTE_KIND:
+      return hiwire_string_ucs2(data, length);
+    case PyUnicode_4BYTE_KIND:
+      return hiwire_string_ucs4(data, length);
+    default:
+      PyErr_SetString(PyExc_ValueError, "Unknown Unicode KIND");
       return -1;
     }
-    return hiwire_string_utf8_length((int)(void*)chars, length);
   } else if (PyBytes_Check(x)) {
     char* x_buff;
     Py_ssize_t length;
diff --git a/test/test_python.py b/test/test_python.py
index 78fde14c2..aa4b5d951 100644
--- a/test/test_python.py
+++ b/test/test_python.py
@@ -25,6 +25,14 @@ def test_python2js(selenium):
     assert selenium.run_js('return pyodide.runPython("False") === false')
     assert selenium.run_js('return pyodide.runPython("42") === 42')
     assert selenium.run_js('return pyodide.runPython("3.14") === 3.14')
+    # Need to test all three internal string representations in Python: UCS1,
+    # UCS2 and UCS4
+    assert selenium.run_js(
+        'return pyodide.runPython("\'ascii\'") === "ascii"')
+    assert selenium.run_js(
+        'return pyodide.runPython("\'ιωδιούχο\'") === "ιωδιούχο"')
+    assert selenium.run_js(
+        'return pyodide.runPython("\'🐍\'") === "🐍"')
     assert selenium.run_js(
         'return pyodide.runPython("\'碘化物\'") === "碘化物"')
     assert selenium.run_js(
@@ -311,3 +319,18 @@ def test_recursive_repr(selenium):
         "except RecursionError:\n"
         "    result = False\n"
         "result")
+
+
+def test_load_package_after_convert_string(selenium):
+    """
+    See #93.
+    """
+    selenium.run(
+        "import sys\n"
+        "x = sys.version")
+    selenium.run_js(
+        "var x = pyodide.pyimport('x')\n"
+        "console.log(x)")
+    selenium.load_package('kiwisolver')
+    selenium.run(
+        "import kiwisolver")