Simplify handling of non-BMP characters

2019-01-28 11:48:46 -05:00 · 2019-01-28 11:48:46 -05:00 · 1761a222dc
parent 46f64b5c7c
commit 1761a222dc
1 changed files with 16 additions and 12 deletions
--- a/src/js2python.c
+++ b/src/js2python.c
@ -79,20 +79,20 @@ EM_JS(int, __js2python, (int id), {
    // to determine if it needs to be a 1-, 2- or 4-byte string, since
    // Python handles all 3.
    var max_code_point = 0;
+    var length = value.length;
    for (var i = 0; i < value.length; i++) {
      code_point = value.codePointAt(i);
      max_code_point = Math.max(max_code_point, code_point);
-      if (max_code_point > 0xffff) {
-        // If we're dealing with UTF-16 surrogate pairs, convert the string
-        // to an array of each of its characters, so we correctly count the
-        // number of characters.
-        value = Array.from(value[Symbol.iterator]());
-        // We can short circuit here -- we already know we need a 4-byte output.
-        break;
+      if (code_point > 0xffff) {
+        // If we have a code point requiring UTF-16 surrogate pairs, the
+        // number of characters (codePoints) is less than value.length,
+        // so skip the next charCode and subtract 1 from the length.
+        i++;
+        length--;
      }
    }

-    var result = __js2python_allocate_string(value.length, max_code_point);
+    var result = __js2python_allocate_string(length, max_code_point);
    if (result == 0) {
      return 0;
    }
@ -100,16 +100,20 @@ EM_JS(int, __js2python, (int id), {
    var ptr = __js2python_get_ptr(result);
    if (max_code_point > 0xffff) {
      ptr = ptr / 4;
-      for (var i = 0; i < value.length; i++) {
-        Module.HEAPU32[ptr + i] = value[i].codePointAt(0);
+      for (var i = 0, j = 0; j < length; i++, j++) {
+        var code_point = value.codePointAt(i);
+        Module.HEAPU32[ptr + j] = code_point;
+        if (code_point > 0xffff) {
+          i++;
+        }
      }
    } else if (max_code_point > 0xff) {
      ptr = ptr / 2;
-      for (var i = 0; i < value.length; i++) {
+      for (var i = 0; i < length; i++) {
        Module.HEAPU16[ptr + i] = value.codePointAt(i);
      }
    } else {
-      for (var i = 0; i < value.length; i++) {
+      for (var i = 0; i < length; i++) {
        Module.HEAPU8[ptr + i] = value.codePointAt(i);
      }
    }