From 4b3bfe53ade98ecb7af83876719b23f71c777a69 Mon Sep 17 00:00:00 2001
From: Michael Droettboom <mdboom@gmail.com>
Date: Thu, 10 Jan 2019 07:40:01 -0500
Subject: [PATCH] Upgrade emscripten to 1.38.22

---
 Makefile                       |   9 +-
 Makefile.envs                  |   8 +-
 emsdk/Makefile                 |  20 +-
 emsdk/patches/lz4_c.patch      |   6 +-
 emsdk/patches/num_params.patch |   4 +-
 tools/file_packager.py         | 384 +++++++++++++++++++++++----------
 6 files changed, 292 insertions(+), 139 deletions(-)

diff --git a/Makefile b/Makefile
index a20cf958a..e2f7d31a8 100644
--- a/Makefile
+++ b/Makefile
@@ -68,8 +68,6 @@ build/pyodide.asm.js: src/main.bc src/jsimport.bc src/jsproxy.bc src/js2python.b
 	[ -d build ] || mkdir build
 	$(CXX) -s EXPORT_NAME="'pyodide'" -o build/pyodide.asm.html $(filter %.bc,$^) \
 	  $(LDFLAGS) -s FORCE_FILESYSTEM=1
-	rm build/pyodide.asm.asm.js
-	rm build/pyodide.asm.wasm.pre
 	rm build/pyodide.asm.html
 
 
@@ -149,7 +147,7 @@ build/test.data: $(CPYTHONLIB)
 	)
 	( \
 		cd build; \
-		python $(FILEPACKAGER) test.data --lz4 --preload ../$(CPYTHONLIB)/test@/lib/python3.7/test --js-output=test.js --export-name=pyodide._module --exclude \*.wasm.pre --exclude __pycache__ \
+		python $(FILEPACKAGER) test.data --lz4 --preload ../$(CPYTHONLIB)/test@/lib/python3.7/test --js-output=test.js --export-name=pyodide._module --exclude __pycache__ \
   )
 	uglifyjs build/test.js -o build/test.js
 
@@ -174,7 +172,6 @@ root/.built: \
 		cd root/lib/python$(PYMINOR); \
 		rm -fr `cat ../../../remove_modules.txt`; \
 		rm -fr test; \
-		find . -name "*.wasm.pre" -type f -delete ; \
 		find -type d -name __pycache__ -prune -exec rm -rf {} \; \
 	)
 	touch root/.built
@@ -185,7 +182,7 @@ ccache/emcc:
 	if hash ccache &>/dev/null; then \
     ln -s `which ccache` $(PYODIDE_ROOT)/ccache/emcc ; \
   else \
-    ln -s emsdk/emsdk/emscripten/tag-1.38.12/emcc $(PYODIDE_ROOT)/ccache/emcc; \
+    ln -s emsdk/emsdk/emscripten/tag-$(EMSCRIPTEN_VERSION)/emcc $(PYODIDE_ROOT)/ccache/emcc; \
   fi
 
 
@@ -194,7 +191,7 @@ ccache/em++:
 	if hash ccache &>/dev/null; then \
     ln -s `which ccache` $(PYODIDE_ROOT)/ccache/em++ ; \
   else \
-    ln -s emsdk/emsdk/emscripten/tag-1.38.12/em++ $(PYODIDE_ROOT)/ccache/em++; \
+    ln -s emsdk/emsdk/emscripten/tag-$(EMSCRIPTEN_VERSION)/em++ $(PYODIDE_ROOT)/ccache/em++; \
   fi
 
 
diff --git a/Makefile.envs b/Makefile.envs
index d3d28f6d4..0961e5e09 100644
--- a/Makefile.envs
+++ b/Makefile.envs
@@ -1,10 +1,12 @@
-export PATH := $(PYODIDE_ROOT)/ccache:$(PYODIDE_ROOT)/emsdk/emsdk:$(PYODIDE_ROOT)/emsdk/emsdk/clang/tag-e1.38.12/build_tag-e1.38.12_64/bin:$(PYODIDE_ROOT)/emsdk/emsdk/node/8.9.1_64bit/bin:$(PYODIDE_ROOT)/emsdk/emsdk/emscripten/tag-1.38.12:$(PYODIDE_ROOT)/emsdk/emsdk/binaryen/tag-1.38.12_64bit_binaryen/bin:$(PATH)
+export EMSCRIPTEN_VERSION = 1.38.22
+
+export PATH := $(PYODIDE_ROOT)/ccache:$(PYODIDE_ROOT)/emsdk/emsdk:$(PYODIDE_ROOT)/emsdk/emsdk/clang/tag-e$(EMSCRIPTEN_VERSION)/build_tag-e$(EMSCRIPTEN_VERSION)_64/bin:$(PYODIDE_ROOT)/emsdk/emsdk/node/8.9.1_64bit/bin:$(PYODIDE_ROOT)/emsdk/emsdk/emscripten/tag-$(EMSCRIPTEN_VERSION):$(PYODIDE_ROOT)/emsdk/emsdk/binaryen/tag-$(EMSCRIPTEN_VERSION)_64bit_binaryen/bin:$(PATH)
 
 export EMSDK = $(PYODIDE_ROOT)/emsdk/emsdk
 export EM_CONFIG = $(PYODIDE_ROOT)/emsdk/emsdk/.emscripten
 export EM_CACHE = $(PYODIDE_ROOT)/emsdk/emsdk/.emscripten_cache
-export EMSCRIPTEN = $(PYODIDE_ROOT)/emsdk/emsdk/emscripten/tag-1.38.12
-export BINARYEN_ROOT = $(PYODIDE_ROOT)/emsdk/emsdk/binaryen/tag-1.38.12_64bit_binaryen
+export EMSCRIPTEN = $(PYODIDE_ROOT)/emsdk/emsdk/emscripten/tag-$(EMSCRIPTEN_VERSION)
+export BINARYEN_ROOT = $(PYODIDE_ROOT)/emsdk/emsdk/binaryen/tag-$(EMSCRIPTEN_VERSION)_64bit_binaryen
 
 export PYVERSION=3.7.0
 export PYMINOR=$(basename $(PYVERSION))
diff --git a/emsdk/Makefile b/emsdk/Makefile
index 153de8622..d44e8ea7b 100644
--- a/emsdk/Makefile
+++ b/emsdk/Makefile
@@ -1,3 +1,6 @@
+PYODIDE_ROOT=$(abspath ..)
+include ../Makefile.envs
+
 all: emsdk/.complete
 
 # We hack the CPU_CORES, because if you use all of the cores on Circle-CI, you
@@ -8,14 +11,15 @@ emsdk/.complete:
 	git clone https://github.com/juj/emsdk.git
 	sed -i -e "s#CPU_CORES = max(multiprocessing.cpu_count()-1, 1)#CPU_CORES = 3#g" emsdk/emsdk
 	( \
-		cd emsdk ; \
-		./emsdk install --build=Release sdk-tag-1.38.12-64bit binaryen-tag-1.38.12-64bit ; \
-		cd .. ; \
-		(cat patches/*.patch | patch -p1) ; \
-		cd emsdk/binaryen/tag-1.38.12_64bit_binaryen/ ; \
-    make ; \
-	  cd ../.. ; \
-		./emsdk activate --embedded --build=Release sdk-tag-1.38.12-64bit binaryen-tag-1.38.12-64bit ; \
+		cd emsdk && \
+		./emsdk install --build=Release sdk-tag-$(EMSCRIPTEN_VERSION)-64bit binaryen-tag-$(EMSCRIPTEN_VERSION)-64bit && \
+		cd .. && \
+		(cat patches/*.patch | patch -p1) && \
+		cd emsdk/binaryen/tag-$(EMSCRIPTEN_VERSION)_64bit_binaryen/ && \
+    make && \
+	  cd ../.. && \
+    cp binaryen/tag-$(EMSCRIPTEN_VERSION)/bin/wasm.js binaryen/tag-$(EMSCRIPTEN_VERSION)_64bit_binaryen/bin && \
+		./emsdk activate --embedded --build=Release sdk-tag-$(EMSCRIPTEN_VERSION)-64bit binaryen-tag-$(EMSCRIPTEN_VERSION)-64bit && \
     touch .complete \
 	)
 
diff --git a/emsdk/patches/lz4_c.patch b/emsdk/patches/lz4_c.patch
index 175d510f4..62f3a761c 100644
--- a/emsdk/patches/lz4_c.patch
+++ b/emsdk/patches/lz4_c.patch
@@ -1,7 +1,7 @@
-diff --git a/emsdk/emscripten/tag-1.38.12/src/library_lz4.js b/emsdk/emscripten/tag-1.38.12/src/library_lz4.js
+diff --git a/emsdk/emscripten/tag-1.38.22/src/library_lz4.js b/emsdk/emscripten/tag-1.38.22/src/library_lz4.js
 index 4c3f583b7..5291002a4 100644
---- a/emsdk/emscripten/tag-1.38.12/src/library_lz4.js
-+++ b/emsdk/emscripten/tag-1.38.12/src/library_lz4.js
+--- a/emsdk/emscripten/tag-1.38.22/src/library_lz4.js
++++ b/emsdk/emscripten/tag-1.38.22/src/library_lz4.js
 @@ -5,26 +5,14 @@ mergeInto(LibraryManager.library, {
      DIR_MODE: {{{ cDefine('S_IFDIR') }}} | 511 /* 0777 */,
      FILE_MODE: {{{ cDefine('S_IFREG') }}} | 511 /* 0777 */,
diff --git a/emsdk/patches/num_params.patch b/emsdk/patches/num_params.patch
index cd49b7b1a..c7a89aa2b 100644
--- a/emsdk/patches/num_params.patch
+++ b/emsdk/patches/num_params.patch
@@ -1,7 +1,7 @@
 diff --git a/emsdk/binaryen/master/src/passes/FuncCastEmulation.cpp b/emsdk/binaryen/master/src/passes/FuncCastEmulation.cpp
 index 013e9403..d95fc282 100644
---- a/emsdk/binaryen/tag-1.38.12/src/passes/FuncCastEmulation.cpp
-+++ b/emsdk/binaryen/tag-1.38.12/src/passes/FuncCastEmulation.cpp
+--- a/emsdk/binaryen/tag-1.38.22/src/passes/FuncCastEmulation.cpp
++++ b/emsdk/binaryen/tag-1.38.22/src/passes/FuncCastEmulation.cpp
 @@ -39,7 +39,7 @@ namespace wasm {
  // This should be enough for everybody. (As described above, we need this
  // to match when dynamically linking, and also dynamic linking is why we
diff --git a/tools/file_packager.py b/tools/file_packager.py
index db5bdd80c..0af4edab1 100644
--- a/tools/file_packager.py
+++ b/tools/file_packager.py
@@ -1,6 +1,12 @@
 # flake8: noqa
 
-# This is forked from emscripten 1.38.10
+# This is forked from emscripten 1.38.22, with the original copyright notice
+# below.
+
+# Copyright 2012 The Emscripten Authors.  All rights reserved.
+# Emscripten is available under two separate licenses, the MIT license and the
+# University of Illinois/NCSA Open Source License.  Both these licenses can be
+# found in the LICENSE file.
 
 '''
 A tool that generates FS API calls to generate a filesystem, and packages the files
@@ -59,19 +65,19 @@ Notes:
 '''
 
 from __future__ import print_function
-import os, sys, shutil, random, uuid, ctypes
+import os
+import sys
+import shutil
+import random
+import uuid
+import ctypes
 
-sys.path.insert(
-  1,
-  os.path.join(
-    os.path.dirname(
-      os.path.dirname(
-        os.path.abspath(__file__)
-      )
-    ),
-    'emsdk', 'emsdk', 'emscripten', 'tag-1.38.12'
-  )
+emscripten_dir = os.path.join(
+  os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
+  'emsdk', 'emsdk', 'emscripten'
 )
+tag_dir = min(sorted(os.listdir(emscripten_dir), reverse=True), key=len)
+sys.path.insert(1, os.path.join(emscripten_dir, tag_dir))
 
 from tools.toolchain_profiler import ToolchainProfiler
 if __name__ == '__main__':
@@ -79,9 +85,8 @@ if __name__ == '__main__':
 
 import posixpath
 from tools import shared
-from tools.shared import suffix, unsuffixed
 from tools.jsrun import run_js
-from subprocess import Popen, PIPE, STDOUT
+from subprocess import PIPE
 import fnmatch
 import json
 
@@ -96,11 +101,13 @@ data_target = sys.argv[1]
 
 IMAGE_SUFFIXES = ('.jpg', '.png', '.bmp')
 AUDIO_SUFFIXES = ('.ogg', '.wav', '.mp3')
-AUDIO_MIMETYPES = { 'ogg': 'audio/ogg', 'wav': 'audio/wav', 'mp3': 'audio/mpeg' }
+AUDIO_MIMETYPES = {'ogg': 'audio/ogg', 'wav': 'audio/wav', 'mp3': 'audio/mpeg'}
 
 DDS_HEADER_SIZE = 128
 
-AV_WORKAROUND = 0 # Set to 1 to randomize file order and add some padding, to work around silly av false positives
+# Set to 1 to randomize file order and add some padding,
+# to work around silly av false positives
+AV_WORKAROUND = 0
 
 data_files = []
 excluded_patterns = []
@@ -112,16 +119,22 @@ plugins = []
 jsoutput = None
 from_emcc = False
 force = True
-# If set to True, IndexedDB (IDBFS in library_idbfs.js) is used to locally cache VFS XHR so that subsequent
-# page loads can read the data from the offline cache instead.
+# If set to True, IndexedDB (IDBFS in library_idbfs.js) is used to locally
+# cache VFS XHR so that subsequent page loads can read the data from the
+# offline cache instead.
 use_preload_cache = False
 indexeddb_name = 'EM_PRELOAD_CACHE'
-# If set to True, the blob received from XHR is moved to the Emscripten HEAP, optimizing for mmap() performance.
-# If set to False, the XHR blob is kept intact, and fread()s etc. are performed directly to that data. This optimizes for minimal memory usage and fread() performance.
+# If set to True, the blob received from XHR is moved to the Emscripten HEAP,
+# optimizing for mmap() performance.
+# If set to False, the XHR blob is kept intact, and fread()s etc. are performed
+# directly to that data. This optimizes for minimal memory usage and fread()
+# performance.
 no_heap_copy = True
-# If set to True, the package metadata is stored separately from js-output file which makes js-output file immutable to the package content changes.
-# If set to False, the package metadata is stored inside the js-output file which makes js-output file to mutate on each invocation of this packager tool.
-separate_metadata  = False
+# If set to True, the package metadata is stored separately from the js-output
+# file, so the js-output file does not change when the package content does.
+# If set to False, the package metadata is stored inside the js-output file,
+# so the js-output file changes on each invocation of this packager tool.
+separate_metadata = False
 lz4 = False
 use_preload_plugins = False
 
@@ -170,16 +183,22 @@ for arg in sys.argv[2:]:
     leading = ''
   elif leading == 'preload' or leading == 'embed':
     mode = leading
-    at_position = arg.replace('@@', '__').find('@') # position of @ if we're doing 'src@dst'. '__' is used to keep the index same with the original if they escaped with '@@'.
-    uses_at_notation = (at_position != -1) # '@@' in input string means there is an actual @ character, a single '@' means the 'src@dst' notation.
+    # position of @ if we're doing 'src@dst'. '__' is used to keep the index
+    # same with the original if they escaped with '@@'.
+    at_position = arg.replace('@@', '__').find('@')
+    # '@@' in input string means there is an actual @ character, a single '@'
+    # means the 'src@dst' notation.
+    uses_at_notation = (at_position != -1)
 
     if uses_at_notation:
       srcpath = arg[0:at_position].replace('@@', '@') # split around the @
-      dstpath = arg[at_position+1:].replace('@@', '@')
+      dstpath = arg[at_position + 1:].replace('@@', '@')
     else:
-      srcpath = dstpath = arg.replace('@@', '@') # Use source path as destination path.
+      # Use source path as destination path.
+      srcpath = dstpath = arg.replace('@@', '@')
     if os.path.isfile(srcpath) or os.path.isdir(srcpath):
-      data_files.append({ 'srcpath': srcpath, 'dstpath': dstpath, 'mode': mode, 'explicit_dst_path': uses_at_notation })
+      data_files.append({'srcpath': srcpath, 'dstpath': dstpath, 'mode': mode,
+                         'explicit_dst_path': uses_at_notation})
     else:
       print('Warning: ' + arg + ' does not exist, ignoring.', file=sys.stderr)
   elif leading == 'exclude':
@@ -190,14 +209,19 @@ for arg in sys.argv[2:]:
 
 if (not force) and len(data_files) == 0:
   has_preloaded = False
-if not has_preloaded or jsoutput == None:
-  assert not separate_metadata, 'cannot separate-metadata without both --preloaded files and a specified --js-output'
+if not has_preloaded or jsoutput is None:
+  assert not separate_metadata, (
+     'cannot separate-metadata without both --preloaded files '
+     'and a specified --js-output')
 
 if not from_emcc:
-  print('Remember to build the main file with  -s FORCE_FILESYSTEM=1  so that it includes support for loading this file package', file=sys.stderr)
+  print('Remember to build the main file with  -s FORCE_FILESYSTEM=1  '
+        'so that it includes support for loading this file package',
+        file=sys.stderr)
 
 ret = ''
-# emcc.py will add this to the output itself, so it is only needed for standalone calls
+# emcc.py will add this to the output itself, so it is only needed for
+# standalone calls
 if not from_emcc:
   ret = '''
 var Module = typeof %(EXPORT_NAME)s !== 'undefined' ? %(EXPORT_NAME)s : {};
@@ -219,22 +243,26 @@ code = '''
     }
 '''
 
-# Win32 code to test whether the given file has the hidden property set.
+
 def has_hidden_attribute(filepath):
+  """Win32 code to test whether the given file has the hidden property set."""
+
   if sys.platform != 'win32':
     return False
 
   try:
-    attrs = ctypes.windll.kernel32.GetFileAttributesW(unicode(filepath))
+    attrs = ctypes.windll.kernel32.GetFileAttributesW(
+        u'%s' % filepath)
     assert attrs != -1
     result = bool(attrs & 2)
-  except:
+  except Exception:
     result = False
   return result
 
-# The packager should never preload/embed files if the file is hidden (Win32).
-# or it matches any pattern specified in --exclude
+
 def should_ignore(fullname):
+  """The packager should never preload/embed files if the file
+  is hidden (Win32) or it matches any pattern specified in --exclude"""
   if has_hidden_attribute(fullname):
     return True
 
@@ -243,10 +271,15 @@ def should_ignore(fullname):
       return True
   return False
 
-# Expand directories into individual files
+
 def add(mode, rootpathsrc, rootpathdst):
-  # rootpathsrc: The path name of the root directory on the local FS we are adding to emscripten virtual FS.
-  # rootpathdst: The name we want to make the source path available on the emscripten virtual FS.
+  """Expand directories into individual files
+
+  rootpathsrc: The path name of the root directory on the local FS we are
+               adding to emscripten virtual FS.
+  rootpathdst: The name under which the source path is made available on
+               the emscripten virtual FS.
+  """
   for dirpath, dirnames, filenames in os.walk(rootpathsrc):
     new_dirnames = []
     for name in dirnames:
@@ -254,17 +287,23 @@ def add(mode, rootpathsrc, rootpathdst):
       if not should_ignore(fullname):
         new_dirnames.append(name)
       elif DEBUG:
-        print('Skipping directory "' + fullname + '" from inclusion in the emscripten virtual file system.', file=sys.stderr)
+        print('Skipping directory "%s" from inclusion in the emscripten '
+              'virtual file system.' % fullname, file=sys.stderr)
     for name in filenames:
       fullname = os.path.join(dirpath, name)
       if not should_ignore(fullname):
-        dstpath = os.path.join(rootpathdst, os.path.relpath(fullname, rootpathsrc)) # Convert source filename relative to root directory of target FS.
-        new_data_files.append({ 'srcpath': fullname, 'dstpath': dstpath, 'mode': mode, 'explicit_dst_path': True })
+        # Convert source filename relative to root directory of target FS.
+        dstpath = os.path.join(rootpathdst,
+                               os.path.relpath(fullname, rootpathsrc))
+        new_data_files.append({'srcpath': fullname, 'dstpath': dstpath,
+                               'mode': mode, 'explicit_dst_path': True})
       elif DEBUG:
-        print('Skipping file "' + fullname + '" from inclusion in the emscripten virtual file system.', file=sys.stderr)
+        print('Skipping file "%s" from inclusion in the emscripten '
+              'virtual file system.' % fullname, file=sys.stderr)
     del dirnames[:]
     dirnames.extend(new_dirnames)
 
+
 new_data_files = []
 for file_ in data_files:
   if not should_ignore(file_['srcpath']):
@@ -272,43 +311,67 @@ for file_ in data_files:
       add(file_['mode'], file_['srcpath'], file_['dstpath'])
     else:
       new_data_files.append(file_)
-data_files = [file_ for file_ in new_data_files if not os.path.isdir(file_['srcpath'])]
+data_files = [file_ for file_ in new_data_files
+              if not os.path.isdir(file_['srcpath'])]
 if len(data_files) == 0:
   print('Nothing to do!', file=sys.stderr)
   sys.exit(1)
 
 # Absolutize paths, and check that they make sense
-curr_abspath = os.path.abspath(os.getcwd()) # os.getcwd() always returns the hard path with any symbolic links resolved, even if we cd'd into a symbolic link.
+# os.getcwd() always returns the hard path with any symbolic links resolved,
+# even if we cd'd into a symbolic link.
+curr_abspath = os.path.abspath(os.getcwd())
 
 for file_ in data_files:
   if not file_['explicit_dst_path']:
-    # This file was not defined with src@dst, so we inferred the destination from the source. In that case,
-    # we require that the destination not be under the current location
+    # This file was not defined with src@dst, so we inferred the destination
+    # from the source. In that case, we require that the destination not be
+    # under the current location
     path = file_['dstpath']
-    abspath = os.path.realpath(os.path.abspath(path)) # Use os.path.realpath to resolve any symbolic links to hard paths, to match the structure in curr_abspath.
-    if DEBUG: print(path, abspath, curr_abspath, file=sys.stderr)
+    # Use os.path.realpath to resolve any symbolic links to hard paths,
+    # to match the structure in curr_abspath.
+    abspath = os.path.realpath(os.path.abspath(path))
+    if DEBUG:
+      print(path, abspath, curr_abspath, file=sys.stderr)
     if not abspath.startswith(curr_abspath):
-      print('Error: Embedding "%s" which is below the current directory "%s". This is invalid since the current directory becomes the root that the generated code will see' % (path, curr_abspath), file=sys.stderr)
+      print('Error: Embedding "%s" which is below the current directory '
+            '"%s". This is invalid since the current directory becomes the '
+            'root that the generated code will see' % (path, curr_abspath),
+            file=sys.stderr)
       sys.exit(1)
-    file_['dstpath'] = abspath[len(curr_abspath)+1:]
+    file_['dstpath'] = abspath[len(curr_abspath) + 1:]
     if os.path.isabs(path):
-      print('Warning: Embedding an absolute file/directory name "' + path + '" to the virtual filesystem. The file will be made available in the relative path "' + file_['dstpath'] + '". You can use the explicit syntax --preload-file srcpath@dstpath to explicitly specify the target location the absolute source path should be directed to.', file=sys.stderr)
+      print('Warning: Embedding an absolute file/directory name "%s" to the '
+            'virtual filesystem. The file will be made available in the '
+            'relative path "%s". You can use the explicit syntax '
+            '--preload-file srcpath@dstpath to explicitly specify the target '
+            'location the absolute source path should be directed to.'
+            % (path, file_['dstpath']), file=sys.stderr)
 
 for file_ in data_files:
-  file_['dstpath'] = file_['dstpath'].replace(os.path.sep, '/') # name in the filesystem, native and emulated
-  if file_['dstpath'].endswith('/'): # If user has submitted a directory name as the destination but omitted the destination filename, use the filename from source file
+  # name in the filesystem, native and emulated
+  file_['dstpath'] = file_['dstpath'].replace(os.path.sep, '/')
+  # If user has submitted a directory name as the destination but omitted
+  # the destination filename, use the filename from source file
+  if file_['dstpath'].endswith('/'):
     file_['dstpath'] = file_['dstpath'] + os.path.basename(file_['srcpath'])
   # make destination path always relative to the root
   file_['dstpath'] = posixpath.normpath(os.path.join('/', file_['dstpath']))
   if DEBUG:
-    print('Packaging file "' + file_['srcpath'] + '" to VFS in path "' + file_['dstpath'] + '".', file=sys.stderr)
+    print('Packaging file "%s" to VFS in path "%s".'
+          % (file_['srcpath'],  file_['dstpath']), file=sys.stderr)
 
 # Remove duplicates (can occur naively, for example preload dir/, preload dir/subdir/)
 seen = {}
+
+
 def was_seen(name):
-  if seen.get(name): return True
+  if seen.get(name):
+    return True
   seen[name] = 1
   return False
+
+
 data_files = [file_ for file_ in data_files if not was_seen(file_['dstpath'])]
 
 if AV_WORKAROUND:
@@ -329,27 +392,32 @@ for file_ in data_files:
   if dirname != '':
     parts = dirname.split('/')
     for i in range(len(parts)):
-      partial = '/'.join(parts[:i+1])
+      partial = '/'.join(parts[:i + 1])
       if partial not in partial_dirs:
-        code += '''Module['FS_createPath']('/%s', '%s', true, true);\n''' % ('/'.join(parts[:i]), parts[i])
+        code += ('''Module['FS_createPath']('/%s', '%s', true, true);\n'''
+                 % ('/'.join(parts[:i]), parts[i]))
         partial_dirs.append(partial)
 
 if has_preloaded:
-  # Bundle all datafiles into one archive. Avoids doing lots of simultaneous XHRs which has overhead.
+  # Bundle all datafiles into one archive. Avoids doing lots of simultaneous
+  # XHRs which has overhead.
   data = open(data_target, 'wb')
   start = 0
   for file_ in data_files:
     file_['data_start'] = start
     curr = open(file_['srcpath'], 'rb').read()
     file_['data_end'] = start + len(curr)
-    if AV_WORKAROUND: curr += '\x00'
-    #print >> sys.stderr, 'bundling', file_['srcpath'], file_['dstpath'], file_['data_start'], file_['data_end']
+    if AV_WORKAROUND:
+      curr += b'\x00'
     start += len(curr)
     data.write(curr)
   data.close()
   # TODO: sha256sum on data_target
-  if start > 256*1024*1024:
-    print('warning: file packager is creating an asset bundle of %d MB. this is very large, and browsers might have trouble loading it. see https://hacks.mozilla.org/2015/02/synchronous-execution-and-filesystem-access-in-emscripten/' % (start/(1024*1024)), file=sys.stderr)
+  if start > 256 * 1024 * 1024:
+    print('warning: file packager is creating an asset bundle of %d MB. '
+          'this is very large, and browsers might have trouble loading it. '
+          'see https://hacks.mozilla.org/2015/02/synchronous-execution-and-filesystem-access-in-emscripten/'
+          % (start / (1024 * 1024)), file=sys.stderr)
 
   create_preloaded = '''
         Module['FS_createPreloadedFile'](this.name, null, byteArray, true, true, function() {
@@ -367,7 +435,8 @@ if has_preloaded:
         Module['removeRunDependency']('fp ' + that.name);
 '''
 
-  # Data requests - for getting a block of data out of the big archive - have a similar API to XHRs
+  # Data requests - for getting a block of data out of the big archive - have
+  # a similar API to XHRs
   code += '''
     function DataRequest(start, end, audio) {
       this.start = start;
@@ -414,10 +483,12 @@ for file_ in data_files:
       chunk_size = 10240
       start = 0
       while start < len(data):
-        parts.append('''fileData%d.push.apply(fileData%d, %s);\n''' % (counter, counter, str(data[start:start+chunk_size])))
+        parts.append('''fileData%d.push.apply(fileData%d, %s);\n'''
+                     % (counter, counter, str(data[start:start + chunk_size])))
         start += chunk_size
       code += ''.join(parts)
-    code += '''Module['FS_createDataFile']('%s', '%s', fileData%d, true, true, false);\n''' % (dirname, basename, counter)
+    code += ('''Module['FS_createDataFile']('%s', '%s', fileData%d, true, true, false);\n'''
+             % (dirname, basename, counter))
     counter += 1
   elif file_['mode'] == 'preload':
     # Preload
@@ -439,7 +510,6 @@ if has_preloaded:
       use_data = '''
         // copy the entire loaded file into a spot in the heap. Files will refer to slices in that. They cannot be freed though
         // (we may be allocating before malloc is ready, during startup).
-        if (Module['SPLIT_MEMORY']) err('warning: you should run the file packager with --no-heap-copy when SPLIT_MEMORY is used, otherwise copying into the heap may fail due to the splitting');
         var ptr = Module['getMemory'](byteArray.length);
         Module['HEAPU8'].set(byteArray, ptr);
         DataRequest.prototype.byteArray = Module['HEAPU8'].subarray(ptr, ptr+byteArray.length);
@@ -455,13 +525,17 @@ if has_preloaded:
             DataRequest.prototype.requests[files[i].filename].onload();
           }
     '''
-    use_data += "          Module['removeRunDependency']('datafile_%s');\n" % shared.JS.escape_for_js_string(data_target)
+    use_data += ("          Module['removeRunDependency']('datafile_%s');\n"
+                 % shared.JS.escape_for_js_string(data_target))
 
   else:
     # LZ4FS usage
     temp = data_target + '.orig'
     shutil.move(data_target, temp)
-    meta = run_js(shared.path_from_root('tools', 'lz4-compress.js'), shared.NODE_JS, [shared.path_from_root('src', 'mini-lz4.js'), temp, data_target], stdout=PIPE)
+    meta = run_js(shared.path_from_root('tools', 'lz4-compress.js'),
+                  shared.NODE_JS,
+                  [shared.path_from_root('src', 'mini-lz4.js'),
+                   temp, data_target], stdout=PIPE)
     os.unlink(temp)
     use_data = '''
           var compressedData = %s;
@@ -473,8 +547,7 @@ if has_preloaded:
 
   package_uuid = uuid.uuid4()
   package_name = data_target
-  statinfo = os.stat(package_name)
-  remote_package_size = statinfo.st_size
+  remote_package_size = os.path.getsize(package_name)
   remote_package_name = os.path.basename(package_name)
   ret += r'''
     var PACKAGE_PATH;
@@ -493,7 +566,8 @@ if has_preloaded:
       err('warning: you defined Module.locateFilePackage, that has been renamed to Module.locateFile (using your locateFilePackage for now)');
     }
     var REMOTE_PACKAGE_NAME = Module['locateFile'] ? Module['locateFile'](REMOTE_PACKAGE_BASE, '') : REMOTE_PACKAGE_BASE;
-  ''' % (shared.JS.escape_for_js_string(data_target), shared.JS.escape_for_js_string(remote_package_name))
+  ''' % (shared.JS.escape_for_js_string(data_target),
+         shared.JS.escape_for_js_string(remote_package_name))
   metadata['remote_package_size'] = remote_package_size
   metadata['package_uuid'] = str(package_uuid)
   ret += '''
@@ -538,59 +612,122 @@ if has_preloaded:
         };
       };
 
+      // This is needed as chromium has a limit on per-entry files in IndexedDB
+      // https://cs.chromium.org/chromium/src/content/renderer/indexed_db/webidbdatabase_impl.cc?type=cs&sq=package:chromium&g=0&l=177
+      // https://cs.chromium.org/chromium/src/out/Debug/gen/third_party/blink/public/mojom/indexeddb/indexeddb.mojom.h?type=cs&sq=package:chromium&g=0&l=60
+      // We set the chunk size to 64MB to stay well-below the limit
+      var CHUNK_SIZE = 64 * 1024 * 1024;
+
+      function cacheRemotePackage(
+        db,
+        packageName,
+        packageData,
+        packageMeta,
+        callback,
+        errback
+      ) {
+        var transactionPackages = db.transaction([PACKAGE_STORE_NAME], IDB_RW);
+        var packages = transactionPackages.objectStore(PACKAGE_STORE_NAME);
+        var chunkSliceStart = 0;
+        var nextChunkSliceStart = 0;
+        var chunkCount = Math.ceil(packageData.byteLength / CHUNK_SIZE);
+        var finishedChunks = 0;
+        for (var chunkId = 0; chunkId < chunkCount; chunkId++) {
+          nextChunkSliceStart += CHUNK_SIZE;
+          var putPackageRequest = packages.put(
+            packageData.slice(chunkSliceStart, nextChunkSliceStart),
+            'package/' + packageName + '/' + chunkId
+          );
+          chunkSliceStart = nextChunkSliceStart;
+          putPackageRequest.onsuccess = function(event) {
+            finishedChunks++;
+            if (finishedChunks == chunkCount) {
+              var transaction_metadata = db.transaction(
+                [METADATA_STORE_NAME],
+                IDB_RW
+              );
+              var metadata = transaction_metadata.objectStore(METADATA_STORE_NAME);
+              var putMetadataRequest = metadata.put(
+                {
+                  uuid: packageMeta.uuid,
+                  chunkCount: chunkCount
+                },
+                'metadata/' + packageName
+              );
+              putMetadataRequest.onsuccess = function(event) {
+                callback(packageData);
+              };
+              putMetadataRequest.onerror = function(error) {
+                errback(error);
+              };
+            }
+          };
+          putPackageRequest.onerror = function(error) {
+            errback(error);
+          };
+        }
+      }
+
       /* Check if there's a cached package, and if so whether it's the latest available */
       function checkCachedPackage(db, packageName, callback, errback) {
         var transaction = db.transaction([METADATA_STORE_NAME], IDB_RO);
         var metadata = transaction.objectStore(METADATA_STORE_NAME);
-
-        var getRequest = metadata.get("metadata/" + packageName);
+        var getRequest = metadata.get('metadata/' + packageName);
         getRequest.onsuccess = function(event) {
           var result = event.target.result;
           if (!result) {
-            return callback(false);
+            return callback(false, null);
           } else {
-            return callback(PACKAGE_UUID === result.uuid);
+            return callback(PACKAGE_UUID === result.uuid, result);
           }
         };
         getRequest.onerror = function(error) {
           errback(error);
         };
-      };
+      }
 
-      function fetchCachedPackage(db, packageName, callback, errback) {
+      function fetchCachedPackage(db, packageName, metadata, callback, errback) {
         var transaction = db.transaction([PACKAGE_STORE_NAME], IDB_RO);
         var packages = transaction.objectStore(PACKAGE_STORE_NAME);
 
-        var getRequest = packages.get("package/" + packageName);
-        getRequest.onsuccess = function(event) {
-          var result = event.target.result;
-          callback(result);
-        };
-        getRequest.onerror = function(error) {
-          errback(error);
-        };
-      };
+        var chunksDone = 0;
+        var totalSize = 0;
+        var chunks = [];
 
-      function cacheRemotePackage(db, packageName, packageData, packageMeta, callback, errback) {
-        var transaction_packages = db.transaction([PACKAGE_STORE_NAME], IDB_RW);
-        var packages = transaction_packages.objectStore(PACKAGE_STORE_NAME);
-
-        var putPackageRequest = packages.put(packageData, "package/" + packageName);
-        putPackageRequest.onsuccess = function(event) {
-          var transaction_metadata = db.transaction([METADATA_STORE_NAME], IDB_RW);
-          var metadata = transaction_metadata.objectStore(METADATA_STORE_NAME);
-          var putMetadataRequest = metadata.put(packageMeta, "metadata/" + packageName);
-          putMetadataRequest.onsuccess = function(event) {
-            callback(packageData);
+        for (var chunkId = 0; chunkId < metadata.chunkCount; chunkId++) {
+          var getRequest = packages.get('package/' + packageName + '/' + chunkId);
+          getRequest.onsuccess = function(event) {
+            // If there's only 1 chunk, there's nothing to concatenate it with so we can just return it now
+            if (metadata.chunkCount == 1) {
+              callback(event.target.result);
+            } else {
+              chunksDone++;
+              totalSize += event.target.result.byteLength;
+              chunks.push(event.target.result);
+              if (chunksDone == metadata.chunkCount) {
+                if (chunksDone == 1) {
+                  callback(event.target.result);
+                } else {
+                  var tempTyped = new Uint8Array(totalSize);
+                  var byteOffset = 0;
+                  for (var chunkId in chunks) {
+                    var buffer = chunks[chunkId];
+                    tempTyped.set(new Uint8Array(buffer), byteOffset);
+                    byteOffset += buffer.byteLength;
+                    buffer = undefined;
+                  }
+                  chunks = undefined;
+                  callback(tempTyped.buffer);
+                  tempTyped = undefined;
+                }
+              }
+            }
           };
-          putMetadataRequest.onerror = function(error) {
+          getRequest.onerror = function(error) {
             errback(error);
           };
-        };
-        putPackageRequest.onerror = function(error) {
-          errback(error);
-        };
-      };
+        }
+      }
     '''
 
   ret += r'''
@@ -657,7 +794,9 @@ if has_preloaded:
       %s
     };
     Module['addRunDependency']('datafile_%s');
-  ''' % (use_data, shared.JS.escape_for_js_string(data_target)) # use basename because from the browser's point of view, we need to find the datafile in the same dir as the html file
+  ''' % (use_data, shared.JS.escape_for_js_string(data_target))
+  # (Above:) use basename because, from the browser's point of view,
+  # we need to find the datafile in the same dir as the html file.
 
   code += r'''
     if (!Module.preloadResults) Module.preloadResults = {};
@@ -674,11 +813,11 @@ if has_preloaded:
       openDatabase(
         function(db) {
           checkCachedPackage(db, PACKAGE_PATH + PACKAGE_NAME,
-            function(useCached) {
+            function(useCached, metadata) {
               Module.preloadResults[PACKAGE_NAME] = {fromCache: useCached};
               if (useCached) {
                 console.info('loading ' + PACKAGE_NAME + ' from cache');
-                fetchCachedPackage(db, PACKAGE_PATH + PACKAGE_NAME, processPackageData, preloadFallback);
+                fetchCachedPackage(db, PACKAGE_PATH + PACKAGE_NAME, metadata, processPackageData, preloadFallback);
               } else {
                 console.info('loading ' + PACKAGE_NAME + ' from remote');
                 fetchRemotePackage(REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE,
@@ -699,8 +838,10 @@ if has_preloaded:
       if (Module['setStatus']) Module['setStatus']('Downloading...');
     '''
   else:
-    # Not using preload cache, so we might as well start the xhr ASAP, potentially before JS parsing of the main codebase if it's after us.
-    # Only tricky bit is the fetch is async, but also when runWithFS is called is async, so we handle both orderings.
+    # Not using preload cache, so we might as well start the xhr ASAP,
+    # potentially before JS parsing of the main codebase if it's after us.
+    # The only tricky bit is that both the fetch and the call to runWithFS
+    # are async, so we handle both orderings.
     ret += r'''
       var fetchedCallback = null;
       var fetched = Module['getPreloadedPackage'] ? Module['getPreloadedPackage'](REMOTE_PACKAGE_NAME, REMOTE_PACKAGE_SIZE) : null;
@@ -739,9 +880,8 @@ ret += '''
   }
 '''
 
-ret += '''%s
-})();
-''' % ('''
+if separate_metadata:
+    _metadata_template = '''
   Module['removeRunDependency']('%(metadata_file)s');
  }
 
@@ -765,16 +905,26 @@ ret += '''%s
   if (!Module['preRun']) Module['preRun'] = [];
   Module["preRun"].push(runMetaWithFS);
  }
-''' % {'metadata_file': os.path.basename(jsoutput + '.metadata')} if separate_metadata else '''
+''' % {'metadata_file': os.path.basename(jsoutput + '.metadata')}
+
+else:
+    _metadata_template = '''
  }
  loadPackage(%s);
-''' % json.dumps(metadata))
+''' % json.dumps(metadata)
+
+ret += '''%s
+})();
+''' % _metadata_template
+
 
 if force or len(data_files):
-  if jsoutput == None:
+  if jsoutput is None:
     print(ret)
   else:
-    # Overwrite the old jsoutput file (if exists) only when its content differs from the current generated one, otherwise leave the file untouched preserving its old timestamp
+    # Overwrite the old jsoutput file (if it exists) only when its content
+    # differs from the current generated one, otherwise leave the file
+    # untouched preserving its old timestamp
     if os.path.isfile(jsoutput):
       f = open(jsoutput, 'r+')
       old = f.read()