diff --git a/src/js/load-pyodide.js b/src/js/load-pyodide.js index e3d01e4f1..8810f9a2e 100644 --- a/src/js/load-pyodide.js +++ b/src/js/load-pyodide.js @@ -1,5 +1,10 @@ import { Module } from "./module.js"; +// +// Initialization code and node/browser shims +// + +// Detect if we're in node const IN_NODE = typeof process !== "undefined" && process.release && @@ -7,10 +12,33 @@ const IN_NODE = typeof process.browser === "undefined"; /* This last condition checks if we run the browser shim of process */ +let nodePathMod; +let nodeFetch; +let nodeFsPromisesMod; +let nodeVmMod; + +/** + * If we're in node, it's most convenient to import various node modules on + * initialization. Otherwise, this does nothing. + * @private + */ +export async function initNodeModules() { + if (!IN_NODE) { + return; + } + nodePathMod = (await import(/* webpackIgnore: true */ "path")).default; + nodeFsPromisesMod = await import(/* webpackIgnore: true */ "fs/promises"); + nodeFetch = (await import(/* webpackIgnore: true */ "node-fetch")).default; + nodeVmMod = (await import(/* webpackIgnore: true */ "vm")).default; +} + /** @typedef {import('./pyproxy.js').PyProxy} PyProxy */ /** @private */ let baseURL; /** + * Initialize the packages index. This is called as early as possible in + * loadPyodide so that fetching packages.json can occur in parallel with other + * operations. 
* @param {string} indexURL * @private */ @@ -18,8 +46,7 @@ export async function initializePackageIndex(indexURL) { baseURL = indexURL; let package_json; if (IN_NODE) { - const fsPromises = await import(/* webpackIgnore: true */ "fs/promises"); - const package_string = await fsPromises.readFile( + const package_string = await nodeFsPromisesMod.readFile( `${indexURL}packages.json` ); package_json = JSON.parse(package_string); @@ -43,19 +70,94 @@ export async function initializePackageIndex(indexURL) { } } -export async function _fetchBinaryFile(indexURL, path) { - if (IN_NODE) { - const fsPromises = await import(/* webpackIgnore: true */ "fs/promises"); - const tar_buffer = await fsPromises.readFile(`${indexURL}${path}`); - return tar_buffer.buffer; - } else { - let response = await fetch(`${indexURL}${path}`); +/** + * Load a binary file, only for use in Node. If the path explicitly is a URL, + * then fetch from a URL, else load from the file system. + * @param {string} indexURL base path to resolve relative paths + * @param {string} path the path to load + * @returns A Uint8Array containing the binary data + * @private + */ +async function node_loadBinaryFile(indexURL, path) { + if (path.includes("://")) { + let response = await nodeFetch(path); + if (!response.ok) { + throw new Error(`Failed to load '${path}': request failed.`); + } - return await response.arrayBuffer(); + return new Uint8Array(await response.arrayBuffer()); + } else { + const data = await nodeFsPromisesMod.readFile(`${indexURL}${path}`); + return new Uint8Array(data.buffer, data.byteOffset, data.byteLength); } } -//////////////////////////////////////////////////////////// -// Package loading +/** + * Load a binary file, only for use in browser. Resolves relative paths against + * indexURL. 
+ * + * @param {string} indexURL base path to resolve relative paths + * @param {string} path the path to load + * @returns A Uint8Array containing the binary data + * @private + */ +async function browser_loadBinaryFile(indexURL, path) { + const base = new URL(indexURL, location); + const url = new URL(path, base); + let response = await fetch(url); + if (!response.ok) { + throw new Error(`Failed to load '${url}': request failed.`); + } + return new Uint8Array(await response.arrayBuffer()); +} + +export let _loadBinaryFile; +if (IN_NODE) { + _loadBinaryFile = node_loadBinaryFile; +} else { + _loadBinaryFile = browser_loadBinaryFile; +} + +/** + * Load a text file and execute it as JavaScript + * @param {string} url The path to load. May be a url or a relative file system path. + * @private + */ +async function nodeLoadScript(url) { + if (url.includes("://")) { + // If it's a url, load it with fetch then eval it. + nodeVmMod.runInThisContext(await (await nodeFetch(url)).text()); + } else { + // Otherwise, hopefully it is a relative path we can load from the file + // system. + await import(nodePathMod.resolve(url)); + } +} + +/** + * @param {string} url + * @async + * @private + */ +export let loadScript; + +if (globalThis.document) { + // browser + loadScript = async (url) => await import(/* webpackIgnore: true */ url); +} else if (globalThis.importScripts) { + // webworker + loadScript = async (url) => { + // This is async only for consistency + globalThis.importScripts(url); + }; +} else if (IN_NODE) { + loadScript = nodeLoadScript; +} else { + throw new Error("Cannot determine runtime environment"); +} + +// +// Dependency resolution +// const DEFAULT_CHANNEL = "default channel"; // Regexp for validating package name and URI @@ -69,71 +171,54 @@ function _uri_to_package_name(package_uri) { } /** - * @param {string) url - * @async + * Recursively add a package and its dependencies to toLoad and toLoadShared. + * A helper function for recursiveDependencies. 
+ * @param {string} name The package to add + * @param {Map} toLoad Map from package name to channel or URI for regular packages + * @param {Map} toLoadShared Map from package name to channel for shared libraries * @private */ -export let loadScript; -if (globalThis.document) { - // browser - loadScript = async (url) => await import(/* webpackIgnore: true */ url); -} else if (globalThis.importScripts) { - // webworker - loadScript = async (url) => { - // This is async only for consistency - globalThis.importScripts(url); - }; -} else if (IN_NODE) { - const pathPromise = import(/* webpackIgnore: true */ "path").then( - (M) => M.default - ); - const fetchPromise = import("node-fetch").then((M) => M.default); - const vmPromise = import(/* webpackIgnore: true */ "vm").then( - (M) => M.default - ); - loadScript = async (url) => { - if (url.includes("://")) { - // If it's a url, have to load it with fetch and then eval it. - const fetch = await fetchPromise; - const vm = await vmPromise; - vm.runInThisContext(await (await fetch(url)).text()); - } else { - // Otherwise, hopefully it is a relative path we can load from the file - // system. - const path = await pathPromise; - await import(path.resolve(url)); - } - }; -} else { - throw new Error("Cannot determine runtime environment"); -} - -function addPackageToLoad(name, toLoad) { +function addPackageToLoad(name, toLoad, toLoadShared) { name = name.toLowerCase(); - if (toLoad.has(name)) { + if (toLoad.has(name) || toLoadShared.has(name)) { return; } - toLoad.set(name, DEFAULT_CHANNEL); + const pkg_info = Module.packages[name]; + if (!pkg_info) { + throw new Error(`No known package with name '${name}'`); + } + if (pkg_info.shared_library) { + toLoadShared.set(name, DEFAULT_CHANNEL); + } else { + toLoad.set(name, DEFAULT_CHANNEL); + } // If the package is already loaded, we don't add dependencies, but warn // the user later. 
This is especially important if the loaded package is // from a custom url, in which case adding dependencies is wrong. 
if (loadedPackages[name] !== undefined) { return; } - for (let dep_name of Module.packages[name].depends) { - addPackageToLoad(dep_name, toLoad); + for (let dep_name of pkg_info.depends) { + addPackageToLoad(dep_name, toLoad, toLoadShared); } } -function recursiveDependencies( - names, - _messageCallback, - errorCallback, - sharedLibsOnly -) { +/** + * Calculate the dependencies of a set of packages + * @param names The list of package names or URIs whose dependencies we need to calculate. + * @returns A pair of Maps [toLoad, toLoadShared]: normal packages and shared + * library packages, each mapping package name to its channel or URI + * @private + */ +function recursiveDependencies(names, errorCallback) { const toLoad = new Map(); + const toLoadShared = new Map(); for (let name of names) { const pkgname = _uri_to_package_name(name); + if (pkgname === undefined) { + addPackageToLoad(name.toLowerCase(), toLoad, toLoadShared); + continue; + } if (toLoad.has(pkgname) && toLoad.get(pkgname) !== name) { errorCallback( `Loading same package ${pkgname} from ${name} and ${toLoad.get( @@ -142,28 +227,9 @@ function recursiveDependencies( ); continue; } - if (pkgname !== undefined) { - toLoad.set(pkgname, name); - continue; - } - name = name.toLowerCase(); - if (name in Module.packages) { - addPackageToLoad(name, toLoad); - continue; - } - errorCallback(`Skipping unknown package '${name}'`); + toLoad.set(pkgname, name); } - if (sharedLibsOnly) { - let onlySharedLibs = new Map(); - for (let c of toLoad) { - let name = c[0]; - if (Module.packages[name].shared_library) { - onlySharedLibs.set(name, toLoad.get(name)); - } - } - return onlySharedLibs; - } - return toLoad; + return [toLoad, toLoadShared]; } // locateFile is the function used by the .js file to locate the .data file @@ -201,110 +267,6 @@ function waitRunDependency() { return promise; } -async function _loadPackage(names, messageCallback, errorCallback) { - // toLoad is a map pkg_name => pkg_uri - let toLoad = recursiveDependencies(names, messageCallback, errorCallback); - // Tell 
Module.locateFile about the packages we're loading - Module.locateFile_packagesToLoad = toLoad; - if (toLoad.size === 0) { - return Promise.resolve("No new packages to load"); - } else { - let packageNames = Array.from(toLoad.keys()).join(", "); - messageCallback(`Loading ${packageNames}`); - } - - // This is a collection of promises that resolve when the package's JS file is - // loaded. The promises already handle error and never fail. - let scriptPromises = []; - - for (let [pkg, uri] of toLoad) { - let loaded = loadedPackages[pkg]; - if (loaded !== undefined) { - // If uri is from the DEFAULT_CHANNEL, we assume it was added as a - // depedency, which was previously overridden. - if (loaded === uri || uri === DEFAULT_CHANNEL) { - messageCallback(`${pkg} already loaded from ${loaded}`); - continue; - } else { - errorCallback( - `URI mismatch, attempting to load package ${pkg} from ${uri} ` + - `while it is already loaded from ${loaded}. To override a dependency, ` + - `load the custom package first.` - ); - continue; - } - } - let pkgname = (Module.packages[pkg] && Module.packages[pkg].name) || pkg; - let scriptSrc = uri === DEFAULT_CHANNEL ? `${baseURL}${pkgname}.js` : uri; - messageCallback(`Loading ${pkg} from ${scriptSrc}`); - scriptPromises.push( - loadScript(scriptSrc).catch((e) => { - errorCallback(`Couldn't load package from URL ${scriptSrc}`, e); - toLoad.delete(pkg); - }) - ); - } - - // We must start waiting for runDependencies *after* all the JS files are - // loaded, since the number of runDependencies may happen to equal zero - // between package files loading. 
- try { - await Promise.all(scriptPromises).then(waitRunDependency); - } finally { - delete Module.monitorRunDependencies; - } - - let packageList = []; - for (let [pkg, uri] of toLoad) { - loadedPackages[pkg] = uri; - packageList.push(pkg); - } - - let resolveMsg; - if (packageList.length > 0) { - let packageNames = packageList.join(", "); - resolveMsg = `Loaded ${packageNames}`; - } else { - resolveMsg = "No packages loaded"; - } - - Module.reportUndefinedSymbols(); - - messageCallback(resolveMsg); - - // We have to invalidate Python's import caches, or it won't - // see the new files. - Module.importlib.invalidate_caches(); -} - -// This is a promise that is resolved iff there are no pending package loads. It -// never fails. -let _package_lock = Promise.resolve(); - -/** - * An async lock for package loading. Prevents race conditions in loadPackage. - * @returns A zero argument function that releases the lock. - * @private - */ -async function acquirePackageLock() { - let old_lock = _package_lock; - let releaseLock; - _package_lock = new Promise((resolve) => (releaseLock = resolve)); - await old_lock; - return releaseLock; -} - -/** - * - * The list of packages that Pyodide has loaded. - * Use ``Object.keys(pyodide.loadedPackages)`` to get the list of names of - * loaded packages, and ``pyodide.loadedPackages[package_name]`` to access - * install location for a particular ``package_name``. - * - * @type {object} - */ -export let loadedPackages = {}; - let sharedLibraryWasmPlugin; let origWasmPlugin; let wasmPluginIndex; @@ -357,6 +319,31 @@ function restoreOrigWasmPlugin() { * @private */ +/** + * @returns A new asynchronous lock + * @private + */ +function createLock() { + // This is a promise that is resolved when the lock is open, not resolved when lock is held. + let _lock = Promise.resolve(); + + /** + * Acquire the async lock + * @returns A zero argument function that releases the lock. 
+ * @private + */ + async function acquireLock() { + let old_lock = _lock; + let releaseLock; + _lock = new Promise((resolve) => (releaseLock = resolve)); + await old_lock; + return releaseLock; + } + return acquireLock; +} + +const acquirePackageLock = createLock(); + /** * Load a package or a list of packages over the network. This installs the * package in the virtual filesystem. The package needs to be imported from @@ -375,52 +362,135 @@ function restoreOrigWasmPlugin() { * @async */ export async function loadPackage(names, messageCallback, errorCallback) { + messageCallback = messageCallback || console.log; + errorCallback = errorCallback || console.error; if (Module.isPyProxy(names)) { - let temp; - try { - temp = names.toJs(); - } finally { - names.destroy(); - } - names = temp; + names = names.toJs(); } - if (!Array.isArray(names)) { names = [names]; } - // get shared library packages and load those first - // otherwise bad things happen with linking them in firefox. - let sharedLibraryNames = []; - try { - let sharedLibraryPackagesToLoad = recursiveDependencies( - names, - messageCallback, - errorCallback, - true - ); - for (let pkg of sharedLibraryPackagesToLoad) { - sharedLibraryNames.push(pkg[0]); - } - } catch (e) { - // do nothing - let the main load throw any errors + + const [toLoad, toLoadShared] = recursiveDependencies(names, errorCallback); + if (toLoad.size === 0 && toLoadShared.size === 0) { + messageCallback("No new packages to load"); + return; } let releaseLock = await acquirePackageLock(); + for (let [pkg, uri] of [...toLoad, ...toLoadShared]) { + let loaded = loadedPackages[pkg]; + if (loaded === undefined) { + continue; + } + toLoad.delete(pkg); + toLoadShared.delete(pkg); + // If uri is from the DEFAULT_CHANNEL, we assume it was added as a + // dependency, which was previously overridden. 
+ if (loaded === uri || uri === DEFAULT_CHANNEL) { + messageCallback(`${pkg} already loaded from ${loaded}`); + } else { + errorCallback( + `URI mismatch, attempting to load package ${pkg} from ${uri} ` + + `while it is already loaded from ${loaded}. To override a dependency, ` + + `load the custom package first.` + ); + } + } + + const packageNames = [...toLoad.keys(), ...toLoadShared.keys()].join(", "); try { + messageCallback(`Loading ${packageNames}`); + let scriptPromises = []; + const loaded = []; + const failed = {}; + useSharedLibraryWasmPlugin(); - await _loadPackage( - sharedLibraryNames, - messageCallback || console.log, - errorCallback || console.error - ); + Module.locateFile_packagesToLoad = toLoadShared; + for (const [pkg, uri] of toLoadShared) { + const pkgname = + (Module.packages[pkg] && Module.packages[pkg].name) || pkg; + const scriptSrc = + uri === DEFAULT_CHANNEL ? `${baseURL}${pkgname}.js` : uri; + messageCallback(`Loading ${pkg} from ${scriptSrc}`); + scriptPromises.push( + loadScript(scriptSrc) + .then(() => { + loaded.push(pkg); + loadedPackages[pkg] = uri; + }) + .catch((e) => { + failed[pkg] = e; + }) + ); + } + + // We must start waiting for runDependencies *after* all the JS files are + // loaded, since the number of runDependencies may happen to equal zero + // between package files loading. + try { + await Promise.all(scriptPromises).then(waitRunDependency); + } finally { + delete Module.monitorRunDependencies; + } restoreOrigWasmPlugin(); - await _loadPackage( - names, - messageCallback || console.log, - errorCallback || console.error - ); + + scriptPromises = []; + Module.locateFile_packagesToLoad = toLoad; + for (const [pkg, uri] of toLoad) { + const pkgname = + (Module.packages[pkg] && Module.packages[pkg].name) || pkg; + const scriptSrc = + uri === DEFAULT_CHANNEL ? 
`${baseURL}${pkgname}.js` : uri; + messageCallback(`Loading ${pkg} from ${scriptSrc}`); + scriptPromises.push( + loadScript(scriptSrc) + .then(() => { + loaded.push(pkg); + loadedPackages[pkg] = uri; + }) + .catch((e) => { + failed[pkg] = e; + }) + ); + } + + try { + await Promise.all(scriptPromises).then(waitRunDependency); + } finally { + delete Module.monitorRunDependencies; + } + + Module.reportUndefinedSymbols(); + if (loaded.length > 0) { + const successNames = loaded.join(", "); + messageCallback(`Loaded ${successNames}`); + } + if (Object.keys(failed).length > 0) { + const failedNames = Object.keys(failed).join(", "); + messageCallback(`Failed to load ${failedNames}`); + for (let [name, err] of Object.entries(failed)) { + console.warn(`The following error occurred while loading ${name}:`); + console.error(err); + } + } + + // We have to invalidate Python's import caches, or it won't + // see the new files. + Module.importlib.invalidate_caches(); } finally { restoreOrigWasmPlugin(); releaseLock(); } } + +/** + * + * The list of packages that Pyodide has loaded. + * Use ``Object.keys(pyodide.loadedPackages)`` to get the list of names of + * loaded packages, and ``pyodide.loadedPackages[package_name]`` to access + * install location for a particular ``package_name``. 
+ * + * @type {object} + */ +export let loadedPackages = {}; diff --git a/src/js/pyodide.js b/src/js/pyodide.js index 4761f243c..48accd92d 100644 --- a/src/js/pyodide.js +++ b/src/js/pyodide.js @@ -5,8 +5,9 @@ import { Module, setStandardStreams, setHomeDirectory } from "./module.js"; import { loadScript, initializePackageIndex, - _fetchBinaryFile, + _loadBinaryFile, loadPackage, + initNodeModules, } from "./load-pyodide.js"; import { makePublicAPI, registerJsModule } from "./api.js"; import "./pyproxy.gen.js"; @@ -144,7 +145,7 @@ function unpackPyodidePy(pyodide_py_tar) { let stream = Module.FS.open(fileName, "w"); Module.FS.write( stream, - new Uint8Array(pyodide_py_tar), + pyodide_py_tar, 0, pyodide_py_tar.byteLength, undefined, @@ -263,8 +264,9 @@ export async function loadPyodide(config) { config.indexURL += "/"; } Module.indexURL = config.indexURL; + await initNodeModules(); let packageIndexReady = initializePackageIndex(config.indexURL); - let pyodide_py_tar_promise = _fetchBinaryFile( + let pyodide_py_tar_promise = _loadBinaryFile( config.indexURL, "pyodide_py.tar" ); diff --git a/src/tests/test_package_loading.py b/src/tests/test_package_loading.py index aaf9c6b2e..1f4f78543 100644 --- a/src/tests/test_package_loading.py +++ b/src/tests/test_package_loading.py @@ -74,13 +74,16 @@ def test_uri_mismatch(selenium_standalone): def test_invalid_package_name(selenium): - selenium.load_package("wrong name+$") - assert "Skipping unknown package" in selenium.logs - - selenium.clean_logs() - - selenium.load_package("tcp://some_url") - assert "Skipping unknown package" in selenium.logs + with pytest.raises( + selenium.JavascriptException, + match=r"No known package with name 'wrong name\+\$'", + ): + selenium.load_package("wrong name+$") + with pytest.raises( + selenium.JavascriptException, + match="No known package with name 'tcp://some_url'", + ): + selenium.load_package("tcp://some_url") @pytest.mark.parametrize( @@ -118,10 +121,12 @@ def 
test_load_handle_failure(selenium_standalone): selenium = selenium_standalone selenium.load_package("pytz") selenium.run("import pytz") - selenium.load_package("pytz2") + with pytest.raises( + selenium.JavascriptException, match="No known package with name 'pytz2'" + ): + selenium.load_package("pytz2") selenium.load_package("pyparsing") assert "Loading pytz" in selenium.logs - assert "Skipping unknown package 'pytz2'" in selenium.logs assert "Loading pyparsing" in selenium.logs @@ -130,7 +135,7 @@ def test_load_failure_retry(selenium_standalone): selenium = selenium_standalone selenium.load_package("http://invalidurl/pytz.js") assert selenium.logs.count("Loading pytz from") == 1 - assert selenium.logs.count("Couldn't load package from URL") == 1 + assert selenium.logs.count("The following error occurred while loading pytz:") == 1 assert selenium.run_js("return Object.keys(pyodide.loadedPackages)") == [] selenium.load_package("pytz")