MAINT Refactor of load-pyodide (reduces diff to wheels PR) (#2106)

This commit is contained in:
Hood Chatham 2022-01-13 14:52:06 -08:00 committed by GitHub
parent 3f5a6aceda
commit 5c1588d011
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 306 additions and 229 deletions

View File

@ -1,5 +1,10 @@
import { Module } from "./module.js";
//
// Initialization code and node/browser shims
//
// Detect if we're in node
const IN_NODE =
typeof process !== "undefined" &&
process.release &&
@ -7,10 +12,33 @@ const IN_NODE =
typeof process.browser ===
"undefined"; /* This last condition checks if we run the browser shim of process */
let nodePathMod;
let nodeFetch;
let nodeFsPromisesMod;
let nodeVmMod;
/**
 * Import the Node modules used by this file (`path`, `fs/promises`,
 * `node-fetch` and `vm`) and store them in the module-level variables
 * nodePathMod, nodeFsPromisesMod, nodeFetch and nodeVmMod. When not running
 * in Node this does nothing.
 *
 * The four imports are independent of each other, so they are started
 * together and awaited as a group instead of one after another.
 * @returns {Promise<void>} Resolves once every module has been stored.
 * @private
 */
export async function initNodeModules() {
  if (!IN_NODE) {
    return;
  }
  const [pathMod, fsPromisesMod, fetchMod, vmMod] = await Promise.all([
    import(/* webpackIgnore: true */ "path"),
    import(/* webpackIgnore: true */ "fs/promises"),
    import(/* webpackIgnore: true */ "node-fetch"),
    import(/* webpackIgnore: true */ "vm"),
  ]);
  nodePathMod = pathMod.default;
  // fs/promises is stored as the module namespace itself, not its default export.
  nodeFsPromisesMod = fsPromisesMod;
  nodeFetch = fetchMod.default;
  nodeVmMod = vmMod.default;
}
/** @typedef {import('./pyproxy.js').PyProxy} PyProxy */
/** @private */
let baseURL;
/**
* Initialize the packages index. This is called as early as possible in
* loadPyodide so that fetching packages.json can occur in parallel with other
* operations.
* @param {string} indexURL
* @private
*/
@ -18,8 +46,7 @@ export async function initializePackageIndex(indexURL) {
baseURL = indexURL;
let package_json;
if (IN_NODE) {
const fsPromises = await import(/* webpackIgnore: true */ "fs/promises");
const package_string = await fsPromises.readFile(
const package_string = await nodeFsPromisesMod.readFile(
`${indexURL}packages.json`
);
package_json = JSON.parse(package_string);
@ -43,19 +70,94 @@ export async function initializePackageIndex(indexURL) {
}
}
export async function _fetchBinaryFile(indexURL, path) {
if (IN_NODE) {
const fsPromises = await import(/* webpackIgnore: true */ "fs/promises");
const tar_buffer = await fsPromises.readFile(`${indexURL}${path}`);
return tar_buffer.buffer;
} else {
let response = await fetch(`${indexURL}${path}`);
/**
 * Load a binary file, only for use in Node. If the path explicitly is a URL
 * (it contains "://"), fetch it with nodeFetch; otherwise read
 * `${indexURL}${path}` from the file system.
 * @param {string} indexURL base path prepended to file system paths
 * @param {string} path the URL or relative path to load
 * @returns {Promise<Uint8Array>} The binary data. Both branches return a
 * Uint8Array so callers receive the same type wherever the data came from.
 * @throws {Error} If the URL request completes with a non-ok response.
 * @private
 */
async function node_loadBinaryFile(indexURL, path) {
  if (path.includes("://")) {
    const response = await nodeFetch(path);
    if (!response.ok) {
      throw new Error(`Failed to load '${path}': request failed.`);
    }
    return new Uint8Array(await response.arrayBuffer());
  }
  const data = await nodeFsPromisesMod.readFile(`${indexURL}${path}`);
  // View the same bytes without copying. byteOffset/byteLength are required
  // because a Node Buffer can be a slice of a larger underlying ArrayBuffer.
  return new Uint8Array(data.buffer, data.byteOffset, data.byteLength);
}
////////////////////////////////////////////////////////////
// Package loading
/**
 * Fetch a binary file, only for use in the browser. `indexURL` is resolved
 * against the page location and `path` is then resolved against that result.
 *
 * @param {string} indexURL base path to resolve relative paths
 * @param {string} path the path to load
 * @returns {Promise<Uint8Array>} The bytes of the response body
 * @throws {Error} If the response is not ok
 * @private
 */
async function browser_loadBinaryFile(indexURL, path) {
  const target = new URL(path, new URL(indexURL, location));
  const reply = await fetch(target);
  if (reply.ok) {
    const bytes = await reply.arrayBuffer();
    return new Uint8Array(bytes);
  }
  throw new Error(`Failed to load '${target}': request failed.`);
}
// The binary loader for the current runtime, picked once when this module is
// evaluated: the Node implementation under Node, the browser one otherwise.
export let _loadBinaryFile = IN_NODE
  ? node_loadBinaryFile
  : browser_loadBinaryFile;
/**
 * Load a text file and execute it as JavaScript. Only for use in Node.
 * @param {string} url The path to load. May be a url or a relative file system path.
 * @throws {Error} If `url` is a URL and the request completes with a non-ok
 * response.
 * @private
 */
async function nodeLoadScript(url) {
  if (url.includes("://")) {
    // If it's a url, load it with fetch then eval it.
    const response = await nodeFetch(url);
    // Never evaluate the body of a failed request (for example an HTML error
    // page) as if it were the script.
    if (!response.ok) {
      throw new Error(`Failed to load '${url}': request failed.`);
    }
    nodeVmMod.runInThisContext(await response.text());
  } else {
    // Otherwise, hopefully it is a relative path we can load from the file
    // system.
    await import(nodePathMod.resolve(url));
  }
}
/**
 * Load and run the script at `url` using the mechanism of the current
 * runtime. The implementation is selected once, when this module is
 * evaluated; an unrecognized runtime makes module evaluation throw.
 * @param {string} url
 * @async
 * @private
 */
export let loadScript;
if (globalThis.document) {
  // A document exists: browser main thread, use dynamic import.
  loadScript = async (url) => {
    return await import(/* webpackIgnore: true */ url);
  };
} else if (globalThis.importScripts) {
  // Web worker. importScripts is synchronous; the async wrapper only keeps
  // the calling convention the same as the other branches.
  loadScript = async (url) => {
    globalThis.importScripts(url);
  };
} else if (IN_NODE) {
  loadScript = nodeLoadScript;
} else {
  throw new Error("Cannot determine runtime environment");
}
//
// Dependency resolution
//
const DEFAULT_CHANNEL = "default channel";
// Regexp for validating package name and URI
@ -69,71 +171,54 @@ function _uri_to_package_name(package_uri) {
}
/**
* @param {string} url
* @async
* Recursively add a package and its dependencies to toLoad and toLoadShared.
* A helper function for recursiveDependencies.
* @param {str} name The package to add
* @param {Set} toLoad The set of names of packages to load
* @param {Set} toLoadShared The set of names of shared libraries to load
* @private
*/
// Load and run the script at `url`. The implementation is selected once, when
// this module is evaluated, by probing the globals of the current runtime.
export let loadScript;
if (globalThis.document) {
  // browser
  loadScript = async (url) => await import(/* webpackIgnore: true */ url);
} else if (globalThis.importScripts) {
  // webworker
  loadScript = async (url) => {
    // This is async only for consistency
    globalThis.importScripts(url);
  };
} else if (IN_NODE) {
  // The imports are started immediately; each call awaits only those it needs.
  const pathPromise = import(/* webpackIgnore: true */ "path").then(
    (M) => M.default
  );
  // Marked webpackIgnore like the other Node-only imports here.
  const fetchPromise = import(/* webpackIgnore: true */ "node-fetch").then(
    (M) => M.default
  );
  const vmPromise = import(/* webpackIgnore: true */ "vm").then(
    (M) => M.default
  );
  loadScript = async (url) => {
    if (url.includes("://")) {
      // If it's a url, have to load it with fetch and then eval it.
      const fetch = await fetchPromise;
      const vm = await vmPromise;
      const response = await fetch(url);
      // Never evaluate the body of a failed request as if it were the script.
      if (!response.ok) {
        throw new Error(`Failed to load '${url}': request failed.`);
      }
      vm.runInThisContext(await response.text());
    } else {
      // Otherwise, hopefully it is a relative path we can load from the file
      // system.
      const path = await pathPromise;
      await import(path.resolve(url));
    }
  };
} else {
  throw new Error("Cannot determine runtime environment");
}
function addPackageToLoad(name, toLoad) {
function addPackageToLoad(name, toLoad, toLoadShared) {
name = name.toLowerCase();
if (toLoad.has(name)) {
return;
}
toLoad.set(name, DEFAULT_CHANNEL);
const pkg_info = Module.packages[name];
if (!pkg_info) {
throw new Error(`No known package with name '${name}'`);
}
if (pkg_info.shared_library) {
toLoadShared.set(name, DEFAULT_CHANNEL);
} else {
toLoad.set(name, DEFAULT_CHANNEL);
}
// If the package is already loaded, we don't add dependencies, but warn
// the user later. This is especially important if the loaded package is
// from a custom url, in which case adding dependencies is wrong.
if (loadedPackages[name] !== undefined) {
return;
}
for (let dep_name of Module.packages[name].depends) {
addPackageToLoad(dep_name, toLoad);
for (let dep_name of pkg_info.depends) {
addPackageToLoad(dep_name, toLoad, toLoadShared);
}
}
function recursiveDependencies(
names,
_messageCallback,
errorCallback,
sharedLibsOnly
) {
/**
* Calculate the dependencies of a set of packages
* @param names The list of names whose dependencies we need to calculate.
* @returns Two sets, the set of normal dependencies and the set of shared
* dependencies
* @private
*/
function recursiveDependencies(names, errorCallback) {
const toLoad = new Map();
const toLoadShared = new Map();
for (let name of names) {
const pkgname = _uri_to_package_name(name);
if (pkgname === undefined) {
addPackageToLoad(name.toLowerCase(), toLoad, toLoadShared);
continue;
}
if (toLoad.has(pkgname) && toLoad.get(pkgname) !== name) {
errorCallback(
`Loading same package ${pkgname} from ${name} and ${toLoad.get(
@ -142,28 +227,9 @@ function recursiveDependencies(
);
continue;
}
if (pkgname !== undefined) {
toLoad.set(pkgname, name);
continue;
}
name = name.toLowerCase();
if (name in Module.packages) {
addPackageToLoad(name, toLoad);
continue;
}
errorCallback(`Skipping unknown package '${name}'`);
toLoad.set(pkgname, name);
}
if (sharedLibsOnly) {
let onlySharedLibs = new Map();
for (let c of toLoad) {
let name = c[0];
if (Module.packages[name].shared_library) {
onlySharedLibs.set(name, toLoad.get(name));
}
}
return onlySharedLibs;
}
return toLoad;
return [toLoad, toLoadShared];
}
// locateFile is the function used by the .js file to locate the .data file
@ -201,110 +267,6 @@ function waitRunDependency() {
return promise;
}
/**
 * Resolve `names` to a map of packages via recursiveDependencies, run the
 * loader script of every package that is not loaded yet, and record the ones
 * that loaded in loadedPackages.
 *
 * Packages that are skipped (already loaded, or requested from a URI that
 * differs from the one they were loaded from) are removed from the map, so
 * they are neither re-recorded in loadedPackages under the new URI nor
 * reported as freshly loaded.
 *
 * @param names Passed straight to recursiveDependencies.
 * @param {function} messageCallback Called with progress messages.
 * @param {function} errorCallback Called with error messages; for script load
 * failures the error is passed as a second argument.
 * @returns {Promise<string|undefined>} "No new packages to load" when the
 * dependency map is empty, otherwise undefined.
 * @private
 */
async function _loadPackage(names, messageCallback, errorCallback) {
  // toLoad is a map pkg_name => pkg_uri
  let toLoad = recursiveDependencies(names, messageCallback, errorCallback);

  // Tell Module.locateFile about the packages we're loading
  Module.locateFile_packagesToLoad = toLoad;
  if (toLoad.size === 0) {
    return Promise.resolve("No new packages to load");
  } else {
    let packageNames = Array.from(toLoad.keys()).join(", ");
    messageCallback(`Loading ${packageNames}`);
  }

  // This is a collection of promises that resolve when the package's JS file is
  // loaded. The promises already handle error and never fail.
  let scriptPromises = [];

  for (let [pkg, uri] of toLoad) {
    let loaded = loadedPackages[pkg];
    if (loaded !== undefined) {
      // Nothing will be loaded for this package, so drop it from toLoad;
      // otherwise the bookkeeping loop below would overwrite its entry in
      // loadedPackages. Deleting the current key while iterating a Map is safe.
      toLoad.delete(pkg);
      // If uri is from the DEFAULT_CHANNEL, we assume it was added as a
      // dependency, which was previously overridden.
      if (loaded === uri || uri === DEFAULT_CHANNEL) {
        messageCallback(`${pkg} already loaded from ${loaded}`);
      } else {
        errorCallback(
          `URI mismatch, attempting to load package ${pkg} from ${uri} ` +
            `while it is already loaded from ${loaded}. To override a dependency, ` +
            `load the custom package first.`
        );
      }
      continue;
    }
    let pkgname = (Module.packages[pkg] && Module.packages[pkg].name) || pkg;
    let scriptSrc = uri === DEFAULT_CHANNEL ? `${baseURL}${pkgname}.js` : uri;
    messageCallback(`Loading ${pkg} from ${scriptSrc}`);
    scriptPromises.push(
      loadScript(scriptSrc).catch((e) => {
        errorCallback(`Couldn't load package from URL ${scriptSrc}`, e);
        toLoad.delete(pkg);
      })
    );
  }

  // We must start waiting for runDependencies *after* all the JS files are
  // loaded, since the number of runDependencies may happen to equal zero
  // between package files loading.
  try {
    await Promise.all(scriptPromises).then(waitRunDependency);
  } finally {
    delete Module.monitorRunDependencies;
  }

  // Whatever is still in toLoad was actually loaded by this call.
  let packageList = [];
  for (let [pkg, uri] of toLoad) {
    loadedPackages[pkg] = uri;
    packageList.push(pkg);
  }

  let resolveMsg;
  if (packageList.length > 0) {
    let packageNames = packageList.join(", ");
    resolveMsg = `Loaded ${packageNames}`;
  } else {
    resolveMsg = "No packages loaded";
  }

  Module.reportUndefinedSymbols();

  messageCallback(resolveMsg);

  // We have to invalidate Python's import caches, or it won't
  // see the new files.
  Module.importlib.invalidate_caches();
}
// Tail of the queue of package loads: a promise that resolves when the most
// recent holder of the lock releases it. It never rejects.
let _package_lock = Promise.resolve();

/**
 * Wait for the package-loading lock and take it. Callers are served in the
 * order in which they called, which prevents race conditions in loadPackage.
 * @returns A zero argument function that releases the lock.
 * @private
 */
async function acquirePackageLock() {
  const previousHolder = _package_lock;
  let release;
  // Queue ourselves first, so later callers wait on us, and only then wait.
  _package_lock = new Promise((resolve) => {
    release = resolve;
  });
  await previousHolder;
  return release;
}
/**
*
* The list of packages that Pyodide has loaded.
* Use ``Object.keys(pyodide.loadedPackages)`` to get the list of names of
* loaded packages, and ``pyodide.loadedPackages[package_name]`` to access
* install location for a particular ``package_name``.
*
* @type {object}
*/
export let loadedPackages = {};
let sharedLibraryWasmPlugin;
let origWasmPlugin;
let wasmPluginIndex;
@ -357,6 +319,31 @@ function restoreOrigWasmPlugin() {
* @private
*/
/**
 * Build an independent asynchronous lock.
 * @returns A function that acquires the new lock
 * @private
 */
function createLock() {
  // Resolved while the lock is free; pending while somebody holds it.
  let tail = Promise.resolve();

  /**
   * Wait for the lock and take it. Callers are served in calling order.
   * @returns A zero argument function that releases the lock.
   * @private
   */
  return async function acquireLock() {
    const previousHolder = tail;
    let unlock;
    tail = new Promise((resolve) => {
      unlock = resolve;
    });
    await previousHolder;
    return unlock;
  };
}

const acquirePackageLock = createLock();
/**
* Load a package or a list of packages over the network. This installs the
* package in the virtual filesystem. The package needs to be imported from
@ -375,52 +362,135 @@ function restoreOrigWasmPlugin() {
* @async
*/
export async function loadPackage(names, messageCallback, errorCallback) {
messageCallback = messageCallback || console.log;
errorCallback = errorCallback || console.error;
if (Module.isPyProxy(names)) {
let temp;
try {
temp = names.toJs();
} finally {
names.destroy();
}
names = temp;
names = names.toJs();
}
if (!Array.isArray(names)) {
names = [names];
}
// get shared library packages and load those first
// otherwise bad things happen with linking them in firefox.
let sharedLibraryNames = [];
try {
let sharedLibraryPackagesToLoad = recursiveDependencies(
names,
messageCallback,
errorCallback,
true
);
for (let pkg of sharedLibraryPackagesToLoad) {
sharedLibraryNames.push(pkg[0]);
}
} catch (e) {
// do nothing - let the main load throw any errors
const [toLoad, toLoadShared] = recursiveDependencies(names, errorCallback);
if (toLoad.size === 0 && toLoadShared.size === 0) {
messageCallback("No new packages to load");
return;
}
let releaseLock = await acquirePackageLock();
for (let [pkg, uri] of [...toLoad, ...toLoadShared]) {
let loaded = loadedPackages[pkg];
if (loaded === undefined) {
continue;
}
toLoad.delete(pkg);
toLoadShared.delete(pkg);
// If uri is from the DEFAULT_CHANNEL, we assume it was added as a
// dependency, which was previously overridden.
if (loaded === uri || uri === DEFAULT_CHANNEL) {
messageCallback(`${pkg} already loaded from ${loaded}`);
} else {
errorCallback(
`URI mismatch, attempting to load package ${pkg} from ${uri} ` +
`while it is already loaded from ${loaded}. To override a dependency, ` +
`load the custom package first.`
);
}
}
const packageNames = [...toLoad.keys(), ...toLoadShared.keys()].join(", ");
try {
messageCallback(`Loading ${packageNames}`);
let scriptPromises = [];
const loaded = [];
const failed = {};
useSharedLibraryWasmPlugin();
await _loadPackage(
sharedLibraryNames,
messageCallback || console.log,
errorCallback || console.error
);
Module.locateFile_packagesToLoad = toLoadShared;
for (const [pkg, uri] of toLoadShared) {
const pkgname =
(Module.packages[pkg] && Module.packages[pkg].name) || pkg;
const scriptSrc =
uri === DEFAULT_CHANNEL ? `${baseURL}${pkgname}.js` : uri;
messageCallback(`Loading ${pkg} from ${scriptSrc}`);
scriptPromises.push(
loadScript(scriptSrc)
.then(() => {
loaded.push(pkg);
loadedPackages[pkg] = uri;
})
.catch((e) => {
failed[pkg] = e;
})
);
}
// We must start waiting for runDependencies *after* all the JS files are
// loaded, since the number of runDependencies may happen to equal zero
// between package files loading.
try {
await Promise.all(scriptPromises).then(waitRunDependency);
} finally {
delete Module.monitorRunDependencies;
}
restoreOrigWasmPlugin();
await _loadPackage(
names,
messageCallback || console.log,
errorCallback || console.error
);
scriptPromises = [];
Module.locateFile_packagesToLoad = toLoad;
for (const [pkg, uri] of toLoad) {
const pkgname =
(Module.packages[pkg] && Module.packages[pkg].name) || pkg;
const scriptSrc =
uri === DEFAULT_CHANNEL ? `${baseURL}${pkgname}.js` : uri;
messageCallback(`Loading ${pkg} from ${scriptSrc}`);
scriptPromises.push(
loadScript(scriptSrc)
.then(() => {
loaded.push(pkg);
loadedPackages[pkg] = uri;
})
.catch((e) => {
failed[pkg] = e;
})
);
}
try {
await Promise.all(scriptPromises).then(waitRunDependency);
} finally {
delete Module.monitorRunDependencies;
}
Module.reportUndefinedSymbols();
if (loaded.length > 0) {
const successNames = loaded.join(", ");
messageCallback(`Loaded ${successNames}`);
}
if (Object.keys(failed).length > 0) {
const failedNames = Object.keys(failed).join(", ");
messageCallback(`Failed to load ${failedNames}`);
for (let [name, err] of Object.entries(failed)) {
console.warn(`The following error occurred while loading ${name}:`);
console.error(err);
}
}
// We have to invalidate Python's import caches, or it won't
// see the new files.
Module.importlib.invalidate_caches();
} finally {
restoreOrigWasmPlugin();
releaseLock();
}
}
/**
*
* The list of packages that Pyodide has loaded.
* Use ``Object.keys(pyodide.loadedPackages)`` to get the list of names of
* loaded packages, and ``pyodide.loadedPackages[package_name]`` to access
* install location for a particular ``package_name``.
*
* @type {object}
*/
export let loadedPackages = {};

View File

@ -5,8 +5,9 @@ import { Module, setStandardStreams, setHomeDirectory } from "./module.js";
import {
loadScript,
initializePackageIndex,
_fetchBinaryFile,
_loadBinaryFile,
loadPackage,
initNodeModules,
} from "./load-pyodide.js";
import { makePublicAPI, registerJsModule } from "./api.js";
import "./pyproxy.gen.js";
@ -144,7 +145,7 @@ function unpackPyodidePy(pyodide_py_tar) {
let stream = Module.FS.open(fileName, "w");
Module.FS.write(
stream,
new Uint8Array(pyodide_py_tar),
pyodide_py_tar,
0,
pyodide_py_tar.byteLength,
undefined,
@ -263,8 +264,9 @@ export async function loadPyodide(config) {
config.indexURL += "/";
}
Module.indexURL = config.indexURL;
await initNodeModules();
let packageIndexReady = initializePackageIndex(config.indexURL);
let pyodide_py_tar_promise = _fetchBinaryFile(
let pyodide_py_tar_promise = _loadBinaryFile(
config.indexURL,
"pyodide_py.tar"
);

View File

@ -74,13 +74,16 @@ def test_uri_mismatch(selenium_standalone):
def test_invalid_package_name(selenium):
selenium.load_package("wrong name+$")
assert "Skipping unknown package" in selenium.logs
selenium.clean_logs()
selenium.load_package("tcp://some_url")
assert "Skipping unknown package" in selenium.logs
with pytest.raises(
selenium.JavascriptException,
match=r"No known package with name 'wrong name\+\$'",
):
selenium.load_package("wrong name+$")
with pytest.raises(
selenium.JavascriptException,
match="No known package with name 'tcp://some_url'",
):
selenium.load_package("tcp://some_url")
@pytest.mark.parametrize(
@ -118,10 +121,12 @@ def test_load_handle_failure(selenium_standalone):
selenium = selenium_standalone
selenium.load_package("pytz")
selenium.run("import pytz")
selenium.load_package("pytz2")
with pytest.raises(
selenium.JavascriptException, match="No known package with name 'pytz2'"
):
selenium.load_package("pytz2")
selenium.load_package("pyparsing")
assert "Loading pytz" in selenium.logs
assert "Skipping unknown package 'pytz2'" in selenium.logs
assert "Loading pyparsing" in selenium.logs
@ -130,7 +135,7 @@ def test_load_failure_retry(selenium_standalone):
selenium = selenium_standalone
selenium.load_package("http://invalidurl/pytz.js")
assert selenium.logs.count("Loading pytz from") == 1
assert selenium.logs.count("Couldn't load package from URL") == 1
assert selenium.logs.count("The following error occurred while loading pytz:") == 1
assert selenium.run_js("return Object.keys(pyodide.loadedPackages)") == []
selenium.load_package("pytz")