ENH Allow specifying a custom lock file (#2645)

This option is intended to be used with micropip.freeze. A user can
save the lockfile generated by micropip.freeze and load that lock
file while using the rest of the files from the CDN.
This commit is contained in:
Hood Chatham 2022-06-23 09:33:53 -07:00 committed by GitHub
parent fac51bdcf0
commit b01cc1a800
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 98 additions and 32 deletions

View File

@ -16,10 +16,16 @@ substitutions:
- {{ Fix }} `micropip` supports extra markers in packages correctly now.
{pr}`2584`
- {{ Enhancement }} The integrity of Pyodide packages is now verified before
loading them. For now, this is limited to browser environments.
{pr}`2513`
- {{ Enhancement }} It is now possible to use an alternate `repodata.json`
lockfile by passing the `lockFileURL` option to {any}`loadPyodide`. This is
particularly intended to be used with {any}`micropip.freeze`.
{pr}`2645`
- {{ Fix }} Fix building on macOS {issue}`2360` {pr}`2554`
- {{ Fix }} Fix a REPL error in printing high-dimensional lists.

View File

@ -2,6 +2,7 @@ from pyodide._core import IN_BROWSER
if IN_BROWSER:
from ._compat_in_pyodide import (
REPODATA_INFO,
REPODATA_PACKAGES,
fetch_bytes,
fetch_string,
@ -11,6 +12,7 @@ if IN_BROWSER:
)
else:
from ._compat_not_in_pyodide import (
REPODATA_INFO,
REPODATA_PACKAGES,
fetch_bytes,
fetch_string,
@ -20,9 +22,10 @@ else:
)
__all__ = [
"REPODATA_INFO",
"REPODATA_PACKAGES",
"fetch_bytes",
"fetch_string",
"REPODATA_PACKAGES",
"loadedPackages",
"loadDynlib",
"loadPackage",

View File

@ -7,6 +7,7 @@ try:
from pyodide_js._api import loadBinaryFile, loadDynlib # type: ignore[import]
REPODATA_PACKAGES = pyodide_js._api.repodata_packages.to_py()
REPODATA_INFO = pyodide_js._api.repodata_info.to_py()
except ImportError:
if IN_BROWSER:
raise
@ -26,6 +27,7 @@ async def fetch_string(url: str, kwargs: dict[str, str]) -> str:
__all__ = [
"fetch_bytes",
"fetch_string",
"REPODATA_INFO",
"REPODATA_PACKAGES",
"loadedPackages",
"loadDynlib",

View File

@ -29,6 +29,9 @@ class pyodide_js_:
raise RuntimeError(f"Attempted to access property '{attr}' on pyodide_js dummy")
REPODATA_INFO: dict[str, str] = {}
def loadPackage(packages: str | list[str]) -> None:
pass
@ -37,6 +40,7 @@ __all__ = [
"loadDynlib",
"fetch_bytes",
"fetch_string",
"REPODATA_INFO",
"REPODATA_PACKAGES",
"loadedPackages",
"loadPackage",

View File

@ -23,6 +23,7 @@ from pyodide import to_js
from pyodide._package_loader import get_dynlibs, wheel_dist_info_dir
from ._compat import (
REPODATA_INFO,
REPODATA_PACKAGES,
fetch_bytes,
fetch_string,
@ -521,12 +522,15 @@ def _generate_package_hash(data: BytesIO) -> str:
def freeze() -> str:
"""Produce a json string which can be used as the contents of the
``repodata.json`` lockfile.
``repodata.json`` lock file.
If you later load pyodide with this lock file, you can use
:any:`pyodide.loadPackage` to load packages that were loaded with `micropip` this
time. Loading packages with :any:`pyodide.loadPackage` is much faster and you
will always get consistent versions of all your dependencies.
If you later load Pyodide with this lock file, you can use
:any:`pyodide.loadPackage` to load packages that were loaded with `micropip`
this time. Loading packages with :any:`pyodide.loadPackage` is much faster
and you will always get consistent versions of all your dependencies.
You can use your custom lock file by passing an appropriate url to the
`lockFileURL` argument to :any:`loadPyodide <globalThis.loadPyodide>`.
"""
from copy import deepcopy
@ -561,7 +565,7 @@ def freeze() -> str:
# Sort
packages = dict(sorted(packages.items()))
package_data = {
"info": {"arch": "wasm32", "platform": "Emscripten-1.0"},
"info": REPODATA_INFO,
"packages": packages,
}
return json.dumps(package_data)

View File

@ -10,26 +10,21 @@ import {
} from "./compat.js";
import { PyProxy, isPyProxy } from "./pyproxy.gen";
/** @private */
let baseURL: string;
/**
* Initialize the packages index. This is called as early as possible in
* loadPyodide so that fetching repodata.json can occur in parallel with other
* operations.
* @param indexURL
* @param lockFileURL
* @private
*/
export async function initializePackageIndex(indexURL: string) {
baseURL = indexURL;
async function initializePackageIndex(lockFileURL: string) {
let repodata;
if (IN_NODE) {
await initNodeModules();
const package_string = await nodeFsPromisesMod.readFile(
`${indexURL}repodata.json`
);
const package_string = await nodeFsPromisesMod.readFile(lockFileURL);
repodata = JSON.parse(package_string);
} else {
let response = await fetch(`${indexURL}repodata.json`);
let response = await fetch(lockFileURL);
repodata = await response.json();
}
if (!repodata.packages) {
@ -37,6 +32,7 @@ export async function initializePackageIndex(indexURL: string) {
"Loaded repodata.json does not contain the expected key 'packages'."
);
}
API.repodata_info = repodata.info;
API.repodata_packages = repodata.packages;
// compute the inverted index for imports to package names
@ -48,6 +44,8 @@ export async function initializePackageIndex(indexURL: string) {
}
}
API.packageIndexReady = initializePackageIndex(API.config.lockFileURL);
/**
* Only used in Node. If we can't find a package in node_modules, we'll use this
* to fetch the package from the cdn (and we'll store it into node_modules so
@ -150,11 +148,11 @@ function recursiveDependencies(
/**
* Download a package. If `channel` is `DEFAULT_CHANNEL`, look up the wheel URL
* relative to baseURL from `repodata.json`, otherwise use the URL specified by
* relative to indexURL from `repodata.json`, otherwise use the URL specified by
* `channel`.
* @param name The name of the package
* @param channel Either `DEFAULT_CHANNEL` or the absolute URL to the
* wheel or the path to the wheel relative to baseURL.
* wheel or the path to the wheel relative to indexURL.
* @returns The binary data for the package
* @private
*/
@ -176,7 +174,11 @@ async function downloadPackage(
file_sub_resource_hash = undefined;
}
try {
return await loadBinaryFile(baseURL, file_name, file_sub_resource_hash);
return await loadBinaryFile(
API.config.indexURL,
file_name,
file_sub_resource_hash
);
} catch (e) {
if (!IN_NODE) {
throw e;
@ -191,7 +193,10 @@ async function downloadPackage(
console.log(
`Package ${file_name} loaded from ${cdnURL}, caching the wheel in node_modules for future use.`
);
await nodeFsPromisesMod.writeFile(`${baseURL}${file_name}`, binary);
await nodeFsPromisesMod.writeFile(
`${API.config.indexURL}${file_name}`,
binary
);
return binary;
}
@ -471,5 +476,3 @@ export async function loadPackage(
* install location for a particular ``package_name``.
*/
export let loadedPackages: { [key: string]: string } = {};
API.packageIndexReady = initializePackageIndex(API.config.indexURL);

View File

@ -175,6 +175,7 @@ function calculateIndexURL(): string {
*/
export type ConfigType = {
indexURL: string;
lockFileURL: string;
homedir: string;
fullStdLib?: boolean;
stdin?: () => string;
@ -207,6 +208,13 @@ export async function loadPyodide(
*/
indexURL?: string;
/**
* The URL from which Pyodide will load the Pyodide "repodata.json" lock
* file. Defaults to ``${indexURL}/repodata.json``. You can produce custom
* lock files with :any:`micropip.freeze`.
*/
lockFileURL?: string;
/**
* The home directory which Pyodide will use inside virtual file system. Default: "/home/pyodide"
*/
@ -237,17 +245,18 @@ export async function loadPyodide(
if (!options.indexURL) {
options.indexURL = calculateIndexURL();
}
if (!options.indexURL.endsWith("/")) {
options.indexURL += "/";
}
const default_config = {
fullStdLib: true,
jsglobals: globalThis,
stdin: globalThis.prompt ? globalThis.prompt : undefined,
homedir: "/home/pyodide",
lockFileURL: options.indexURL! + "repodata.json",
};
const config = Object.assign(default_config, options) as ConfigType;
if (!config.indexURL.endsWith("/")) {
config.indexURL += "/";
}
await initNodeModules();
const pyodide_py_tar_promise = loadBinaryFile(
config.indexURL,
@ -294,6 +303,9 @@ export async function loadPyodide(
API.setCdnUrl(`https://cdn.jsdelivr.net/pyodide/v${pyodide.version}/full/`);
}
await API.packageIndexReady;
if (API.repodata_info.version !== pyodide.version) {
throw new Error("Lock file version doesn't match Pyodide version");
}
if (config.fullStdLib) {
await pyodide.loadPackage(["distutils"]);
}

View File

@ -158,17 +158,16 @@ def test_load_failure_retry(selenium_standalone):
def test_load_package_unknown(selenium_standalone):
dist_dir = Path(__file__).parents[2] / "dist"
pyparsing_wheel_name = get_pyparsing_wheel_name()
shutil.copyfile(
dist_dir / pyparsing_wheel_name,
dist_dir / "pyparsing-custom-3.0.6-py3-none-any.whl",
DIST_PATH / pyparsing_wheel_name,
DIST_PATH / "pyparsing-custom-3.0.6-py3-none-any.whl",
)
try:
selenium_standalone.load_package("./pyparsing-custom-3.0.6-py3-none-any.whl")
finally:
(dist_dir / "pyparsing-custom-3.0.6-py3-none-any.whl").unlink()
(DIST_PATH / "pyparsing-custom-3.0.6-py3-none-any.whl").unlink()
assert selenium_standalone.run_js(
"return pyodide.loadedPackages.hasOwnProperty('pyparsing-custom')"
@ -264,14 +263,13 @@ def test_test_unvendoring(selenium_standalone):
def test_install_archive(selenium):
dist_dir = Path(__file__).parents[2] / "dist"
test_dir = Path(__file__).parent
# TODO: first argument actually works as a path due to implementation,
# maybe it can be proposed to typeshed?
shutil.make_archive(
str(test_dir / "test_pkg"), "gztar", root_dir=test_dir, base_dir="test_pkg"
)
build_test_pkg = dist_dir / "test_pkg.tar.gz"
build_test_pkg = DIST_PATH / "test_pkg.tar.gz"
if not build_test_pkg.exists():
build_test_pkg.symlink_to((test_dir / "test_pkg.tar.gz").absolute())
try:
@ -302,7 +300,7 @@ def test_install_archive(selenium):
"""
)
finally:
(dist_dir / "test_pkg.tar.gz").unlink(missing_ok=True)
(DIST_PATH / "test_pkg.tar.gz").unlink(missing_ok=True)
(test_dir / "test_pkg.tar.gz").unlink(missing_ok=True)
@ -392,3 +390,37 @@ def test_get_dynlibs():
x2.close()
t.flush()
assert sorted(get_dynlibs(t, ".zip", Path("/p"))) == so_files
# End-to-end check of the `lockFileURL` option of loadPyodide: a lock file
# produced by micropip.freeze() in one Pyodide session is fed to a second
# session, which must then be able to load the frozen package with
# pyodide.loadPackage at the pinned version.
# Marked as an expected failure on node ("Some fetch trouble").
@pytest.mark.xfail_browsers(node="Some fetch trouble")
@pytest.mark.skip_refcount_check
@pytest.mark.skip_pyproxy_check
def test_custom_lockfile(selenium_standalone_noload):
selenium = selenium_standalone_noload
# First load: install hypothesis 6.47.3 through micropip and return the
# JSON string produced by micropip.freeze().
lock = selenium.run_js(
"""
let pyodide = await loadPyodide({fullStdLib: false});
await pyodide.loadPackage("micropip")
return pyodide.runPythonAsync(`
import micropip
await micropip.install("hypothesis==6.47.3")
micropip.freeze()
`);
"""
)
# Save the frozen lock file so the second load can refer to it by the
# relative URL "custom_lockfile.json".
# NOTE(review): presumably DIST_PATH is the directory served to the test
# browser; confirm against the test fixtures.
custom_lockfile = DIST_PATH / "custom_lockfile.json"
custom_lockfile.write_text(lock)
try:
# Second load: use the saved file as the lock file, load hypothesis with
# pyodide.loadPackage (not micropip), and check the imported version.
assert (
selenium.run_js(
"""
let pyodide = await loadPyodide({fullStdLib: false, lockFileURL: "custom_lockfile.json" });
await pyodide.loadPackage("hypothesis");
return pyodide.runPython("import hypothesis; hypothesis.__version__")
"""
)
== "6.47.3"
)
finally:
# Always remove the generated lock file, even if the assertion fails.
custom_lockfile.unlink()