diff --git a/docs/project/changelog.md b/docs/project/changelog.md index 1af510010..271ac11e4 100644 --- a/docs/project/changelog.md +++ b/docs/project/changelog.md @@ -16,10 +16,16 @@ substitutions: - {{ Fix }} `micropip` supports extra markers in packages correctly now. {pr}`2584` + - {{ Enhancement }} Integrity of Pyodide packages are now verified before loading them. This is for now only limited to browser environments. {pr}`2513` +- {{ Enhancement }} It is now possible to use an alternate `repodata.json` + lockfile by passing the `lockFileURL` option to {any}`loadPyodide`. This is + particularly intended to be used with {any}`micropip.freeze`. + {pr}`` + - {{ Fix }} Fix building on macOS {issue}`2360` {pr}`2554` - {{ Fix }} Fix a REPL error in printing high-dimensional lists. diff --git a/packages/micropip/src/micropip/_compat.py b/packages/micropip/src/micropip/_compat.py index a8dde2ab1..25c0e5dd4 100644 --- a/packages/micropip/src/micropip/_compat.py +++ b/packages/micropip/src/micropip/_compat.py @@ -2,6 +2,7 @@ from pyodide._core import IN_BROWSER if IN_BROWSER: from ._compat_in_pyodide import ( + REPODATA_INFO, REPODATA_PACKAGES, fetch_bytes, fetch_string, @@ -11,6 +12,7 @@ if IN_BROWSER: ) else: from ._compat_not_in_pyodide import ( + REPODATA_INFO, REPODATA_PACKAGES, fetch_bytes, fetch_string, @@ -20,9 +22,10 @@ else: ) __all__ = [ + "REPODATA_INFO", + "REPODATA_PACKAGES", "fetch_bytes", "fetch_string", - "REPODATA_PACKAGES", "loadedPackages", "loadDynlib", "loadPackage", diff --git a/packages/micropip/src/micropip/_compat_in_pyodide.py b/packages/micropip/src/micropip/_compat_in_pyodide.py index f9804527f..bef0f0222 100644 --- a/packages/micropip/src/micropip/_compat_in_pyodide.py +++ b/packages/micropip/src/micropip/_compat_in_pyodide.py @@ -7,6 +7,7 @@ try: from pyodide_js._api import loadBinaryFile, loadDynlib # type: ignore[import] REPODATA_PACKAGES = pyodide_js._api.repodata_packages.to_py() + REPODATA_INFO = pyodide_js._api.repodata_info.to_py() 
except ImportError: if IN_BROWSER: raise @@ -26,6 +27,7 @@ async def fetch_string(url: str, kwargs: dict[str, str]) -> str: __all__ = [ "fetch_bytes", "fetch_string", + "REPODATA_INFO", "REPODATA_PACKAGES", "loadedPackages", "loadDynlib", diff --git a/packages/micropip/src/micropip/_compat_not_in_pyodide.py b/packages/micropip/src/micropip/_compat_not_in_pyodide.py index 55d1bf105..d1fc48263 100644 --- a/packages/micropip/src/micropip/_compat_not_in_pyodide.py +++ b/packages/micropip/src/micropip/_compat_not_in_pyodide.py @@ -29,6 +29,9 @@ class pyodide_js_: raise RuntimeError(f"Attempted to access property '{attr}' on pyodide_js dummy") +REPODATA_INFO: dict[str, str] = {} + + def loadPackage(packages: str | list[str]) -> None: pass @@ -37,6 +40,7 @@ __all__ = [ "loadDynlib", "fetch_bytes", "fetch_string", + "REPODATA_INFO", "REPODATA_PACKAGES", "loadedPackages", "loadPackage", diff --git a/packages/micropip/src/micropip/_micropip.py b/packages/micropip/src/micropip/_micropip.py index 170c452fc..3071c6ffd 100644 --- a/packages/micropip/src/micropip/_micropip.py +++ b/packages/micropip/src/micropip/_micropip.py @@ -23,6 +23,7 @@ from pyodide import to_js from pyodide._package_loader import get_dynlibs, wheel_dist_info_dir from ._compat import ( + REPODATA_INFO, REPODATA_PACKAGES, fetch_bytes, fetch_string, @@ -521,12 +522,15 @@ def _generate_package_hash(data: BytesIO) -> str: def freeze() -> str: """Produce a json string which can be used as the contents of the - ``repodata.json`` lockfile. + ``repodata.json`` lock file. - If you later load pyodide with this lock file, you can use - :any:`pyodide.loadPackage` to load packages that were loaded with `micropip` this - time. Loading packages with :any:`pyodide.loadPackage` is much faster and you - will always get consistent versions of all your dependencies. + If you later load Pyodide with this lock file, you can use + :any:`pyodide.loadPackage` to load packages that were loaded with `micropip` + this time. 
Loading packages with :any:`pyodide.loadPackage` is much faster + and you will always get consistent versions of all your dependencies. + + You can use your custom lock file by passing an appropriate URL to the + `lockFileURL` argument to :any:`loadPyodide`. """ from copy import deepcopy @@ -561,7 +565,7 @@ def freeze() -> str: # Sort packages = dict(sorted(packages.items())) package_data = { - "info": {"arch": "wasm32", "platform": "Emscripten-1.0"}, + "info": REPODATA_INFO, "packages": packages, } return json.dumps(package_data) diff --git a/src/js/load-package.ts b/src/js/load-package.ts index 35348edcf..ff5696b08 100644 --- a/src/js/load-package.ts +++ b/src/js/load-package.ts @@ -10,26 +10,21 @@ import { } from "./compat.js"; import { PyProxy, isPyProxy } from "./pyproxy.gen"; -/** @private */ -let baseURL: string; /** * Initialize the packages index. This is called as early as possible in * loadPyodide so that fetching repodata.json can occur in parallel with other * operations. - * @param indexURL + * @param lockFileURL * @private */ -export async function initializePackageIndex(indexURL: string) { - baseURL = indexURL; +async function initializePackageIndex(lockFileURL: string) { let repodata; if (IN_NODE) { await initNodeModules(); - const package_string = await nodeFsPromisesMod.readFile( - `${indexURL}repodata.json` - ); + const package_string = await nodeFsPromisesMod.readFile(lockFileURL); repodata = JSON.parse(package_string); } else { - let response = await fetch(`${indexURL}repodata.json`); + let response = await fetch(lockFileURL); repodata = await response.json(); } if (!repodata.packages) { @@ -37,6 +32,7 @@ export async function initializePackageIndex(indexURL: string) { "Loaded repodata.json does not contain the expected key 'packages'."
); } + API.repodata_info = repodata.info; API.repodata_packages = repodata.packages; // compute the inverted index for imports to package names @@ -48,6 +44,8 @@ export async function initializePackageIndex(indexURL: string) { } } +API.packageIndexReady = initializePackageIndex(API.config.lockFileURL); + /** * Only used in Node. If we can't find a package in node_modules, we'll use this * to fetch the package from the cdn (and we'll store it into node_modules so @@ -150,11 +148,11 @@ function recursiveDependencies( /** * Download a package. If `channel` is `DEFAULT_CHANNEL`, look up the wheel URL - * relative to baseURL from `repodata.json`, otherwise use the URL specified by + * relative to indexURL from `repodata.json`, otherwise use the URL specified by * `channel`. * @param name The name of the package * @param channel Either `DEFAULT_CHANNEL` or the absolute URL to the - * wheel or the path to the wheel relative to baseURL. + * wheel or the path to the wheel relative to indexURL. * @returns The binary data for the package * @private */ @@ -176,7 +174,11 @@ async function downloadPackage( file_sub_resource_hash = undefined; } try { - return await loadBinaryFile(baseURL, file_name, file_sub_resource_hash); + return await loadBinaryFile( + API.config.indexURL, + file_name, + file_sub_resource_hash + ); } catch (e) { if (!IN_NODE) { throw e; @@ -191,7 +193,10 @@ async function downloadPackage( console.log( `Package ${file_name} loaded from ${cdnURL}, caching the wheel in node_modules for future use.` ); - await nodeFsPromisesMod.writeFile(`${baseURL}${file_name}`, binary); + await nodeFsPromisesMod.writeFile( + `${API.config.indexURL}${file_name}`, + binary + ); return binary; } @@ -471,5 +476,3 @@ export async function loadPackage( * install location for a particular ``package_name``. 
*/ export let loadedPackages: { [key: string]: string } = {}; - -API.packageIndexReady = initializePackageIndex(API.config.indexURL); diff --git a/src/js/pyodide.ts b/src/js/pyodide.ts index edf5632dc..e6d5bf4d8 100644 --- a/src/js/pyodide.ts +++ b/src/js/pyodide.ts @@ -175,6 +175,7 @@ function calculateIndexURL(): string { */ export type ConfigType = { indexURL: string; + lockFileURL: string; homedir: string; fullStdLib?: boolean; stdin?: () => string; @@ -207,6 +208,13 @@ export async function loadPyodide( */ indexURL?: string; + /** + * The URL from which Pyodide will load the Pyodide "repodata.json" lock + * file. Defaults to ``${indexURL}/repodata.json``. You can produce custom + * lock files with :any:`micropip.freeze` + */ + lockFileURL?: string; + /** * The home directory which Pyodide will use inside virtual file system. Default: "/home/pyodide" */ @@ -237,17 +245,18 @@ export async function loadPyodide( if (!options.indexURL) { options.indexURL = calculateIndexURL(); } + if (!options.indexURL.endsWith("/")) { + options.indexURL += "/"; + } const default_config = { fullStdLib: true, jsglobals: globalThis, stdin: globalThis.prompt ? globalThis.prompt : undefined, homedir: "/home/pyodide", + lockFileURL: options.indexURL!
+ "repodata.json", }; const config = Object.assign(default_config, options) as ConfigType; - if (!config.indexURL.endsWith("/")) { - config.indexURL += "/"; - } await initNodeModules(); const pyodide_py_tar_promise = loadBinaryFile( config.indexURL, @@ -294,6 +303,9 @@ export async function loadPyodide( API.setCdnUrl(`https://cdn.jsdelivr.net/pyodide/v${pyodide.version}/full/`); } await API.packageIndexReady; + if (API.repodata_info.version !== pyodide.version) { + throw new Error("Lock file version doesn't match Pyodide version"); + } if (config.fullStdLib) { await pyodide.loadPackage(["distutils"]); } diff --git a/src/tests/test_package_loading.py b/src/tests/test_package_loading.py index 53cf42e1a..47bb77363 100644 --- a/src/tests/test_package_loading.py +++ b/src/tests/test_package_loading.py @@ -158,17 +158,16 @@ def test_load_failure_retry(selenium_standalone): def test_load_package_unknown(selenium_standalone): - dist_dir = Path(__file__).parents[2] / "dist" pyparsing_wheel_name = get_pyparsing_wheel_name() shutil.copyfile( - dist_dir / pyparsing_wheel_name, - dist_dir / "pyparsing-custom-3.0.6-py3-none-any.whl", + DIST_PATH / pyparsing_wheel_name, + DIST_PATH / "pyparsing-custom-3.0.6-py3-none-any.whl", ) try: selenium_standalone.load_package("./pyparsing-custom-3.0.6-py3-none-any.whl") finally: - (dist_dir / "pyparsing-custom-3.0.6-py3-none-any.whl").unlink() + (DIST_PATH / "pyparsing-custom-3.0.6-py3-none-any.whl").unlink() assert selenium_standalone.run_js( "return pyodide.loadedPackages.hasOwnProperty('pyparsing-custom')" @@ -264,14 +263,13 @@ def test_test_unvendoring(selenium_standalone): def test_install_archive(selenium): - dist_dir = Path(__file__).parents[2] / "dist" test_dir = Path(__file__).parent # TODO: first argument actually works as a path due to implementation, # maybe it can be proposed to typeshed? 
shutil.make_archive( str(test_dir / "test_pkg"), "gztar", root_dir=test_dir, base_dir="test_pkg" ) - build_test_pkg = dist_dir / "test_pkg.tar.gz" + build_test_pkg = DIST_PATH / "test_pkg.tar.gz" if not build_test_pkg.exists(): build_test_pkg.symlink_to((test_dir / "test_pkg.tar.gz").absolute()) try: @@ -302,7 +300,7 @@ def test_install_archive(selenium): """ ) finally: - (dist_dir / "test_pkg.tar.gz").unlink(missing_ok=True) + (DIST_PATH / "test_pkg.tar.gz").unlink(missing_ok=True) (test_dir / "test_pkg.tar.gz").unlink(missing_ok=True) @@ -392,3 +390,37 @@ def test_get_dynlibs(): x2.close() t.flush() assert sorted(get_dynlibs(t, ".zip", Path("/p"))) == so_files + + +@pytest.mark.xfail_browsers(node="Some fetch trouble") +@pytest.mark.skip_refcount_check +@pytest.mark.skip_pyproxy_check +def test_custom_lockfile(selenium_standalone_noload): + selenium = selenium_standalone_noload + lock = selenium.run_js( + """ + let pyodide = await loadPyodide({fullStdLib: false}); + await pyodide.loadPackage("micropip") + return pyodide.runPythonAsync(` + import micropip + await micropip.install("hypothesis==6.47.3") + micropip.freeze() + `); + """ + ) + custom_lockfile = DIST_PATH / "custom_lockfile.json" + custom_lockfile.write_text(lock) + + try: + assert ( + selenium.run_js( + """ + let pyodide = await loadPyodide({fullStdLib: false, lockFileURL: "custom_lockfile.json" }); + await pyodide.loadPackage("hypothesis"); + return pyodide.runPython("import hypothesis; hypothesis.__version__") + """ + ) + == "6.47.3" + ) + finally: + custom_lockfile.unlink()