#!/usr/bin/env python3
"""Build all of the packages in a given directory."""
import dataclasses
import shutil
import subprocess
import sys
from collections import defaultdict
from collections.abc import Iterable
from datetime import datetime
from functools import total_ordering
from graphlib import TopologicalSorter
from pathlib import Path
from queue import PriorityQueue, Queue
from threading import Lock, Thread
from time import perf_counter, sleep
from typing import Any

from pyodide_lock import PyodideLockSpec
from pyodide_lock.spec import PackageSpec as PackageLockSpec
from rich.live import Live
from rich.progress import BarColumn, Progress, TimeElapsedColumn
from rich.spinner import Spinner
from rich.table import Table

from . import build_env, recipe
from .buildpkg import needs_rebuild
from .common import (
    extract_wheel_metadata_file,
    find_matching_wheels,
    find_missing_executables,
    repack_zip_archive,
)
from .io import MetaConfig, _BuildSpecTypes
from .logger import console_stdout, logger
from .pywasmcross import BuildArgs


class BuildError(Exception):
    def __init__(self, returncode: int) -> None:
        self.returncode = returncode
        super().__init__()


@total_ordering
@dataclasses.dataclass(eq=False, repr=False)
class BasePackage:
    pkgdir: Path
    name: str
    version: str
    disabled: bool
    meta: MetaConfig
    package_type: _BuildSpecTypes
    run_dependencies: list[str]
    host_dependencies: list[str]
    executables_required: list[str]
    dependencies: set[str]  # run + host dependencies
    unbuilt_host_dependencies: set[str]
    host_dependents: set[str]
    unvendored_tests: Path | None = None
    file_name: str | None = None
    install_dir: str = "site"
    _queue_idx: int | None = None

    # We use this in the priority queue, which pops off the smallest element.
    # So we want the smallest element to have the largest number of dependents.
    def __lt__(self, other: Any) -> bool:
        return len(self.host_dependents) > len(other.host_dependents)

    def __eq__(self, other: Any) -> bool:
        return len(self.host_dependents) == len(other.host_dependents)

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self.name})"

    def needs_rebuild(self) -> bool:
        return needs_rebuild(self.pkgdir, self.pkgdir / "build", self.meta.source)

    def build(self, build_args: BuildArgs) -> None:
        raise NotImplementedError()

    def dist_artifact_path(self) -> Path:
        raise NotImplementedError()

    def tests_path(self) -> Path | None:
        return None


@dataclasses.dataclass
class Package(BasePackage):
    def __init__(self, pkgdir: Path, config: MetaConfig):
        self.pkgdir = pkgdir
        self.meta = config.copy(deep=True)
        self.name = self.meta.package.name
        self.version = self.meta.package.version
        self.disabled = self.meta.package.disabled
        self.package_type = self.meta.build.package_type

        assert self.name == pkgdir.name, f"{self.name} != {pkgdir.name}"

        self.run_dependencies = self.meta.requirements.run
        self.host_dependencies = self.meta.requirements.host
        self.executables_required = self.meta.requirements.executable
        self.dependencies = set(self.run_dependencies + self.host_dependencies)
        self.unbuilt_host_dependencies = set(self.host_dependencies)
        self.host_dependents = set()

    def dist_artifact_path(self) -> Path:
        dist_dir = self.pkgdir / "dist"
        if self.package_type in ("shared_library", "cpython_module"):
            candidates = list(dist_dir.glob("*.zip"))
        else:
            candidates = list(
                find_matching_wheels(dist_dir.glob("*.whl"), build_env.pyodide_tags())
            )

        if len(candidates) != 1:
            raise RuntimeError(
                f"Unexpected number of wheels/archives {len(candidates)} when building {self.name}"
            )
        return candidates[0]
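
    # tests_path() below returns the unvendored "*-tests.tar" archive from
    # dist/, if one was produced alongside the package's build artifact.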
    def tests_path(self) -> Path | None:
        tests = list((self.pkgdir / "dist").glob("*-tests.tar"))
        assert len(tests) <= 1
        if tests:
            return tests[0]
        return None

    def build(self, build_args: BuildArgs) -> None:
        p = subprocess.run(
            [
                sys.executable,
                "-m",
                "pyodide_build",
                "buildpkg",
                str(self.pkgdir / "meta.yaml"),
                f"--cflags={build_args.cflags}",
                f"--cxxflags={build_args.cxxflags}",
                f"--ldflags={build_args.ldflags}",
                f"--target-install-dir={build_args.target_install_dir}",
                f"--host-install-dir={build_args.host_install_dir}",
                # Either this package has been updated and this doesn't
                # matter, or this package is dependent on a package that has
                # been updated and should be rebuilt even though its own
                # files haven't been updated.
                "--force-rebuild",
            ],
            check=False,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        if p.returncode != 0:
            logger.error(f"Error building {self.name}. Printing build logs.")

            logfile = self.pkgdir / "build.log"
            if logfile.is_file():
                logger.error(logfile.read_text(encoding="utf-8"))
            else:
                logger.error("ERROR: No build log found.")

            logger.error("ERROR: cancelling buildall")
            raise BuildError(p.returncode)


class PackageStatus:
    def __init__(
        self, *, name: str, idx: int, thread: int, total_packages: int
    ) -> None:
        self.pkg_name = name
        self.prefix = f"[{idx}/{total_packages}] " f"(thread {thread})"
        self.status = Spinner("dots", style="red", speed=0.2)
        self.table = Table.grid(padding=1)
        self.table.add_row(f"{self.prefix} building {self.pkg_name}", self.status)
        self.finished = False

    def finish(self, success: bool, elapsed_time: float) -> None:
        time = datetime.utcfromtimestamp(elapsed_time)
        if time.minute == 0:
            minutes = ""
        else:
            minutes = f"{time.minute}m "
        timestr = f"{minutes}{time.second}s"

        status = "built" if success else "failed"
        done_message = f"{self.prefix} {status} {self.pkg_name} in {timestr}"
        self.finished = True
        if success:
            logger.success(done_message)
        else:
            logger.error(done_message)

    def __rich__(self):
        return self.table


class ReplProgressFormatter:
    def __init__(self, num_packages: int) -> None:
        self.progress = Progress(
            "[progress.description]{task.description}",
            BarColumn(),
            "{task.completed}/{task.total} [progress.percentage]{task.percentage:>3.0f}%",
            "Time elapsed:",
            TimeElapsedColumn(),
        )
        self.task = self.progress.add_task("Building packages...", total=num_packages)
        self.packages: list[PackageStatus] = []
        self.reset_grid()

    def reset_grid(self):
        """Empty out the rendered grids."""
        self.top_grid = Table.grid()
        for package in self.packages:
            self.top_grid.add_row(package)

        self.main_grid = Table.grid()
        self.main_grid.add_row(self.top_grid)
        self.main_grid.add_row(self.progress)

    def add_package(
        self, *, name: str, idx: int, thread: int, total_packages: int
    ) -> PackageStatus:
        status = PackageStatus(
            name=name, idx=idx, thread=thread, total_packages=total_packages
        )
        self.packages.append(status)
        self.reset_grid()
        return status

    def remove_package(self, pkg: PackageStatus) -> None:
        self.packages.remove(pkg)
        self.reset_grid()

    def update_progress_bar(self):
        """Step the progress bar by one (to show that a package finished)"""
        self.progress.update(self.task, advance=1)

    def __rich__(self):
        return self.main_grid


def _validate_package_map(pkg_map: dict[str, BasePackage]) -> bool:
    # Check if dependencies are valid
    for pkg_name, pkg in pkg_map.items():
        for runtime_dep_name in pkg.run_dependencies:
            runtime_dep = pkg_map[runtime_dep_name]
            if runtime_dep.package_type == "static_library":
                raise ValueError(
                    f"{pkg_name} has an invalid dependency: {runtime_dep_name}. "
                    "Static libraries must be a host dependency."
                )

    # Check executables required to build packages are available
    missing_executables = defaultdict(list)
    for name, pkg in pkg_map.items():
        for exe in find_missing_executables(pkg.executables_required):
            missing_executables[exe].append(name)

    if missing_executables:
        error_msg = "The following executables are missing in the host system:\n"
        for executable, pkgs in missing_executables.items():
            error_msg += f"- {executable} (required by: {', '.join(pkgs)})\n"

        raise RuntimeError(error_msg)

    return True


def _parse_package_query(query: list[str] | str | None) -> tuple[set[str], set[str]]:
    """
    Parse a package query into a set of requested packages and a set of
    disabled packages.

    Parameters
    ----------
    query
        A list of packages to build; this can also be a comma-separated string.

    Returns
    -------
    A tuple of two sets: the first contains the requested packages, the second
    contains the disabled packages.

    Examples
    --------
    >>> _parse_package_query(None)
    (set(), set())
    >>> requested, disabled = _parse_package_query("a,b,c")
    >>> requested == {'a', 'b', 'c'}, disabled == set()
    (True, True)
    >>> requested, disabled = _parse_package_query("a,b,!c")
    >>> requested == {'a', 'b'}, disabled == {'c'}
    (True, True)
    >>> requested, disabled = _parse_package_query(["a", "b", "!c"])
    >>> requested == {'a', 'b'}, disabled == {'c'}
    (True, True)
    """
    if not query:
        query = []
    if isinstance(query, str):
        query = [el.strip() for el in query.split(",")]

    requested = set()
    disabled = set()
    for name in query:
        if not name:  # empty string
            continue
        if name.startswith("!"):
            disabled.add(name[1:])
        else:
            requested.add(name)

    return requested, disabled


def generate_dependency_graph(
    packages_dir: Path,
    requested: set[str],
    disabled: set[str] | None = None,
) -> dict[str, BasePackage]:
    """Generate a dependency graph for the given packages.

    A node in the graph is a BasePackage object defined above, which maintains
    a list of dependencies and also dependents. That is, each node stores both
    incoming and outgoing edges.

    The dependencies and dependents are stored via their name, and we have a
    lookup table pkg_map: Dict[str, BasePackage] to look up the corresponding
    BasePackage object. The function returns pkg_map, which contains all
    packages in the graph as its values.

    Parameters
    ----------
    packages_dir
        A directory that contains packages
    requested
        A set of packages to build
    disabled
        A set of packages to not build

    Returns
    -------
    A dictionary mapping package names to BasePackage objects
    """
    pkg: BasePackage
    pkgname: str

    pkg_map: dict[str, BasePackage] = {}

    if not disabled:
        disabled = set()

    # Create the dependency graph.
    # On the first pass, add all dependencies regardless of whether they are
    # disabled, since a disabled package may still be pulled in as a transitive
    # dependency.
    graph = {}
    all_recipes = recipe.load_all_recipes(packages_dir)
    no_numpy_dependents = "no-numpy-dependents" in requested
    requested.discard("no-numpy-dependents")

    packages = requested.copy()
    while packages:
        pkgname = packages.pop()

        if pkgname not in all_recipes:
            raise ValueError(
                f"No metadata file found for the following package: {pkgname}"
            )

        pkg = Package(packages_dir / pkgname, all_recipes[pkgname])
        pkg_map[pkgname] = pkg
        graph[pkgname] = pkg.dependencies

        for dep in pkg.dependencies:
            if pkg_map.get(dep) is None:
                packages.add(dep)

    # Traverse in build order (dependencies first, then dependents).
    # Mark a package as disabled if it has been explicitly disabled or if any
    # of its transitive dependencies is disabled.
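    # graphlib.TopologicalSorter.static_order() yields each node only after all
    # of its predecessors; since graph maps a package to its dependencies,
    # dependencies come out before their dependents (e.g. for
    # graph = {"scipy": {"numpy"}, "numpy": set()}, "numpy" is yielded before
    # "scipy").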
    for pkgname in TopologicalSorter(graph).static_order():
        pkg = pkg_map[pkgname]
        if pkgname in disabled:
            pkg.disabled = True
            continue
        if no_numpy_dependents and "numpy" in pkg.dependencies:
            pkg.disabled = True
            continue
        for dep in pkg.dependencies:
            if pkg_map[dep].disabled:
                pkg.disabled = True
                break

    # Now traverse in reverse build order (dependents first, then their
    # dependencies).
    # Locate the subset of packages that are transitive dependencies of packages
    # that are requested and not disabled.
    requested_with_deps = requested.copy()
    disabled_packages = set()
    for pkgname in reversed(list(TopologicalSorter(graph).static_order())):
        pkg = pkg_map[pkgname]

        if pkg.disabled:
            requested_with_deps.discard(pkgname)
            disabled_packages.add(pkgname)
            continue

        if pkgname not in requested_with_deps:
            continue

        requested_with_deps.update(pkg.dependencies)
        for dep in pkg.host_dependencies:
            pkg_map[dep].host_dependents.add(pkg.name)

    pkg_map = {name: pkg_map[name] for name in requested_with_deps}
    _validate_package_map(pkg_map)

    if disabled_packages:
        logger.warning(
            f"The following packages are disabled: {', '.join(disabled_packages)}"
        )

    return pkg_map


def job_priority(pkg: BasePackage) -> int:
    if pkg.name == "numpy":
        return 0
    else:
        return 1


def format_name_list(l: list[str]) -> str:
    """
    >>> format_name_list(["regex"])
    'regex'
    >>> format_name_list(["regex", "parso"])
    'regex and parso'
    >>> format_name_list(["regex", "parso", "jedi"])
    'regex, parso, and jedi'
    """
    if len(l) == 1:
        return l[0]
    most = l[:-1]
    if len(most) > 1:
        most = [x + "," for x in most]
    return " ".join(most) + " and " + l[-1]


def mark_package_needs_build(
    pkg_map: dict[str, BasePackage], pkg: BasePackage, needs_build: set[str]
) -> None:
    """
    Helper for generate_needs_build_set. Modifies needs_build in place.

    Recursively add pkg and all of its dependents to needs_build.
    """
    if pkg.name in needs_build:
        return
    needs_build.add(pkg.name)
    for dep in pkg.host_dependents:
        mark_package_needs_build(pkg_map, pkg_map[dep], needs_build)


def generate_needs_build_set(pkg_map: dict[str, BasePackage]) -> set[str]:
    """
    Generate the set of packages that need to be rebuilt.

    This consists of:
    1. packages whose source files have changed since they were last built,
       according to needs_rebuild, and
    2. packages which depend on case 1 packages.
    """
    needs_build: set[str] = set()
    for pkg in pkg_map.values():
        # Rebuild packages that have been updated, along with their dependents.
        if pkg.needs_rebuild():
            mark_package_needs_build(pkg_map, pkg, needs_build)
    return needs_build


def build_from_graph(
    pkg_map: dict[str, BasePackage],
    build_args: BuildArgs,
    n_jobs: int = 1,
    force_rebuild: bool = False,
) -> None:
    """
    Build the packages in pkg_map in parallel, building at most n_jobs
    packages at once.

    We have a priority queue of packages we are ready to build (build_queue),
    where a package is ready to build if all its dependencies are built. The
    priority is based on the number of dependents --- we prefer to build
    packages with more dependents first.

    To build packages in parallel, we use a thread pool of n_jobs many threads
    listening to build_queue. When a thread is free, it takes an item off
    build_queue and builds it.

    Once the package is built, it sends the package to the built_queue. The
    main thread listens to the built_queue and checks if any of the dependents
    are ready to be built. If so, it adds them to build_queue.
    """
    # Insert packages into build_queue. We *must* do this after counting
    # dependents, because the ordering ought not to change after insertion.
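    # Queue entries are (job_priority(pkg), pkg): numpy gets priority 0 and is
    # popped first; ties fall back to BasePackage.__lt__, which is inverted so
    # that packages with more dependents compare as "smaller" and are built
    # earlier.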
    build_queue: PriorityQueue[tuple[int, BasePackage]] = PriorityQueue()

    if force_rebuild:
        # If "force_rebuild" is set, just rebuild everything
        needs_build = set(pkg_map.keys())
    else:
        needs_build = generate_needs_build_set(pkg_map)

    # We won't rebuild the complement of the packages that we will build.
    already_built = set(pkg_map.keys()).difference(needs_build)

    # Remove the packages we've already built from the dependency sets of
    # the remaining ones
    for pkg_name in needs_build:
        pkg_map[pkg_name].unbuilt_host_dependencies.difference_update(already_built)

    if already_built:
        logger.info(
            "The following packages are already built: "
            f"[bold]{format_name_list(sorted(already_built))}[/bold]"
        )
    if not needs_build:
        logger.success("All packages already built. Quitting.")
        return

    logger.info(
        "Building the following packages: "
        f"[bold]{format_name_list(sorted(needs_build))}[/bold]"
    )

    for pkg_name in needs_build:
        pkg = pkg_map[pkg_name]
        if len(pkg.unbuilt_host_dependencies) == 0:
            build_queue.put((job_priority(pkg), pkg))

    built_queue: Queue[BasePackage | Exception] = Queue()
    thread_lock = Lock()
    queue_idx = 1
    building_rust_pkg = False
    progress_formatter = ReplProgressFormatter(len(needs_build))

    def builder(n: int) -> None:
        nonlocal queue_idx, building_rust_pkg
        while True:
            _, pkg = build_queue.get()

            with thread_lock:
                if pkg.meta.is_rust_package():
                    # Don't build multiple rust packages at the same time.
                    # See: https://github.com/pyodide/pyodide/issues/3565
                    # Note that if there are only rust packages left in the queue,
                    # this will keep pushing and popping packages until the current
                    # rust package is built. This is not ideal but presumably the
                    # overhead is negligible.
                    if building_rust_pkg:
                        build_queue.put((job_priority(pkg), pkg))

                        # Release the GIL so new packages get queued
                        sleep(0.1)
                        continue

                    building_rust_pkg = True

                pkg._queue_idx = queue_idx
                queue_idx += 1

            pkg_status = progress_formatter.add_package(
                name=pkg.name,
                idx=pkg._queue_idx,
                thread=n,
                total_packages=len(needs_build),
            )
            t0 = perf_counter()
            success = True
            try:
                pkg.build(build_args)
            except Exception as e:
                built_queue.put(e)
                success = False
                return
            finally:
                pkg_status.finish(success, perf_counter() - t0)
                progress_formatter.remove_package(pkg_status)

            built_queue.put(pkg)

            with thread_lock:
                if pkg.meta.is_rust_package():
                    building_rust_pkg = False

            # Release the GIL so new packages get queued
            sleep(0.01)

    for n in range(0, n_jobs):
        Thread(target=builder, args=(n + 1,), daemon=True).start()

    num_built = len(already_built)
    with Live(progress_formatter, console=console_stdout):
        while num_built < len(pkg_map):
            match built_queue.get():
                case BuildError() as err:
                    raise SystemExit(err.returncode)
                case Exception() as err:
                    raise err
                case a_package:
                    # MyPy should understand that this is a BasePackage
                    assert not isinstance(a_package, Exception)
                    pkg = a_package

            num_built += 1
            progress_formatter.update_progress_bar()

            for _dependent in pkg.host_dependents:
                dependent = pkg_map[_dependent]
                dependent.unbuilt_host_dependencies.remove(pkg.name)
                if len(dependent.unbuilt_host_dependencies) == 0:
                    build_queue.put((job_priority(dependent), dependent))


def generate_packagedata(
    output_dir: Path, pkg_map: dict[str, BasePackage]
) -> dict[str, PackageLockSpec]:
    packages: dict[str, PackageLockSpec] = {}
    for name, pkg in pkg_map.items():
        if not pkg.file_name or pkg.package_type == "static_library":
            continue
        if not Path(output_dir, pkg.file_name).exists():
            continue

        pkg_entry = PackageLockSpec(
            name=name,
            version=pkg.version,
            file_name=pkg.file_name,
            install_dir=pkg.install_dir,
            package_type=pkg.package_type,
        )
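        # Record the SHA-256 of the artifact that was copied into output_dir on
        # the lock-file entry.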
        pkg_entry.update_sha256(output_dir / pkg.file_name)

        pkg_type = pkg.package_type
        if pkg_type in ("shared_library", "cpython_module"):
            # We handle cpython modules as shared libraries
            pkg_entry.shared_library = True
            pkg_entry.install_dir = (
                "stdlib" if pkg_type == "cpython_module" else "dynlib"
            )

        pkg_entry.depends = [x.lower() for x in pkg.run_dependencies]
        if pkg.package_type not in ("static_library", "shared_library"):
            pkg_entry.imports = (
                pkg.meta.package.top_level if pkg.meta.package.top_level else [name]
            )

        packages[name.lower()] = pkg_entry

        if pkg.unvendored_tests:
            packages[name.lower()].unvendored_tests = True

            # Create the test package if necessary
            pkg_entry = PackageLockSpec(
                name=name + "-tests",
                version=pkg.version,
                depends=[name.lower()],
                file_name=pkg.unvendored_tests.name,
                install_dir=pkg.install_dir,
            )
            pkg_entry.update_sha256(output_dir / pkg.unvendored_tests.name)
            packages[name.lower() + "-tests"] = pkg_entry

    # sort packages by name
    packages = dict(sorted(packages.items()))
    return packages


def generate_lockfile(
    output_dir: Path, pkg_map: dict[str, BasePackage]
) -> PyodideLockSpec:
    """Generate the contents of the pyodide-lock.json file"""
    from . import __version__

    # Build the pyodide-lock.json data.
    [platform, _, arch] = build_env.platform().rpartition("_")
    info = {
        "arch": arch,
        "platform": platform,
        # This assumes that pyodide-build version == pyodide version.
        "version": __version__,
        "python": sys.version.partition(" ")[0],
    }
    packages = generate_packagedata(output_dir, pkg_map)
    return PyodideLockSpec(info=info, packages=packages)


def copy_packages_to_dist_dir(
    packages: Iterable[BasePackage],
    output_dir: Path,
    compression_level: int = 6,
    metadata_files: bool = False,
) -> None:
    for pkg in packages:
        if pkg.package_type == "static_library":
            continue

        dist_artifact_path = pkg.dist_artifact_path()

        shutil.copy(dist_artifact_path, output_dir)
        repack_zip_archive(
            output_dir / dist_artifact_path.name, compression_level=compression_level
        )

        if metadata_files and dist_artifact_path.suffix == ".whl":
            extract_wheel_metadata_file(
                dist_artifact_path,
                output_dir / f"{dist_artifact_path.name}.metadata",
            )

        test_path = pkg.tests_path()
        if test_path:
            shutil.copy(test_path, output_dir)


def build_packages(
    packages_dir: Path,
    targets: str,
    build_args: BuildArgs,
    n_jobs: int = 1,
    force_rebuild: bool = False,
) -> dict[str, BasePackage]:
    requested, disabled = _parse_package_query(targets)
    requested_packages = recipe.load_recipes(packages_dir, requested)
    pkg_map = generate_dependency_graph(
        packages_dir, set(requested_packages.keys()), disabled
    )

    build_from_graph(pkg_map, build_args, n_jobs, force_rebuild)
    for pkg in pkg_map.values():
        assert isinstance(pkg, Package)
        if pkg.package_type == "static_library":
            continue
        pkg.file_name = pkg.dist_artifact_path().name
        pkg.unvendored_tests = pkg.tests_path()

    return pkg_map


def copy_logs(pkg_map: dict[str, BasePackage], log_dir: Path) -> None:
    """
    Copy build logs of packages to the log directory.

    Parameters
    ----------
    pkg_map
        A dictionary mapping package names to package objects.
    log_dir
        The directory to copy the logs to.
""" log_dir.mkdir(exist_ok=True, parents=True) logger.info(f"Copying build logs to {log_dir}") for pkg in pkg_map.values(): log_file = pkg.pkgdir / "build.log" if log_file.exists(): shutil.copy(log_file, log_dir / f"{pkg.name}.log") else: logger.warning(f"Warning: {pkg.name} has no build log") def install_packages( pkg_map: dict[str, BasePackage], output_dir: Path, compression_level: int = 6, metadata_files: bool = False, ) -> None: """ Install packages into the output directory. - copies build artifacts (wheel, zip, ...) to the output directory - create pyodide_lock.json pkg_map package map created from build_packages output_dir output directory to install packages into """ output_dir.mkdir(exist_ok=True, parents=True) logger.info(f"Copying built packages to {output_dir}") copy_packages_to_dist_dir( pkg_map.values(), output_dir, compression_level=compression_level, metadata_files=metadata_files, ) lockfile_path = output_dir / "pyodide-lock.json" logger.info(f"Writing pyodide-lock.json to {lockfile_path}") package_data = generate_lockfile(output_dir, pkg_map) package_data.to_json(lockfile_path) def set_default_build_args(build_args: BuildArgs) -> BuildArgs: args = dataclasses.replace(build_args) if args.cflags is None: args.cflags = build_env.get_build_flag("SIDE_MODULE_CFLAGS") # type: ignore[unreachable] if args.cxxflags is None: args.cxxflags = build_env.get_build_flag("SIDE_MODULE_CXXFLAGS") # type: ignore[unreachable] if args.ldflags is None: args.ldflags = build_env.get_build_flag("SIDE_MODULE_LDFLAGS") # type: ignore[unreachable] if args.target_install_dir is None: args.target_install_dir = build_env.get_build_flag("TARGETINSTALLDIR") # type: ignore[unreachable] if args.host_install_dir is None: args.host_install_dir = build_env.get_build_flag("HOSTINSTALLDIR") # type: ignore[unreachable] if args.compression_level is None: args.compression_level = int(build_env.get_build_flag("PYODIDE_ZIP_COMPRESSION_LEVEL")) # type: ignore[unreachable] return args