#!/usr/bin/env python3
"""
Build all of the packages in a given directory.
"""

import argparse
from functools import total_ordering
import json
from pathlib import Path
from queue import Queue, PriorityQueue
import shutil
import subprocess
import sys
from threading import Thread, Lock
from time import sleep, perf_counter
from typing import Dict, Set, Optional, List, Any

from . import common
from .io import parse_package_config
from .common import UNVENDORED_STDLIB_MODULES


class BasePackage:
    pkgdir: Path
    name: str
    version: str
    meta: dict
    library: bool
    shared_library: bool
    dependencies: List[str]
    unbuilt_dependencies: Set[str]
    dependents: Set[str]
    unvendored_tests: Optional[bool] = None

    # We use this in the priority queue, which pops off the smallest element.
    # So we want the smallest element to have the largest number of dependents
    def __lt__(self, other) -> bool:
        return len(self.dependents) > len(other.dependents)

    def __eq__(self, other) -> bool:
        return len(self.dependents) == len(other.dependents)


@total_ordering
class StdLibPackage(BasePackage):
    def __init__(self, pkgdir: Path):
        self.pkgdir = pkgdir
        self.meta = {}
        self.name = pkgdir.stem
        self.version = "1.0"
        self.library = False
        self.shared_library = False
        self.dependencies = []
        self.unbuilt_dependencies = set()
        self.dependents = set()

    def build(self, outputdir: Path, args) -> None:
        # All build / packaging steps are already done in the main Makefile
        return


@total_ordering
class Package(BasePackage):
    def __init__(self, pkgdir: Path):
        self.pkgdir = pkgdir

        pkgpath = pkgdir / "meta.yaml"
        if not pkgpath.is_file():
            raise ValueError(f"Directory {pkgdir} does not contain meta.yaml")

        self.meta = parse_package_config(pkgpath)
        self.name = self.meta["package"]["name"]
        self.version = self.meta["package"]["version"]
        self.library = self.meta.get("build", {}).get("library", False)
        self.shared_library = self.meta.get("build", {}).get("sharedlibrary", False)

        assert self.name == pkgdir.stem

        self.dependencies = self.meta.get("requirements", {}).get("run", [])
        self.unbuilt_dependencies = set(self.dependencies)
        self.dependents = set()

    def build(self, outputdir: Path, args) -> None:
        with open(self.pkgdir / "build.log.tmp", "w") as f:
            p = subprocess.run(
                [
                    sys.executable,
                    "-m",
                    "pyodide_build",
                    "buildpkg",
                    str(self.pkgdir / "meta.yaml"),
                    "--cflags",
                    args.cflags,
                    "--cxxflags",
                    args.cxxflags,
                    "--ldflags",
                    args.ldflags,
                    "--target",
                    args.target,
                    "--install-dir",
                    args.install_dir,
                ],
                check=False,
                stdout=f,
                stderr=subprocess.STDOUT,
            )

        # Don't overwrite build log if we didn't build the file.
        # If the file didn't need to be rebuilt, the log will have exactly two lines.
        rebuilt = True
        with open(self.pkgdir / "build.log.tmp", "r") as f:
            try:
                next(f)
                next(f)
                next(f)
            except StopIteration:
                rebuilt = False

        if rebuilt:
            shutil.move(self.pkgdir / "build.log.tmp", self.pkgdir / "build.log")  # type: ignore
        else:
            (self.pkgdir / "build.log.tmp").unlink()

        if args.log_dir and (self.pkgdir / "build.log").exists():
            shutil.copy(
                self.pkgdir / "build.log", Path(args.log_dir) / f"{self.name}.log"
            )

        try:
            p.check_returncode()
        except subprocess.CalledProcessError:
            print(f"Error building {self.name}. Printing build logs.")
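            # Dump the full build log to stdout so the failure context is visible
            # (e.g. in CI output) before re-raising the error.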
Printing build logs.") with open(self.pkgdir / "build.log", "r") as f: shutil.copyfileobj(f, sys.stdout) raise if not self.library: shutil.copyfile( self.pkgdir / "build" / (self.name + ".data"), outputdir / (self.name + ".data"), ) shutil.copyfile( self.pkgdir / "build" / (self.name + ".js"), outputdir / (self.name + ".js"), ) if (self.pkgdir / "build" / (self.name + "-tests.data")).exists(): shutil.copyfile( self.pkgdir / "build" / (self.name + "-tests.data"), outputdir / (self.name + "-tests.data"), ) shutil.copyfile( self.pkgdir / "build" / (self.name + "-tests.js"), outputdir / (self.name + "-tests.js"), ) def generate_dependency_graph( packages_dir: Path, packages: Set[str] ) -> Dict[str, BasePackage]: """This generates a dependency graph for listed packages. A node in the graph is a BasePackage object defined above, which maintains a list of dependencies and also dependents. That is, each node stores both incoming and outgoing edges. The dependencies and dependents are stored via their name, and we have a lookup table pkg_map: Dict[str, BasePackage] to look up the corresponding BasePackage object. The function returns pkg_map, which contains all packages in the graph as its values. Parameters: - packages_dir: directory that contains packages - packages: set of packages to build. If None, then all packages in packages_dir are compiled. Returns: - pkg_map: dictionary mapping package names to BasePackage objects """ pkg_map: Dict[str, BasePackage] = {} if "*" in packages: packages.discard("*") packages.update( str(x) for x in packages_dir.iterdir() if (x / "meta.yaml").is_file() ) no_numpy_dependents = "no-numpy-dependents" in packages if no_numpy_dependents: packages.discard("no-numpy-dependents") while packages: pkgname = packages.pop() pkg: BasePackage if pkgname in UNVENDORED_STDLIB_MODULES: pkg = StdLibPackage(packages_dir / pkgname) else: pkg = Package(packages_dir / pkgname) if no_numpy_dependents and "numpy" in pkg.dependencies: continue pkg_map[pkg.name] = pkg for dep in pkg.dependencies: if pkg_map.get(dep) is None: packages.add(dep) # Compute dependents for pkg in pkg_map.values(): for dep in pkg.dependencies: pkg_map[dep].dependents.add(pkg.name) return pkg_map def job_priority(pkg: BasePackage): if pkg.name == "numpy": return 0 else: return 1 def build_from_graph(pkg_map: Dict[str, BasePackage], outputdir: Path, args) -> None: """ This builds packages in pkg_map in parallel, building at most args.n_jobs packages at once. We have a priority queue of packages we are ready to build (build_queue), where a package is ready to build if all its dependencies are built. The priority is based on the number of dependents --- we prefer to build packages with more dependents first. To build packages in parallel, we use a thread pool of args.n_jobs many threads listening to build_queue. When the thread is free, it takes an item off build_queue and builds it. Once the package is built, it sends the package to the built_queue. The main thread listens to the built_queue and checks if any of the dependents are ready to be built. If so, it adds the package to the build queue. """ # Insert packages into build_queue. We *must* do this after counting # dependents, because the ordering ought not to change after insertion. 
    build_queue: PriorityQueue = PriorityQueue()

    print("Building the following packages: " + ", ".join(sorted(pkg_map.keys())))

    t0 = perf_counter()

    for pkg in pkg_map.values():
        if len(pkg.dependencies) == 0:
            build_queue.put((job_priority(pkg), pkg))

    built_queue: Queue = Queue()
    thread_lock = Lock()
    queue_idx = 1

    def builder(n):
        nonlocal queue_idx
        while True:
            pkg = build_queue.get()[1]
            with thread_lock:
                pkg._queue_idx = queue_idx
                queue_idx += 1
            print(f"[{pkg._queue_idx}/{len(pkg_map)}] (thread {n}) building {pkg.name}")
            t0 = perf_counter()
            try:
                pkg.build(outputdir, args)
            except Exception as e:
                built_queue.put(e)
                return

            print(
                f"[{pkg._queue_idx}/{len(pkg_map)}] (thread {n}) "
                f"built {pkg.name} in {perf_counter() - t0:.2f} s"
            )
            built_queue.put(pkg)
            # Release the GIL so new packages get queued
            sleep(0.01)

    for n in range(0, args.n_jobs):
        Thread(target=builder, args=(n + 1,), daemon=True).start()

    num_built = 0
    while num_built < len(pkg_map):
        pkg = built_queue.get()
        if isinstance(pkg, Exception):
            raise pkg

        num_built += 1

        for _dependent in pkg.dependents:
            dependent = pkg_map[_dependent]
            dependent.unbuilt_dependencies.remove(pkg.name)
            if len(dependent.unbuilt_dependencies) == 0:
                build_queue.put((job_priority(dependent), dependent))

    for name in list(pkg_map):
        if (outputdir / (name + "-tests.js")).exists():
            pkg_map[name].unvendored_tests = True

    print(
        "\n===================================================\n"
        f"built all packages in {perf_counter() - t0:.2f} s"
    )


def generate_packages_json(pkg_map: Dict[str, BasePackage]) -> Dict:
    """Generate the data for the packages.json file"""
    # Build packages.json data.
    package_data: Dict[str, Dict[str, Any]] = {
        "info": {"arch": "wasm32", "platform": "Emscripten-1.0"},
        "packages": {},
    }

    libraries = [pkg.name for pkg in pkg_map.values() if pkg.library]

    # unvendored stdlib modules
    for name in UNVENDORED_STDLIB_MODULES:
        pkg_entry: Dict[str, Any] = {
            "name": name,
            "version": "1.0",
            "depends": [],
            "imports": [name],
        }
        package_data["packages"][name.lower()] = pkg_entry

    for name, pkg in pkg_map.items():
        if pkg.library:
            continue

        pkg_entry = {"name": name, "version": pkg.version}
        if pkg.shared_library:
            pkg_entry["shared_library"] = True
        pkg_entry["depends"] = [
            x.lower() for x in pkg.dependencies if x not in libraries
        ]
        pkg_entry["imports"] = pkg.meta.get("test", {}).get("imports", [name])

        package_data["packages"][name.lower()] = pkg_entry

        if pkg.unvendored_tests:
            package_data["packages"][name.lower()]["unvendored_tests"] = True

            # Create the test package if necessary
            pkg_entry = {
                "name": name + "-tests",
                "version": pkg.version,
                "depends": [name.lower()],
                "imports": [],
            }
            package_data["packages"][name.lower() + "-tests"] = pkg_entry
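
    # Entries in package_data["packages"] now look roughly like the following
    # (values are illustrative, not real output; the shared_library and
    # unvendored_tests keys are only present when applicable):
    #   {"name": "numpy", "version": "1.21.2", "depends": [...], "imports": ["numpy"]}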
if "soupsieve" in package_data["packages"]: package_data["packages"]["soupsieve"]["depends"].append("beautifulsoup4") # re-order packages by name package_data["packages"] = dict(sorted(package_data["packages"].items())) return package_data def build_packages(packages_dir: Path, outputdir: Path, args) -> None: packages = common._parse_package_subset(args.only) pkg_map = generate_dependency_graph(packages_dir, packages) build_from_graph(pkg_map, outputdir, args) package_data = generate_packages_json(pkg_map) with open(outputdir / "packages.json", "w") as fd: json.dump(package_data, fd) def make_parser(parser): parser.description = ( "Build all the packages in a given directory\n\n" "Unless the --only option is provided\n\n" "Note: this is a private endpoint that should not be used " "outside of the pyodide Makefile." ) parser.add_argument( "dir", type=str, nargs=1, help="Input directory containing a tree of package definitions", ) parser.add_argument( "output", type=str, nargs=1, help="Output directory in which to put all built packages", ) parser.add_argument( "--cflags", type=str, nargs="?", default=None, help="Extra compiling flags. Default: SIDE_MODULE_CFLAGS", ) parser.add_argument( "--cxxflags", type=str, nargs="?", default=None, help=("Extra C++ specific compiling flags. " "Default: SIDE_MODULE_CXXFLAGS"), ) parser.add_argument( "--ldflags", type=str, nargs="?", default=None, help="Extra linking flags. Default: SIDE_MODULE_LDFLAGS", ) parser.add_argument( "--target", type=str, nargs="?", default=None, help="The path to the target Python installation. Default: TARGETPYTHONROOT", ) parser.add_argument( "--install-dir", type=str, nargs="?", default="", help=( "Directory for installing built host packages. Defaults to setup.py " "default. Set to 'skip' to skip installation. Installation is " "needed if you want to build other packages that depend on this one." ), ) parser.add_argument( "--log-dir", type=str, dest="log_dir", nargs="?", default=None, help=("Directory to place log files"), ) parser.add_argument( "--only", type=str, nargs="?", default=None, help=("Only build the specified packages, provided as a comma-separated list"), ) parser.add_argument( "--n-jobs", type=int, nargs="?", default=4, help="Number of packages to build in parallel", ) return parser def main(args): packages_dir = Path(args.dir[0]).resolve() outputdir = Path(args.output[0]).resolve() if args.cflags is None: args.cflags = common.get_make_flag("SIDE_MODULE_CFLAGS") if args.cxxflags is None: args.cxxflags = common.get_make_flag("SIDE_MODULE_CXXFLAGS") if args.ldflags is None: args.ldflags = common.get_make_flag("SIDE_MODULE_LDFLAGS") if args.target is None: args.target = common.get_make_flag("TARGETPYTHONROOT") build_packages(packages_dir, outputdir, args) if __name__ == "__main__": parser = make_parser(argparse.ArgumentParser()) args = parser.parse_args() main(args)