pyodide/pyodide_build/buildall.py

326 lines
9.9 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Build all of the packages in a given directory.
"""
import argparse
from functools import total_ordering
import json
from pathlib import Path
from queue import Queue, PriorityQueue
import shutil
import subprocess
import sys
from threading import Thread
from time import sleep
from typing import Dict, Set, Optional, List
from . import common
from .io import parse_package_config
@total_ordering
class Package:
    """
    One package directory (a folder holding a ``meta.yaml``) viewed as a node
    of the build dependency graph.

    Attributes set by ``__init__``:
    - pkgdir: directory the package was loaded from
    - meta: parsed contents of ``meta.yaml``
    - name: value of ``meta["package"]["name"]``
    - library / shared_library: the ``build.library`` and
      ``build.sharedlibrary`` flags from the metadata (both default to False)
    - dependencies: names listed under ``requirements.run``
    - unbuilt_dependencies: dependency names that have not been built yet
    - dependents: names of packages that depend on this one (starts empty and
      is filled in by whoever assembles the graph)

    Ordering and equality are defined purely in terms of the number of
    dependents so that instances can be placed in a priority queue. Because
    ``__eq__`` is overridden, instances are not hashable; the graph refers to
    packages by name instead.
    """

    def __init__(self, pkgdir: Path):
        """
        Load ``meta.yaml`` from pkgdir and initialise the graph bookkeeping.

        Raises:
        - ValueError: if pkgdir does not contain a ``meta.yaml`` file
        - AssertionError: if the declared package name differs from
          ``pkgdir.stem``
        """
        self.pkgdir = pkgdir

        pkgpath = pkgdir / "meta.yaml"
        if not pkgpath.is_file():
            raise ValueError(f"Directory {pkgdir} does not contain meta.yaml")

        self.meta: dict = parse_package_config(pkgpath)
        self.name: str = self.meta["package"]["name"]
        self.library: bool = self.meta.get("build", {}).get("library", False)
        self.shared_library: bool = self.meta.get("build", {}).get(
            "sharedlibrary", False
        )

        # The directory name is what other packages use to find this one, so
        # it has to agree with the name declared in the metadata.
        assert self.name == pkgdir.stem

        self.dependencies: List[str] = self.meta.get("requirements", {}).get("run", [])
        self.unbuilt_dependencies: Set[str] = set(self.dependencies)
        self.dependents: Set[str] = set()

    def build(self, outputdir: Path, args) -> None:
        """
        Build this package by running ``python -m pyodide_build buildpkg`` on
        its ``meta.yaml`` in a subprocess.

        The subprocess output (stdout and stderr combined) is written to
        ``build.log`` inside the package directory. For packages that are not
        flagged as ``library``, the resulting ``<name>.data`` and ``<name>.js``
        files are then copied from the package's ``build`` directory into
        outputdir.

        Parameters:
        - outputdir: directory that receives the built ``.data`` / ``.js`` files
        - args: object providing ``cflags``, ``cxxflags``, ``ldflags``,
          ``target`` and ``install_dir``, which are forwarded to ``buildpkg``

        Raises:
        - subprocess.CalledProcessError: if the build subprocess exits with a
          non-zero status (after echoing the build log to stdout)
        """
        with open(self.pkgdir / "build.log", "w") as f:
            p = subprocess.run(
                [
                    sys.executable,
                    "-m",
                    "pyodide_build",
                    "buildpkg",
                    str(self.pkgdir / "meta.yaml"),
                    "--cflags",
                    args.cflags,
                    "--cxxflags",
                    args.cxxflags,
                    "--ldflags",
                    args.ldflags,
                    "--target",
                    args.target,
                    "--install-dir",
                    args.install_dir,
                ],
                # The return code is checked below so that the log can be
                # printed before the error propagates.
                check=False,
                stdout=f,
                stderr=subprocess.STDOUT,
            )

        try:
            p.check_returncode()
        except subprocess.CalledProcessError:
            print(f"Error building {self.name}. Printing build logs.")

            with open(self.pkgdir / "build.log", "r") as f:
                shutil.copyfileobj(f, sys.stdout)

            raise

        if not self.library:
            shutil.copyfile(
                self.pkgdir / "build" / (self.name + ".data"),
                outputdir / (self.name + ".data"),
            )
            shutil.copyfile(
                self.pkgdir / "build" / (self.name + ".js"),
                outputdir / (self.name + ".js"),
            )

    # We use this in the priority queue, which pops off the smallest element.
    # So we want the smallest element to have the largest number of dependents
    def __lt__(self, other) -> bool:
        # Let Python handle comparisons against unrelated types instead of
        # failing with AttributeError on ``other.dependents``.
        if not isinstance(other, Package):
            return NotImplemented
        return len(self.dependents) > len(other.dependents)

    def __eq__(self, other) -> bool:
        # Equality means "same priority" (same number of dependents), not
        # "same package"; foreign types are never equal.
        if not isinstance(other, Package):
            return NotImplemented
        return len(self.dependents) == len(other.dependents)
def generate_dependency_graph(
    packages_dir: Path, package_list: Optional[str]
) -> Dict[str, Package]:
    """
    This generates a dependency graph for the packages listed in package_list.
    A node in the graph is a Package object defined above, which maintains a
    list of dependencies and a set of dependents. That is, each node stores
    both incoming and outgoing edges.

    The dependencies and dependents are stored via their name, and we have a
    lookup table pkg_map: Dict[str, Package] to look up the corresponding
    Package object. The function returns pkg_map, which contains all packages
    in the graph as its values.

    Dependencies of the requested packages are pulled into the graph as well,
    even if they were not named in package_list.

    Parameters:
    - packages_dir: directory that contains packages
    - package_list: set of packages to build. If None, then all packages in
      packages_dir are compiled.

    Returns:
    - pkg_map: dictionary mapping package names to Package objects

    Raises:
    - ValueError: (from Package) if a requested package or one of its
      dependencies has no ``meta.yaml`` under packages_dir
    """
    pkg_map: Dict[str, Package] = {}

    packages: Optional[Set[str]] = common._parse_package_subset(package_list)
    if packages is None:
        # Collect bare directory names, not full paths: every entry is joined
        # onto packages_dir below, and dependencies are added to this same set
        # by name. Using str(path) here only worked for absolute packages_dir
        # and let one package sit in the set under two different spellings.
        packages = {
            x.name for x in packages_dir.iterdir() if (x / "meta.yaml").is_file()
        }

    # Work-list traversal: loading a package may reveal dependencies that have
    # not been loaded yet, which are then queued for loading too.
    while packages:
        pkgname = packages.pop()

        pkg = Package(packages_dir / pkgname)
        pkg_map[pkg.name] = pkg

        for dep in pkg.dependencies:
            if dep not in pkg_map:
                packages.add(dep)

    # Compute dependents
    for pkg in pkg_map.values():
        for dep in pkg.dependencies:
            pkg_map[dep].dependents.add(pkg.name)

    return pkg_map
def build_from_graph(pkg_map: Dict[str, Package], outputdir: Path, args) -> None:
    """
    Build every package in pkg_map, running at most args.n_jobs builds
    concurrently.

    Two queues drive the process:

    - a priority queue of packages that are ready to build, i.e. whose
      dependencies have all been built already. Packages with more dependents
      are handed out first.
    - a plain queue of results. Worker threads push each package they finish
      onto it; if a build raises, the exception object is pushed instead.

    args.n_jobs worker threads pull from the ready queue. The calling thread
    consumes the result queue: for each finished package it marks the package
    as built in all of its dependents and enqueues any dependent that has no
    unbuilt dependencies left. An exception found on the result queue is
    re-raised in the calling thread.
    """
    # Packages must only be inserted after their dependents have been counted,
    # because their ordering ought not to change once they are in the queue.
    ready: PriorityQueue = PriorityQueue()
    finished: Queue = Queue()

    for candidate in pkg_map.values():
        if not candidate.dependencies:
            ready.put(candidate)

    def builder(n):
        print(f"Starting thread {n}")
        while True:
            job = ready.get()
            print(f"Thread {n} building {job.name}")
            try:
                job.build(outputdir, args)
            except Exception as err:
                # Hand the failure to the main thread and stop this worker.
                finished.put(err)
                return

            print(f"Thread {n} built {job.name}")
            finished.put(job)
            # Release the GIL so new packages get queued
            sleep(0.01)

    for worker_id in range(1, args.n_jobs + 1):
        Thread(target=builder, args=(worker_id,), daemon=True).start()

    remaining = len(pkg_map)
    while remaining > 0:
        outcome = finished.get()
        if isinstance(outcome, Exception):
            raise outcome

        remaining -= 1

        for dependent_name in outcome.dependents:
            waiting = pkg_map[dependent_name]
            waiting.unbuilt_dependencies.remove(outcome.name)
            if not waiting.unbuilt_dependencies:
                ready.put(waiting)
def build_packages(packages_dir: Path, outputdir: Path, args) -> None:
    """
    Build the packages selected by args.only (and their dependencies) from
    packages_dir, then write ``packages.json`` into outputdir.

    ``packages.json`` has three sections:
    - "dependencies": package name -> its run dependencies, with packages
      flagged as ``library`` filtered out
    - "import_name_to_package_name": each name under the package's
      ``test.imports`` metadata (defaulting to the package name itself) ->
      package name
    - "shared_library": package name -> True for packages flagged as shared
      libraries

    Packages flagged as ``library`` get no entry in any section.
    """
    graph = generate_dependency_graph(packages_dir, args.only)
    build_from_graph(graph, outputdir, args)

    # Build package.json data. The "test" package is built in a different way,
    # so we hardcode its existence here.
    #
    # This is done last so the Makefile can use it as a completion token.
    dependencies: dict = {"test": []}
    import_names: dict = {}
    shared_libraries: dict = {}

    library_names = {entry.name for entry in graph.values() if entry.library}

    for name, pkg in graph.items():
        if pkg.library:
            continue

        if pkg.shared_library:
            shared_libraries[name] = True

        dependencies[name] = [
            dep for dep in pkg.dependencies if dep not in library_names
        ]

        declared_imports = pkg.meta.get("test", {}).get("imports", [name])
        for import_name in declared_imports:
            import_names[import_name] = name

    package_data: dict = {
        "dependencies": dependencies,
        "import_name_to_package_name": import_names,
        "shared_library": shared_libraries,
    }

    with open(outputdir / "packages.json", "w") as fd:
        json.dump(package_data, fd)
def make_parser(parser):
    """
    Configure parser with the command line interface of this script and
    return it.

    Positional arguments ``dir`` and ``output`` are declared with ``nargs=1``
    and are therefore parsed into one-element lists.

    Every optional argument uses ``nargs="?"``. For those, ``const`` is set to
    the same value as ``default`` so that an option given without a value
    (e.g. a bare ``--n-jobs``) falls back to the default instead of silently
    becoming None.

    Parameters:
    - parser: an argparse parser (or subparser) to populate

    Returns:
    - the same parser object
    """
    parser.description = (
        "Build all of the packages in a given directory\n\n"
        "Unless the --only option is provided, every package found in the "
        "directory is built."
    )
    parser.add_argument(
        "dir",
        type=str,
        nargs=1,
        help="Input directory containing a tree of package definitions",
    )
    parser.add_argument(
        "output",
        type=str,
        nargs=1,
        help="Output directory in which to put all built packages",
    )
    parser.add_argument(
        "--cflags",
        type=str,
        nargs="?",
        default=common.DEFAULTCFLAGS,
        const=common.DEFAULTCFLAGS,
        help="Extra compiling flags",
    )
    parser.add_argument(
        "--cxxflags",
        type=str,
        nargs="?",
        default=common.DEFAULTCXXFLAGS,
        const=common.DEFAULTCXXFLAGS,
        help="Extra C++ specific compiling flags",
    )
    parser.add_argument(
        "--ldflags",
        type=str,
        nargs="?",
        default=common.DEFAULTLDFLAGS,
        const=common.DEFAULTLDFLAGS,
        help="Extra linking flags",
    )
    parser.add_argument(
        "--target",
        type=str,
        nargs="?",
        default=common.TARGETPYTHON,
        const=common.TARGETPYTHON,
        help="The path to the target Python installation",
    )
    parser.add_argument(
        "--install-dir",
        type=str,
        nargs="?",
        default="",
        const="",
        help=(
            "Directory for installing built host packages. Defaults to setup.py "
            "default. Set to 'skip' to skip installation. Installation is "
            "needed if you want to build other packages that depend on this one."
        ),
    )
    # A bare ``--only`` yields None (argparse's implicit const), which is the
    # same as not passing the option at all: no subset is selected.
    parser.add_argument(
        "--only",
        type=str,
        nargs="?",
        default=None,
        help=(
            "Only build the specified packages, provided as a comma separated list"
        ),
    )
    parser.add_argument(
        "--n-jobs",
        type=int,
        nargs="?",
        default=4,
        const=4,
        help="Number of packages to build in parallel",
    )
    return parser
def main(args):
    """
    Entry point for the parsed command line: resolve the input and output
    directories to absolute paths and build the packages.

    ``args.dir`` and ``args.output`` are one-element lists, so the first item
    of each is used.
    """
    source_root = Path(args.dir[0]).resolve()
    destination_root = Path(args.output[0]).resolve()
    build_packages(source_root, destination_root, args)
if __name__ == "__main__":
    # Script entry point: build the CLI on a fresh parser, parse sys.argv and
    # run the build. make_parser returns the parser it was given.
    parser = argparse.ArgumentParser()
    parser = make_parser(parser)
    args = parser.parse_args()
    main(args)