From f8276bf5f37ef12aa0033634151fa33a6f7bd4f2 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Sat, 9 Nov 2024 03:12:55 +0100 Subject: [PATCH] gh-126187 Add emscripten.py script to automate emscripten build (#126190) Add emscripten.py script to automate emscripten build. This is modeled heavily on `Tools/wasm/wasi.py`. This will form the basis of an Emscripten build bot. --- ...-10-30-17-47-15.gh-issue-126187.0jFCZB.rst | 1 + Tools/wasm/README.md | 166 ++------- Tools/wasm/emscripten/__main__.py | 325 ++++++++++++++++++ 3 files changed, 355 insertions(+), 137 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2024-10-30-17-47-15.gh-issue-126187.0jFCZB.rst create mode 100644 Tools/wasm/emscripten/__main__.py diff --git a/Misc/NEWS.d/next/Build/2024-10-30-17-47-15.gh-issue-126187.0jFCZB.rst b/Misc/NEWS.d/next/Build/2024-10-30-17-47-15.gh-issue-126187.0jFCZB.rst new file mode 100644 index 00000000000..c295a91c222 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-10-30-17-47-15.gh-issue-126187.0jFCZB.rst @@ -0,0 +1 @@ +Introduced ``Tools/wasm/emscripten.py`` to simplify doing Emscripten builds. diff --git a/Tools/wasm/README.md b/Tools/wasm/README.md index bc3e4ba8bd5..4c9a643b0d9 100644 --- a/Tools/wasm/README.md +++ b/Tools/wasm/README.md @@ -1,7 +1,7 @@ # Python WebAssembly (WASM) build **WASI support is [tier 2](https://peps.python.org/pep-0011/#tier-2).** -**Emscripten is NOT officially supported as of Python 3.13.** +**Emscripten support is [tier 3](https://peps.python.org/pep-0011/#tier-3).** This directory contains configuration and helpers to facilitate cross compilation of CPython to WebAssembly (WASM). Python supports Emscripten @@ -27,154 +27,57 @@ ### Build support. The ``Emscripten/node`` target has threading enabled and can access the file system directly. -Cross compiling to the wasm32-emscripten platform needs the -[Emscripten](https://emscripten.org/) SDK and a build Python interpreter. -Emscripten 3.1.19 or newer are recommended. 
All commands below are relative -to a repository checkout. +To cross compile to the ``wasm32-emscripten`` platform you need +[the Emscripten compiler toolchain](https://emscripten.org/), +a Python interpreter, and an installation of Node version 18 or newer. Emscripten +version 3.1.42 or newer is recommended. All commands below are relative to a checkout +of the Python repository. -#### Toolchain - -##### Container image - -Christian Heimes maintains a container image with Emscripten SDK, Python -build dependencies, WASI-SDK, wasmtime, and several additional tools. - -From within your local CPython repo clone, run one of the following commands: - -``` -# Fedora, RHEL, CentOS -podman run --rm -ti -v $(pwd):/python-wasm/cpython:Z -w /python-wasm/cpython quay.io/tiran/cpythonbuild:emsdk3 - -# other -docker run --rm -ti -v $(pwd):/python-wasm/cpython -w /python-wasm/cpython quay.io/tiran/cpythonbuild:emsdk3 -``` - -##### Manually - -###### Install [Emscripten SDK](https://emscripten.org/docs/getting_started/downloads.html) - -**NOTE**: Follow the on-screen instructions how to add the SDK to ``PATH``. +#### Install [the Emscripten compiler toolchain](https://emscripten.org/docs/getting_started/downloads.html) +You can install the Emscripten toolchain as follows: ```shell -git clone https://github.com/emscripten-core/emsdk.git /opt/emsdk -/opt/emsdk/emsdk install latest -/opt/emsdk/emsdk activate latest +git clone https://github.com/emscripten-core/emsdk.git --depth 1 +./emsdk/emsdk install latest +./emsdk/emsdk activate latest ``` +To add the Emscripten compiler to your path: +```shell +source ./emsdk/emsdk_env.sh +``` +This adds `emcc` and `emconfigure` to your path. -###### Optionally: enable ccache for EMSDK +##### Optionally: enable ccache for EMSDK The ``EM_COMPILER_WRAPPER`` must be set after the EMSDK environment is sourced. Otherwise the source script removes the environment variable. -``` -. 
/opt/emsdk/emsdk_env.sh -EM_COMPILER_WRAPPER=ccache -``` - -###### Optionally: pre-build and cache static libraries - -Emscripten SDK provides static builds of core libraries without PIC -(position-independent code). Python builds with ``dlopen`` support require -PIC. To populate the build cache, run: - ```shell -. /opt/emsdk/emsdk_env.sh -embuilder build zlib bzip2 MINIMAL_PIC -embuilder --pic build zlib bzip2 MINIMAL_PIC +export EM_COMPILER_WRAPPER=ccache ``` - ### Compile and build Python interpreter -From within the container, run the following command: - +You can use `python Tools/wasm/emscripten` to compile and build targeting +Emscripten. You can do everything at once with: ```shell -./Tools/wasm/wasm_build.py build +python Tools/wasm/emscripten build ``` - -The command is roughly equivalent to: - +or you can break it out into four separate steps: ```shell -mkdir -p builddir/build -pushd builddir/build -../../configure -C -make -j$(nproc) -popd +python Tools/wasm/emscripten configure-build-python +python Tools/wasm/emscripten make-build-python +python Tools/wasm/emscripten configure-host +python Tools/wasm/emscripten make-host ``` - -#### Cross-compile to wasm32-emscripten for browser - +Extra arguments to the configure steps are passed along to configure. For +instance, to do a debug build, you can use: ```shell -./Tools/wasm/wasm_build.py emscripten-browser +python Tools/wasm/emscripten build -- --with-pydebug ``` -The command is roughly equivalent to: - -```shell -mkdir -p builddir/emscripten-browser -pushd builddir/emscripten-browser - -CONFIG_SITE=../../Tools/wasm/config.site-wasm32-emscripten \ - emconfigure ../../configure -C \ - --host=wasm32-unknown-emscripten \ - --build=$(../../config.guess) \ - --with-emscripten-target=browser \ - --with-build-python=$(pwd)/../build/python - -emmake make -j$(nproc) -popd -``` - -Serve `python.html` with a local webserver and open the file in a browser. 
-Python comes with a minimal web server script that sets necessary HTTP -headers like COOP, COEP, and mimetypes. Run the script outside the container -and from the root of the CPython checkout. - -```shell -./Tools/wasm/wasm_webserver.py -``` - -and open http://localhost:8000/builddir/emscripten-browser/python.html . This -directory structure enables the *C/C++ DevTools Support (DWARF)* to load C -and header files with debug builds. - - -#### Cross compile to wasm32-emscripten for node - -```shell -./Tools/wasm/wasm_build.py emscripten-node-dl -``` - -The command is roughly equivalent to: - -```shell -mkdir -p builddir/emscripten-node-dl -pushd builddir/emscripten-node-dl - -CONFIG_SITE=../../Tools/wasm/config.site-wasm32-emscripten \ - emconfigure ../../configure -C \ - --host=wasm32-unknown-emscripten \ - --build=$(../../config.guess) \ - --with-emscripten-target=node \ - --enable-wasm-dynamic-linking \ - --with-build-python=$(pwd)/../build/python - -emmake make -j$(nproc) -popd -``` - -```shell -node --experimental-wasm-threads --experimental-wasm-bulk-memory --experimental-wasm-bigint builddir/emscripten-node-dl/python.js -``` - -(``--experimental-wasm-bigint`` is not needed with recent NodeJS versions) - ### Limitations and issues -Emscripten before 3.1.8 has known bugs that can cause memory corruption and -resource leaks. 3.1.8 contains several fixes for bugs in date and time -functions. - #### Network stack - Python's socket module does not work with Emscripten's emulated POSIX @@ -241,8 +144,6 @@ #### Misc [gh-90548](https://github.com/python/cpython/issues/90548). - Python's object allocator ``obmalloc`` is disabled by default. - ``ensurepip`` is not available. -- Some ``ctypes`` features like ``c_longlong`` and ``c_longdouble`` may need - NodeJS option ``--experimental-wasm-bigint``. #### In the browser @@ -263,15 +164,6 @@ ### wasm32-emscripten in node - Node RawFS allows direct access to the host file system without need to perform ``FS.mount()`` call. 
-### wasm64-emscripten
-
-- wasm64 requires recent NodeJS and ``--experimental-wasm-memory64``.
-- ``EM_JS`` functions must return ``BigInt()``.
-- ``Py_BuildValue()`` format strings must match size of types. Confusing 32
-  and 64 bits types leads to memory corruption, see
-  [gh-95876](https://github.com/python/cpython/issues/95876) and
-  [gh-95878](https://github.com/python/cpython/issues/95878).
-
 ### Hosting Python WASM builds
 
 The simple REPL terminal uses SharedArrayBuffer. For security reasons
diff --git a/Tools/wasm/emscripten/__main__.py b/Tools/wasm/emscripten/__main__.py
new file mode 100644
index 00000000000..2015a3764ea
--- /dev/null
+++ b/Tools/wasm/emscripten/__main__.py
@@ -0,0 +1,362 @@
+#!/usr/bin/env python3
+"""Automate cross-compiling CPython to Emscripten.
+
+Run as ``python Tools/wasm/emscripten <subcommand>``. The ``build``
+subcommand configures and makes the build Python, then configures and makes
+the ``wasm32-emscripten`` host Python; both live under ``cross-build/``.
+"""
+
+import argparse
+import contextlib
+import functools
+import os
+
+try:
+    from os import process_cpu_count as cpu_count
+except ImportError:
+    from os import cpu_count
+from pathlib import Path
+import shlex
+import shutil
+import subprocess
+import sys
+import sysconfig
+import tempfile
+
+WASM_DIR = Path(__file__).parent.parent
+CHECKOUT = WASM_DIR.parent.parent
+
+CROSS_BUILD_DIR = CHECKOUT / "cross-build"
+BUILD_DIR = CROSS_BUILD_DIR / "build"
+HOST_TRIPLE = "wasm32-emscripten"
+HOST_DIR = CROSS_BUILD_DIR / HOST_TRIPLE
+
+LOCAL_SETUP = CHECKOUT / "Modules" / "Setup.local"
+# Written at the start of a generated Setup.local so that `clean` can tell it
+# apart from a hand-written one.
+LOCAL_SETUP_MARKER = "# Generated by Tools/wasm/emscripten.py\n".encode("utf-8")
+
+
+def updated_env(updates=None):
+    """Create a new dict representing the environment to use.
+
+    `updates` is an optional mapping of variables layered on top of
+    os.environ. The changes made to the execution environment are printed
+    out, and the resulting environment dict is returned.
+    """
+    env_defaults = {}
+    # https://reproducible-builds.org/docs/source-date-epoch/
+    git_epoch_cmd = ["git", "log", "-1", "--pretty=%ct"]
+    try:
+        epoch = subprocess.check_output(git_epoch_cmd, encoding="utf-8").strip()
+        env_defaults["SOURCE_DATE_EPOCH"] = epoch
+    except (subprocess.CalledProcessError, FileNotFoundError):
+        # Might be building from a tarball, possibly without git installed.
+        pass
+    # This layering lets SOURCE_DATE_EPOCH from os.environ take precedence.
+    environment = env_defaults | os.environ | (updates or {})
+
+    env_diff = {
+        key: value
+        for key, value in environment.items()
+        if os.environ.get(key) != value
+    }
+
+    print("🌎 Environment changes:")
+    for key in sorted(env_diff):
+        print(f" {key}={env_diff[key]}")
+
+    return environment
+
+
+def subdir(working_dir, *, clean_ok=False):
+    """Decorator to change to a working directory.
+
+    The wrapped function is called as ``func(context, working_dir)`` with
+    `working_dir` as the current directory; the directory is created first if
+    needed. If `clean_ok` is true and ``context.clean`` is set, an existing
+    directory is deleted beforehand.
+    """
+
+    def decorator(func):
+        @functools.wraps(func)
+        def wrapper(context):
+            # Ask the stdlib instead of shelling out to `tput cols`, which
+            # crashed with FileNotFoundError when `tput` was not installed.
+            terminal_width = shutil.get_terminal_size((80, 24)).columns
+            print("⎯" * terminal_width)
+            print("📁", working_dir)
+            if clean_ok and getattr(context, "clean", False) and working_dir.exists():
+                print("🚮 Deleting directory (--clean)...")
+                shutil.rmtree(working_dir)
+
+            working_dir.mkdir(parents=True, exist_ok=True)
+
+            with contextlib.chdir(working_dir):
+                return func(context, working_dir)
+
+        return wrapper
+
+    return decorator
+
+
+def call(command, *, quiet, **kwargs):
+    """Execute a command.
+
+    If 'quiet' is true, then redirect stdout and stderr to a temporary file.
+    Extra keyword arguments are passed to subprocess.check_call(), which
+    raises CalledProcessError if the command fails.
+    """
+    print("❯", " ".join(map(str, command)))
+    if not quiet:
+        subprocess.check_call(command, **kwargs, stdout=None, stderr=None)
+        return
+
+    # delete=False keeps the log on disk for inspection; the `with` block only
+    # closes our handle once the command has finished.
+    with tempfile.NamedTemporaryFile(
+        "w",
+        encoding="utf-8",
+        delete=False,
+        prefix="cpython-emscripten-",
+        suffix=".log",
+    ) as log:
+        print(f"📝 Logging output to {log.name} (--quiet)...")
+        subprocess.check_call(
+            command, **kwargs, stdout=log, stderr=subprocess.STDOUT
+        )
+
+
+def build_platform():
+    """The name of the build/host platform."""
+    # Can also be found via `config.guess`.
+    return sysconfig.get_config_var("BUILD_GNU_TYPE")
+
+
+def build_python_path():
+    """The path to the build Python binary.
+
+    Raises FileNotFoundError if neither `python` nor `python.exe` exists in
+    BUILD_DIR.
+    """
+    binary = BUILD_DIR / "python"
+    if not binary.is_file():
+        binary = binary.with_suffix(".exe")
+        if not binary.is_file():
+            raise FileNotFoundError(f"Unable to find `python(.exe)` in {BUILD_DIR}")
+
+    return binary
+
+
+@subdir(BUILD_DIR, clean_ok=True)
+def configure_build_python(context, working_dir):
+    """Configure the build/host Python."""
+    if LOCAL_SETUP.exists():
+        print(f"👍 {LOCAL_SETUP} exists ...")
+    else:
+        print(f"📝 Touching {LOCAL_SETUP} ...")
+        LOCAL_SETUP.write_bytes(LOCAL_SETUP_MARKER)
+
+    configure = [os.path.relpath(CHECKOUT / "configure", working_dir)]
+    if context.args:
+        configure.extend(context.args)
+
+    call(configure, quiet=context.quiet)
+
+
+@subdir(BUILD_DIR)
+def make_build_python(context, working_dir):
+    """Make/build the build Python."""
+    # cpu_count() may return None; fall back to a single job.
+    call(["make", "--jobs", str(cpu_count() or 1), "all"], quiet=context.quiet)
+
+    binary = build_python_path()
+    cmd = [
+        binary,
+        "-c",
+        "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')",
+    ]
+    version = subprocess.check_output(cmd, encoding="utf-8").strip()
+
+    print(f"🎉 {binary} {version}")
+
+
+@subdir(HOST_DIR, clean_ok=True)
+def configure_emscripten_python(context, working_dir):
+    """Configure the emscripten/host build.
+
+    A pydebug build is inferred from the name of the build Python's `lib.*`
+    directory. Also writes a `python.sh` wrapper script that runs `python.js`
+    with the host runner.
+    """
+    config_site = os.fsdecode(
+        CHECKOUT / "Tools" / "wasm" / "config.site-wasm32-emscripten"
+    )
+
+    python_build_dir = BUILD_DIR / "build"
+    lib_dirs = list(python_build_dir.glob("lib.*"))
+    # Raise explicitly rather than `assert`, which is skipped under -O.
+    if len(lib_dirs) != 1:
+        raise RuntimeError(
+            f"Expected a single lib.* directory in {python_build_dir}"
+        )
+    pydebug = os.fsdecode(lib_dirs[0]).endswith("-pydebug")
+
+    host_runner = context.host_runner
+    env_additions = {"CONFIG_SITE": config_site, "HOSTRUNNER": host_runner}
+    build_python = os.fsdecode(build_python_path())
+    configure = [
+        "emconfigure",
+        os.path.relpath(CHECKOUT / "configure", working_dir),
+        "CFLAGS=-DPY_CALL_TRAMPOLINE -sUSE_BZIP2",
+        f"--host={HOST_TRIPLE}",
+        f"--build={build_platform()}",
+        f"--with-build-python={build_python}",
+        "--without-pymalloc",
+        "--disable-shared",
+        "--disable-ipv6",
+        "--enable-big-digits=30",
+        "--enable-wasm-dynamic-linking",
+        f"--prefix={HOST_DIR}",
+    ]
+    if pydebug:
+        configure.append("--with-pydebug")
+    if context.args:
+        configure.extend(context.args)
+    call(
+        configure,
+        env=updated_env(env_additions),
+        quiet=context.quiet,
+    )
+
+    python_js = working_dir / "python.js"
+    exec_script = working_dir / "python.sh"
+    # Quote the script path so a checkout path containing spaces still works;
+    # the host runner is a command template and is deliberately left unquoted.
+    quoted_python_js = shlex.quote(os.fsdecode(python_js))
+    exec_script.write_text(
+        f'#!/bin/sh\nexec {host_runner} {quoted_python_js} "$@"\n'
+    )
+    exec_script.chmod(0o755)
+    print(f"🏃‍♀️ Created {exec_script} ... ")
+    sys.stdout.flush()
+
+
+@subdir(HOST_DIR)
+def make_emscripten_python(context, working_dir):
+    """Run `make` for the emscripten/host build."""
+    call(
+        # cpu_count() may return None; fall back to a single job.
+        ["make", "--jobs", str(cpu_count() or 1), "commoninstall"],
+        env=updated_env(),
+        quiet=context.quiet,
+    )
+
+    # Smoke-test the result by asking the new interpreter for its version.
+    exec_script = working_dir / "python.sh"
+    subprocess.check_call([exec_script, "--version"])
+
+
+def build_all(context):
+    """Build everything."""
+    steps = [
+        configure_build_python,
+        make_build_python,
+        configure_emscripten_python,
+        make_emscripten_python,
+    ]
+    for step in steps:
+        step(context)
+
+
+def clean_contents(context):
+    """Delete all files created by this script."""
+    if CROSS_BUILD_DIR.exists():
+        print(f"🧹 Deleting {CROSS_BUILD_DIR} ...")
+        shutil.rmtree(CROSS_BUILD_DIR)
+
+    if LOCAL_SETUP.exists():
+        # Only remove Setup.local if this script generated it.
+        with LOCAL_SETUP.open("rb") as file:
+            generated = file.read(len(LOCAL_SETUP_MARKER)) == LOCAL_SETUP_MARKER
+        if generated:
+            print(f"🧹 Deleting generated {LOCAL_SETUP} ...")
+            LOCAL_SETUP.unlink()
+
+
+def main():
+    """Parse the command line and run the selected subcommand."""
+    default_host_runner = "node"
+
+    parser = argparse.ArgumentParser()
+    subcommands = parser.add_subparsers(dest="subcommand")
+    build = subcommands.add_parser("build", help="Build everything")
+    configure_build = subcommands.add_parser(
+        "configure-build-python", help="Run `configure` for the build Python"
+    )
+    make_build = subcommands.add_parser(
+        "make-build-python", help="Run `make` for the build Python"
+    )
+    configure_host = subcommands.add_parser(
+        "configure-host",
+        help="Run `configure` for the host/emscripten "
+        "(pydebug builds are inferred from the build Python)",
+    )
+    make_host = subcommands.add_parser(
+        "make-host", help="Run `make` for the host/emscripten"
+    )
+    # `clean` takes no options, so its parser object is not kept.
+    subcommands.add_parser(
+        "clean", help="Delete files and directories created by this script"
+    )
+    for subcommand in build, configure_build, make_build, configure_host, make_host:
+        subcommand.add_argument(
+            "--quiet",
+            action="store_true",
+            default=False,
+            dest="quiet",
+            help="Redirect output from subprocesses to a log file",
+        )
+    for subcommand in configure_build, configure_host:
+        subcommand.add_argument(
+            "--clean",
+            action="store_true",
+            default=False,
+            dest="clean",
+            help="Delete any relevant directories before building",
+        )
+    for subcommand in build, configure_build, configure_host:
+        subcommand.add_argument(
+            "args", nargs="*", help="Extra arguments to pass to `configure`"
+        )
+    for subcommand in build, configure_host:
+        subcommand.add_argument(
+            "--host-runner",
+            action="store",
+            default=default_host_runner,
+            dest="host_runner",
+            help="Command template for running the emscripten host "
+            f"(default: `{default_host_runner}`)",
+        )
+
+    context = parser.parse_args()
+
+    dispatch = {
+        "configure-build-python": configure_build_python,
+        "make-build-python": make_build_python,
+        "configure-host": configure_emscripten_python,
+        "make-host": make_emscripten_python,
+        "build": build_all,
+        "clean": clean_contents,
+    }
+
+    if not context.subcommand:
+        # No command provided, display help and exit
+        print("Expected one of", ", ".join(sorted(dispatch)), file=sys.stderr)
+        parser.print_help(sys.stderr)
+        sys.exit(1)
+    dispatch[context.subcommand](context)
+
+
+if __name__ == "__main__":
+    main()