#!/usr/bin/env python3
"""Helper for cross-compiling distutils-based Python extensions.

distutils has never had a proper cross-compilation story. This is a hack, which
miraculously works, to get around that.

The gist is we compile the package replacing calls to the compiler and linker
with wrappers that adjust include paths and flags as necessary for
cross-compiling and then pass the command along to emscripten.
"""
import json
import os
import shutil
import sys

IS_MAIN = __name__ == "__main__"
if IS_MAIN:
    # When run through one of the compiler symlinks, the parent build process
    # hands over its configuration as JSON in the environment.
    PYWASMCROSS_ARGS = json.loads(os.environ["PYWASMCROSS_ARGS"])
    # restore __name__ so that relative imports work as we expect
    __name__ = PYWASMCROSS_ARGS.pop("orig__name__")
    # This must happen before the `pyodide_build` imports below.
    sys.path = PYWASMCROSS_ARGS.pop("PYTHONPATH")

    PYWASMCROSS_ARGS["pythoninclude"] = os.environ["PYTHONINCLUDE"]

import re
import subprocess
from collections import namedtuple
from pathlib import Path, PurePosixPath
from typing import Any, MutableMapping, NoReturn

from pyodide_build import common
from pyodide_build._f2c_fixes import fix_f2c_input, fix_f2c_output, scipy_fixes

# Names of the tools this script impersonates via symlinks.
symlinks = {"cc", "c++", "ld", "ar", "gcc", "gfortran", "cargo"}


def symlink_dir() -> Path:
    """Return the directory holding the compiler-wrapper symlinks.

    The location is ``<TOOLSDIR>/symlinks`` where ``TOOLSDIR`` is read through
    ``common.get_make_flag``.
    """
    return Path(common.get_make_flag("TOOLSDIR")) / "symlinks"


ReplayArgs = namedtuple(
    "ReplayArgs",
    [
        "pkgname",
        "cflags",
        "cxxflags",
        "ldflags",
        "target_install_dir",
        "replace_libs",
        "builddir",
        "pythoninclude",
    ],
)


def make_command_wrapper_symlinks(env: MutableMapping[str, str]) -> None:
    """
    Makes sure all the symlinks that make this script look like a compiler
    exist.

    For every tool name, ``env`` is also updated in place so that the matching
    variable (``CC``, ``CXX``, ``LD``, ...) holds the symlink's name.
    """
    exec_path = Path(__file__).resolve()
    SYMLINKDIR = symlink_dir()
    for symlink in symlinks:
        symlink_path = SYMLINKDIR / symlink
        if os.path.lexists(symlink_path) and not symlink_path.exists():
            # remove broken symlink so it can be re-created
            symlink_path.unlink()
        try:
            symlink_path.symlink_to(exec_path)
        except FileExistsError:
            # A valid link is already in place; nothing to do.
            pass
        if symlink == "c++":
            var = "CXX"
        else:
            var = symlink.upper()
        env[var] = symlink


def compile(
    env: dict[str, str],
    *,
    pkgname: str,
    cflags: str,
    cxxflags: str,
    ldflags: str,
    target_install_dir: str,
    replace_libs: str,
) -> None:
    """Run the package build with compiler calls routed through this script.

    The keyword arguments have ``$(NAME)`` references substituted from ``env``
    and are serialized to JSON in the ``PYWASMCROSS_ARGS`` environment variable
    of a copy of ``env``. That copy also gets the symlink directory prepended
    to ``PATH`` before ``pyodide_build.pypabuild.build`` is called with it.

    Parameters
    ----------
    env
        The base environment for the build. It is copied, not modified.
    pkgname, cflags, cxxflags, ldflags, target_install_dir, replace_libs
        Settings forwarded to the wrapper invocations (see ``ReplayArgs``).

    Raises
    ------
    BaseException
        Whatever ``build`` raises is re-raised after ``build.log`` in the
        current directory (if any) has been deleted.
    """
    kwargs = dict(
        pkgname=pkgname,
        cflags=cflags,
        cxxflags=cxxflags,
        ldflags=ldflags,
        target_install_dir=target_install_dir,
        replace_libs=replace_libs,
    )

    args = environment_substitute_args(kwargs, env)
    args["builddir"] = str(Path(".").absolute())

    env = dict(env)
    SYMLINKDIR = symlink_dir()
    env["PATH"] = f"{SYMLINKDIR}:{env['PATH']}"
    sysconfig_dir = Path(os.environ["TARGETINSTALLDIR"]) / "sysconfigdata"
    # Both entries are popped again by the wrapper process on startup.
    args["PYTHONPATH"] = sys.path + [str(sysconfig_dir)]
    args["orig__name__"] = __name__
    make_command_wrapper_symlinks(env)
    env["PYWASMCROSS_ARGS"] = json.dumps(args)
    env["_PYTHON_HOST_PLATFORM"] = common.platform()
    env["_PYTHON_SYSCONFIGDATA_NAME"] = os.environ["SYSCONFIG_NAME"]

    from pyodide_build.pypabuild import build

    try:
        build(env)
    except BaseException:
        build_log_path = Path("build.log")
        if build_log_path.exists():
            build_log_path.unlink()
        raise


def replay_f2c(args: list[str], dryrun: bool = False) -> list[str] | None:
    """Apply f2c to compilation arguments

    Parameters
    ----------
    args
       input compiler arguments
    dryrun
       if False run f2c on detected fortran files

    Returns
    -------
    new_args
       output compiler arguments, or None if the command contains neither a
       fortran source nor a `.so` file other than `libgfortran.so`


    Examples
    --------

    >>> replay_f2c(['gfortran', 'test.f'], dryrun=True)
    ['gcc', 'test.c']
    """
    new_args = ["gcc"]
    found_source = False
    for arg in args[1:]:
        if arg.endswith(".f") or arg.endswith(".F"):
            filepath = Path(arg).resolve()
            if not dryrun:
                fix_f2c_input(arg)
                if arg.endswith(".F"):
                    # .F files apparently expect to be run through the C
                    # preprocessor (they have #ifdef's in them)
                    subprocess.check_call(
                        [
                            "gcc",
                            "-E",
                            "-C",
                            "-P",
                            filepath,
                            "-o",
                            filepath.with_suffix(".f"),
                        ]
                    )
                    filepath = filepath.with_suffix(".f")
                subprocess.check_call(["f2c", filepath.name], cwd=filepath.parent)
                fix_f2c_output(arg[:-2] + ".c")
            new_args.append(arg[:-2] + ".c")
            found_source = True
        else:
            new_args.append(arg)

    # The checks and the message below look at the ORIGINAL command line.
    args_str = " ".join(args)
    if ".so" in args_str and "libgfortran.so" not in args_str:
        found_source = True

    if not found_source:
        print(f"f2c: source not found, skipping: {args_str}")
        return None
    return new_args


def get_library_output(line: list[str]) -> str | None:
    """
    Check if the command is a linker invocation. If so, return the name of the
    output file.

    The first argument that ends in `.so` and is not an option (does not start
    with `-`) is taken as the output; None is returned if there is none.
    """
    for arg in line:
        if arg.endswith(".so") and not arg.startswith("-"):
            return arg
    return None


def parse_replace_libs(replace_libs: str) -> dict[str, str]:
    """
    Parameters
    ----------
    replace_libs
        The `--replace-libs` argument, should be a string like "a=b;c=d".

    Returns
    -------
        The input string converted to a dictionary. Empty segments and entries
        with an empty right-hand side are dropped.

    Examples
    --------
    >>> parse_replace_libs("a=b;c=d;e=f")
    {'a': 'b', 'c': 'd', 'e': 'f'}
    """
    result = {}
    for entry in replace_libs.split(";"):
        if not entry:
            continue
        from_lib, to_lib = entry.split("=")
        if to_lib:
            result[from_lib] = to_lib
    return result


def replay_genargs_handle_dashl(
    arg: str, replace_libs: dict[str, str], used_libs: set[str]
) -> str | None:
    """
    Figure out how to replace a `-lsomelib` argument.

    Parameters
    ----------
    arg
        The argument we are replacing. Must start with `-l`.

    replace_libs
        The dictionary of libraries we are replacing

    used_libs
        The libraries we've used so far in this command. emcc fails out if `-lsomelib`
        occurs twice, so we have to track this. Updated in place.

    Returns
    -------
        The new argument, or None to delete the argument.
    """
    assert arg.startswith("-l")
    for lib_name, replacement in replace_libs.items():
        # this enables glob style **/* matching
        if PurePosixPath(arg[2:]).match(lib_name):
            # Later patterns are matched against the replaced name.
            arg = "-l" + replacement

    if arg == "-lffi":
        return None

    # See https://github.com/emscripten-core/emscripten/issues/8650
    if arg in ["-lfreetype", "-lz", "-lpng", "-lgfortran"]:
        return None

    # WASM link doesn't like libraries being included twice
    # skip second one
    if arg in used_libs:
        return None
    used_libs.add(arg)
    return arg


def replay_genargs_handle_dashI(arg: str, target_install_dir: str) -> str | None:
    """
    Figure out how to replace a `-Iincludepath` argument.

    Parameters
    ----------
    arg
        The argument we are replacing. Must start with `-I`.

    target_install_dir
        The target_install_dir argument.

    Returns
    -------
        The new argument, or None to delete the argument.
    """
    assert arg.startswith("-I")
    if (
        str(Path(arg[2:]).resolve()).startswith(sys.prefix + "/include/python")
        and "site-packages" not in arg
    ):
        # Point the host interpreter's include directory at the target one.
        return arg.replace("-I" + sys.prefix, "-I" + target_install_dir)
    # Don't include any system directories
    if arg[2:].startswith("/usr"):
        return None
    return arg


def replay_genargs_handle_linker_opts(arg: str) -> str | None:
    """
    ignore some link flags

    it should not check if `arg == "-Wl,-xxx"` and ignore directly here,
    because arg may be something like "-Wl,-xxx,-yyy" where we only want
    to ignore "-xxx" but not "-yyy".

    Parameters
    ----------
    arg
        The argument we are replacing. Must start with `-Wl`.

    Returns
    -------
        The `-Wl,...` argument with the ignored options removed, or None if no
        option is left.
    """
    assert arg.startswith("-Wl")
    link_opts = arg.split(",")[1:]
    new_link_opts = ["-Wl"]
    for opt in link_opts:
        if opt in [
            "-Bsymbolic-functions",
            # breaks emscripten see https://github.com/emscripten-core/emscripten/issues/14460
            "--strip-all",
            "-strip-all",
            # wasm-ld does not recognize some link flags
            "--sort-common",
            "--as-needed",
        ]:
            continue
        # ignore unsupported --sysroot compile argument used in conda
        if opt.startswith("--sysroot="):
            continue
        if opt.startswith("--version-script="):
            continue
        new_link_opts.append(opt)
    if len(new_link_opts) > 1:
        return ",".join(new_link_opts)
    else:
        return None


def replay_genargs_handle_argument(arg: str) -> str | None:
    """
    Figure out how to replace a general argument.

    Parameters
    ----------
    arg
        The argument we are replacing. Must not start with `-I` or `-l`.

    Returns
    -------
        The new argument, or None to delete the argument.
    """
    assert not arg.startswith("-I")  # should be handled by other functions
    assert not arg.startswith("-l")
    assert not arg.startswith("-Wl,")

    # Don't include any system directories
    if arg.startswith("-L/usr"):
        return None

    # fmt: off
    if arg in [
        # don't use -shared, SIDE_MODULE is already used
        # and -shared breaks it
        "-shared",
        # threading is disabled for now
        "-pthread",
        # this only applies to compiling fortran code, but we already f2c'd
        "-ffixed-form",
        # On Mac, we need to omit some darwin-specific arguments
        "-bundle", "-undefined", "dynamic_lookup",
        # This flag is needed to build numpy with SIMD optimization which we currently disable
        "-mpopcnt",
        # gcc flag that clang does not support
        "-Bsymbolic-functions",
        '-fno-second-underscore',
    ]:
        return None
    # fmt: on
    return arg


def handle_command_generate_args(
    line: list[str], args: ReplayArgs, is_link_command: bool
) -> list[str]:
    """
    A helper command for `handle_command` that generates the new arguments for
    the compilation.

    Unlike `handle_command` this avoids I/O: it doesn't sys.exit, it doesn't run
    subprocesses, it doesn't create any files, and it doesn't write to stdout.

    Parameters
    ----------
    line
        The original compilation command as a list e.g., ["gcc", "-c", "input.c", "-o", "output.c"]

    args
        The arguments that pywasmcross was invoked with

    is_link_command
        Is this a linker invocation?

    Returns
    -------
        An updated argument list suitable for use with emscripten. Commands
        that are not recognized are returned unchanged.

    Examples
    --------

    >>> from collections import namedtuple
    >>> Args = namedtuple('args', ['cflags', 'cxxflags', 'ldflags', 'replace_libs','target_install_dir'])
    >>> args = Args(cflags='', cxxflags='', ldflags='', replace_libs='',target_install_dir='')
    >>> handle_command_generate_args(['gcc', 'test.c'], args, False)
    ['emcc', '-Werror=implicit-function-declaration', '-Werror=mismatched-parameter-types', '-Werror=return-type', 'test.c']
    """
    if "-print-multiarch" in line:
        return ["echo", "wasm32-emscripten"]
    for arg in line:
        if arg.startswith("-print-file-name"):
            return line

    cmd = line[0]
    if cmd == "ar":
        # Build a new list instead of modifying the caller's `line`.
        return ["emar", *line[1:]]
    elif cmd in ("c++", "g++"):
        new_args = ["em++"]
    elif cmd in ("cc", "gcc", "ld"):
        new_args = ["emcc"]
        # distutils doesn't use the c++ compiler when compiling c++
        if any(arg.endswith((".cpp", ".cc")) for arg in line):
            new_args = ["em++"]
    else:
        return line

    # set linker and C flags to error on anything to do with function declarations being wrong.
    # In webassembly, any conflicts mean that a randomly selected 50% of calls to the function
    # will fail. Better to fail at compile or link time.
    if is_link_command:
        new_args.append("-Wl,--fatal-warnings")
    new_args.extend(
        [
            "-Werror=implicit-function-declaration",
            "-Werror=mismatched-parameter-types",
            "-Werror=return-type",
        ]
    )

    if is_link_command:
        new_args.extend(args.ldflags.split())
    if "-c" in line:
        if new_args[0] == "emcc":
            new_args.extend(args.cflags.split())
        elif new_args[0] == "em++":
            new_args.extend(args.cflags.split() + args.cxxflags.split())
        new_args.extend(["-I", args.pythoninclude])

    optflags_valid = [f"-O{tok}" for tok in "01234sz"]
    optflag = None
    # Identify the optflag (e.g. -O3) in cflags/cxxflags/ldflags. Last one has
    # priority.
    for arg in reversed(new_args):
        if arg in optflags_valid:
            optflag = arg
            break
    debugflag = None
    # Identify the debug flag (e.g. -g0) in cflags/cxxflags/ldflags. Last one has
    # priority.
    for arg in reversed(new_args):
        if arg.startswith("-g"):
            debugflag = arg
            break

    # The replacement table does not depend on the argument, parse it once.
    replace_libs = parse_replace_libs(args.replace_libs)
    used_libs: set[str] = set()
    # Go through and adjust arguments
    for arg in line[1:]:
        # The native build is possibly multithreaded, but the emscripten one
        # definitely isn't
        arg = re.sub(r"/python([0-9]\.[0-9]+)m", r"/python\1", arg)
        if arg in optflags_valid and optflag is not None:
            # There are multiple contradictory optflags provided, use the one
            # from cflags/cxxflags/ldflags
            continue
        if arg.startswith("-g") and debugflag is not None:
            continue
        if new_args[-1].startswith("-B") and "compiler_compat" in arg:
            # conda uses custom compiler search paths with the compiler_compat folder.
            # Ignore it.
            del new_args[-1]
            continue

        if arg.startswith("-l"):
            result = replay_genargs_handle_dashl(arg, replace_libs, used_libs)
        elif arg.startswith("-I"):
            result = replay_genargs_handle_dashI(arg, args.target_install_dir)
        elif arg.startswith("-Wl"):
            result = replay_genargs_handle_linker_opts(arg)
        else:
            result = replay_genargs_handle_argument(arg)

        if result:
            new_args.append(result)

    return new_args


def handle_command(
    line: list[str],
    args: ReplayArgs,
) -> NoReturn:
    """Handle a compilation command. Exit with an appropriate exit code when done.

    Parameters
    ----------
    line : iterable
       an iterable with the compilation arguments
    args : {object, namedtuple}
       an container with additional compilation options, in particular
       containing ``args.cflags``, ``args.cxxflags``, and ``args.ldflags``
    """
    # A command that names a `.so` file is treated as a link step. This is
    # decided on the original command, before any f2c rewriting.
    is_link_cmd = get_library_output(line) is not None

    if line[0] == "gfortran":
        if "-dumpversion" in line:
            sys.exit(subprocess.run(line).returncode)
        tmp = replay_f2c(line)
        if tmp is None:
            # Nothing to compile or link; report success.
            sys.exit(0)
        line = tmp

    new_args = handle_command_generate_args(line, args, is_link_cmd)

    if args.pkgname == "scipy":
        scipy_fixes(new_args)

    # FIXME: For some unknown reason,
    #        opencv-python tries to link a same library (libopencv_world.a) multiple times,
    #        which leads to 'duplicated symbols' error.
    if args.pkgname == "opencv-python":
        duplicated_lib = "libopencv_world.a"
        _new_args = []
        for arg in new_args:
            if duplicated_lib in arg and arg in _new_args:
                continue
            _new_args.append(arg)

        new_args = _new_args

    returncode = subprocess.run(new_args).returncode
    if returncode != 0:
        sys.exit(returncode)

    # Rust gives output files a `.wasm` suffix, but we need them to have a `.so`
    # suffix.
    if line[0:2] == ["cargo", "rustc"]:
        p = Path(args.builddir)
        for x in p.glob("**/*.wasm"):
            shutil.move(x, x.with_suffix(".so"))

    sys.exit(returncode)


def environment_substitute_args(
    args: dict[str, str], env: dict[str, str] | None = None
) -> dict[str, Any]:
    """Expand ``$(NAME)`` references in the string values of ``args``.

    Parameters
    ----------
    args
        The mapping whose values are expanded. Values that are not strings are
        copied over unchanged.
    env
        The variables available for substitution. Defaults to a copy of
        ``os.environ``.

    Returns
    -------
        A new dictionary with the same keys; ``args`` itself is not modified.
        References to names that are not in ``env`` are left as they are.
    """
    if env is None:
        env = dict(os.environ)
    subbed_args = {}
    for arg, value in args.items():
        if isinstance(value, str):
            for e_name, e_value in env.items():
                value = value.replace(f"$({e_name})", e_value)
        subbed_args[arg] = value
    return subbed_args


if IS_MAIN:
    # Take the symlink directory off PATH again so that the tools started from
    # here are looked up in the rest of PATH rather than among the symlinks.
    path = os.environ["PATH"]
    SYMLINKDIR = symlink_dir()
    while f"{SYMLINKDIR}:" in path:
        path = path.replace(f"{SYMLINKDIR}:", "")
    os.environ["PATH"] = path

    REPLAY_ARGS = ReplayArgs(**PYWASMCROSS_ARGS)

    # The name this script was invoked under selects the tool to emulate.
    basename = Path(sys.argv[0]).name
    args = list(sys.argv)
    args[0] = basename
    if basename in symlinks:
        sys.exit(handle_command(args, REPLAY_ARGS))
    else:
        raise Exception(f"Unexpected invocation '{basename}'")