pyodide/pyodide_build/pywasmcross.py

406 lines
12 KiB
Python
Raw Normal View History

2018-06-20 18:54:47 +00:00
#!/usr/bin/env python3
"""Helper for cross-compiling distutils-based Python extensions.

distutils has never had a proper cross-compilation story. This is a hack, which
miraculously works, to get around that.

The gist is:

- Compile the package natively, replacing calls to the compiler and linker with
  wrappers that store the arguments in a log, and then delegate along to the
  real native compiler and linker.

- Remove all of the native build products.

- Play back the log, replacing the native compiler with emscripten and
  adjusting include paths and flags as necessary for cross-compiling to
  emscripten. This overwrites the results from the original native compilation.

While this results in more work than strictly necessary (it builds a native
version of the package, even though we then throw it away), it seems to be the
only reliable way to automatically build a package that interleaves
configuration with build.
"""
2018-06-20 18:54:47 +00:00
import argparse
import importlib.machinery
2018-06-20 18:54:47 +00:00
import json
import os
2018-08-03 16:48:22 +00:00
from pathlib import Path
2018-06-20 18:54:47 +00:00
import re
import subprocess
import sys
# absolute import is necessary as this file will be symlinked
# under tools
from pyodide_build import common
# Directory in which the compiler-named symlinks to this script are created.
ROOTDIR = common.ROOTDIR
# Names of the compiler/linker front-ends this script can be invoked as.
symlinks = {'cc', 'c++', 'ld', 'ar', 'gcc', 'gfortran'}
2018-06-20 18:54:47 +00:00
def collect_args(basename):
    """
    Record a compiler/linker invocation and delegate to the native tool.

    This is called when this script is called through a symlink that looks like
    a compiler or linker.

    It appends the command line to ``build.log`` (one JSON array per line),
    and then delegates to the real native compiler or linker, exiting with
    its return code.

    When the ``SKIP_HOST`` environment variable is set, compiler and linker
    commands that carry a ``-o <file>`` argument are not executed: an empty
    output file is created instead and the process exits with status 0.

    Parameters
    ----------
    basename : str
       name this script was invoked as (e.g. ``'gcc'``); it is also the name
       of the native tool that is looked up on the cleaned PATH

    Raises
    ------
    SystemExit
       always -- this function never returns to its caller
    """
    # Remove the symlink directory from the PATH, so we can delegate to the
    # native compiler. Whole PATH entries are compared (rather than replacing
    # a substring) so that a directory whose name merely ends with the same
    # text as ROOTDIR is not mangled, and a trailing ROOTDIR entry is removed
    # as well.
    env = dict(os.environ)
    rootdir = str(ROOTDIR)
    env['PATH'] = ':'.join(
        entry for entry in env['PATH'].split(':') if entry != rootdir)

    skip_host = 'SKIP_HOST' in os.environ
    cli_args = sys.argv[1:]
    command = [basename] + cli_args

    # Skip compilations of C/Fortran extensions for the target environment.
    # We still need to generate the output files for distutils to continue
    # the build.
    # TODO: This may need slight tuning for new projects. In particular,
    #       currently ar is not skipped, so a known failure would happen when
    #       we create some object files (that are empty as gcc is skipped), on
    #       which we run the actual ar command.
    skip = False
    if (skip_host
            # SKIP_HOST is left unset for numpy, as it is needed as build time
            # dependency by other packages (e.g. matplotlib)
            and basename in ('gcc', 'cc', 'c++', 'gfortran', 'ld')
            and '-o' in cli_args):
        # index of the output file path, i.e. the argument following '-o'
        out_idx = cli_args.index('-o') + 1
        if out_idx < len(cli_args):
            # create (or truncate to) an empty output file
            with open(cli_args[out_idx], 'wb') as fh:
                fh.write(b'')
            skip = True

    with open('build.log', 'a') as fd:
        # TODO: store skip status in the build.log
        json.dump(command, fd)
        fd.write('\n')

    if skip:
        sys.exit(0)
    sys.exit(subprocess.run(command, env=env).returncode)
2018-06-20 18:54:47 +00:00
def make_symlinks(env):
    """
    Ensure every compiler-named symlink to this script exists in ROOTDIR.

    For each name in ``symlinks`` a link ``ROOTDIR / name`` pointing at this
    file is created when missing (a dangling link is removed and re-created).
    The matching environment variable in ``env`` (``CXX`` for ``c++``,
    otherwise the upper-cased name) is set to the symlink name.

    Parameters
    ----------
    env : dict
       environment mapping, updated in place
    """
    target = Path(__file__).resolve()
    for name in symlinks:
        link = ROOTDIR / name
        # lexists() is true for a dangling link while exists() is not
        dangling = os.path.lexists(link) and not link.exists()
        if dangling:
            # remove broken symlink so it can be re-created
            link.unlink()
        if not link.exists():
            link.symlink_to(target)
        env['CXX' if name == 'c++' else name.upper()] = name
def capture_compile(args):
    """
    Run the native ``setup.py install`` with the wrapper symlinks on PATH.

    ROOTDIR is prepended to PATH so that compiler and linker invocations
    resolve to the symlinks created by ``make_symlinks``. If the build fails,
    any ``build.log`` in the current directory is deleted and the process
    exits with the build's return code.

    Parameters
    ----------
    args : object
       options container; only ``args.host`` is read here
    """
    env = dict(os.environ)
    make_symlinks(env)
    env['PATH'] = f"{ROOTDIR}:{os.environ['PATH']}"

    host_python = Path(args.host) / 'bin' / 'python3'
    result = subprocess.run([host_python, 'setup.py', 'install'], env=env)
    if result.returncode == 0:
        return

    # The native build failed: drop the log before exiting
    build_log_path = Path('build.log')
    if build_log_path.exists():
        build_log_path.unlink()
    sys.exit(result.returncode)
2018-10-26 15:22:10 +00:00
def f2c(args, dryrun=False):
    """Apply f2c to compilation arguments

    Every argument ending in ``.f`` is translated with the ``f2c`` tool
    (unless ``dryrun`` is set) and replaced by the corresponding ``.c`` file
    name. All other arguments are passed through unchanged.

    Parameters
    ----------
    args : iterable
       input compiler arguments
    dryrun : bool, default=False
       if False run f2c on detected fortran files

    Returns
    -------
    new_args : list or None
       output compiler arguments, or None when the command neither contains
       a ``.f`` source nor mentions a ``.so`` file other than
       ``libgfortran.so``

    Raises
    ------
    subprocess.CalledProcessError
       if ``f2c`` exits with a non-zero status

    Examples
    --------

    >>> f2c(['gfortran', 'test.f'], dryrun=True)
    ['gfortran', 'test.c']
    """
    # The arguments are scanned twice below; materialize them once so that a
    # one-shot iterator is not already exhausted on the second pass.
    args = list(args)

    new_args = []
    found_source = False
    for arg in args:
        if not arg.endswith('.f'):
            new_args.append(arg)
            continue
        if not dryrun:
            # run f2c from the directory of the source file
            filename = os.path.abspath(arg)
            subprocess.check_call(
                ['f2c', os.path.basename(filename)],
                cwd=os.path.dirname(filename))
        new_args.append(arg[:-2] + '.c')
        found_source = True

    # Commands that mention a .so file are kept as well, unless that file is
    # libgfortran.so
    new_args_str = ' '.join(args)
    if ".so" in new_args_str and "libgfortran.so" not in new_args_str:
        found_source = True

    if not found_source:
        print(f'f2c: source not found, skipping: {new_args_str}')
        return None
    return new_args
def handle_command(line, args, dryrun=False):
    """Handle a compilation command

    Translates one logged native compiler/linker/archiver invocation into the
    corresponding emscripten invocation, prints it, runs it, and for shared
    links renames the resulting ``.wasm`` file to a ``.so`` file.

    Parameters
    ----------
    line : iterable
       an iterable with the compilation arguments
    args : {object, namedtuple}
       an container with additional compilation options,
       in particular containing ``args.cflags`` and ``args.ldflags``
       (``args.host`` and ``args.target`` are read when ``-I`` arguments
       are present)
    dryrun : bool, default=False
       if True do not run the resulting command, only return it

    Returns
    -------
    new_args : list or None
       the emscripten command line, or None when the command was skipped

    Examples
    --------

    >>> from collections import namedtuple
    >>> Args = namedtuple('args', ['cflags', 'ldflags'])
    >>> args = Args(cflags='', ldflags='')
    >>> handle_command(['gcc', 'test.c'], args, dryrun=True)
    emcc test.c
    ['emcc', 'test.c']
    """
    # This is a special case to skip the compilation tests in numpy that aren't
    # actually part of the build
    for arg in line:
        if r'/file.c' in arg or '_configtest' in arg:
            return None
        if re.match(r'/tmp/.*/source\.[bco]+', arg):
            return None
        if arg == '-print-multiarch':
            return None
        if arg.startswith('/tmp'):
            return None

    if line[0] == 'gfortran':
        result = f2c(line)
        if result is None:
            return None
        line = result
        new_args = ['emcc']
    elif line[0] == 'ar':
        new_args = ['emar']
    elif line[0] == 'c++':
        new_args = ['em++']
    else:
        new_args = ['emcc']
        # distutils doesn't use the c++ compiler when compiling c++ <sigh>
        if any(arg.endswith('.cpp') for arg in line):
            new_args = ['em++']
    shared = '-shared' in line

    if shared:
        new_args.extend(args.ldflags.split())
    elif new_args[0] in ('emcc', 'em++'):
        new_args.extend(args.cflags.split())

    lapack_dir = None

    # Loop-invariant: does this command link statically against CLAPACK?
    joined_line = ' '.join(line)
    links_clapack = 'CLAPACK' in joined_line and '-L' in joined_line

    # ``output`` is the last rewritten .bc/.wasm argument; it is only a
    # fallback. The argument that follows '-o' is the real output file and
    # takes precedence, so that object files listed after '-o target.so' are
    # not mistaken for the link result.
    output = None
    explicit_output = None
    previous = None

    # Go through and adjust arguments
    for arg in line[1:]:
        follows_dash_o = previous == '-o'
        previous = arg

        if arg.startswith('-I'):
            # Don't include any system directories
            if arg[2:].startswith('/usr'):
                continue
            if (str(Path(arg[2:]).resolve()).startswith(args.host) and
                    'site-packages' not in arg):
                arg = arg.replace('-I' + args.host, '-I' + args.target)
        # Don't include any system directories
        if arg.startswith('-L/usr'):
            continue
        # The native build is possibly multithreaded, but the emscripten one
        # definitely isn't
        arg = re.sub(r'/python([0-9]\.[0-9]+)m', r'/python\1', arg)
        if arg.endswith('.o'):
            arg = arg[:-2] + '.bc'
            output = arg
        elif shared and arg.endswith('.so'):
            arg = arg[:-3] + '.wasm'
            output = arg
        if follows_dash_o:
            explicit_output = arg

        # Fix for scipy to link to the correct BLAS/LAPACK files
        if arg.startswith('-L') and 'CLAPACK-WA' in arg:
            out_idx = line.index('-o')
            out_idx += 1
            module_name = line[out_idx]
            module_name = Path(module_name).name.split('.')[0]

            lapack_dir = arg.replace('-L', '')
            # For convenience we determine needed scipy link libraries
            # here, instead of in patch files
            link_libs = ['F2CLIBS/libf2c.bc', 'blas_WA.bc']
            if module_name in ['_flapack', '_flinalg', '_calc_lwork',
                               'cython_lapack', '_iterative', '_arpack']:
                link_libs.append('lapack_WA.bc')

            for lib_name in link_libs:
                arg = os.path.join(lapack_dir, f"{lib_name}")
                new_args.append(arg)

            new_args.extend(['-s', 'INLINING_LIMIT=5'])
            continue

        # Use -Os for files that are statically linked to CLAPACK
        if arg.startswith('-O') and links_clapack:
            new_args.append('-Os')
            continue

        new_args.append(arg)

    if explicit_output is not None:
        output = explicit_output

    # This can only be used for incremental rebuilds -- it generates
    # an error during clean build of numpy
    # if os.path.isfile(output):
    #     print('SKIPPING: ' + ' '.join(new_args))
    #     return

    print(' '.join(new_args))

    if not dryrun:
        result = subprocess.run(new_args)
        if result.returncode != 0:
            sys.exit(result.returncode)

    # Emscripten .so files shouldn't have the native platform slug.
    # Only a '.wasm' output can be renamed: the slicing below assumes that
    # suffix, and a shared link without any recognised output file has
    # nothing to rename.
    if shared and output is not None and output.endswith('.wasm'):
        renamed = output[:-5] + '.so'
        for ext in importlib.machinery.EXTENSION_SUFFIXES:
            if ext == '.so':
                continue
            if renamed.endswith(ext):
                renamed = renamed[:-len(ext)] + '.so'
                break
        if not dryrun:
            os.rename(output, renamed)
    return new_args
2018-06-20 18:54:47 +00:00
def replay_compile(args):
    """
    Play back every command recorded in ``build.log``.

    Each line of the log is decoded as JSON and passed to ``handle_command``
    together with ``args``.
    """
    build_log_path = Path('build.log')
    # If pure Python, there will be no build.log file, which is fine -- just do
    # nothing
    if not build_log_path.is_file():
        return
    with open(build_log_path, 'r') as log:
        for entry in log:
            handle_command(json.loads(entry), args)
def clean_out_native_artifacts():
    """
    Delete every ``.o``, ``.so`` and ``.a`` file below the current directory.
    """
    native_suffixes = ('.o', '.so', '.a')
    for dirpath, _subdirs, filenames in os.walk('.'):
        directory = Path(dirpath)
        for filename in filenames:
            candidate = directory / filename
            if candidate.suffix not in native_suffixes:
                continue
            candidate.unlink()
2018-06-20 18:54:47 +00:00
def install_for_distribution(args):
    """
    Install the already-built package into the local ``install`` prefix.

    Runs ``setup.py install --skip-build --prefix=install`` with the host
    Python. The command is first tried with ``--old-and-unmanageable``; if
    that run exits with a non-zero status, a warning is printed and the
    command is retried without that argument.

    Parameters
    ----------
    args : object
       options container; only ``args.host`` is read here

    Raises
    ------
    subprocess.CalledProcessError
       if the retry also exits with a non-zero status
    OSError
       if the host Python cannot be started at all (e.g. it does not exist);
       no retry is attempted in that case, since dropping an argument cannot
       help
    """
    commands = [
        Path(args.host) / 'bin' / 'python3',
        'setup.py',
        'install',
        '--skip-build',
        '--prefix=install',
        '--old-and-unmanageable'
    ]
    try:
        subprocess.check_call(commands)
    except subprocess.CalledProcessError:
        # Only a failing exit status can be caused by the extra argument
        print(f'Warning: {" ".join(str(arg) for arg in commands)} failed '
              f'with distutils, possibly due to the use of distutils '
              f'that does not support the --old-and-unmanageable '
              'argument. Re-trying the install without this argument.')
        subprocess.check_call(commands[:-1])
2018-06-20 18:54:47 +00:00
def build_wrap(args):
    """
    Drive the whole cross-compilation: capture, clean, replay, install.

    The native capture step is skipped when a ``build.log`` file is already
    present in the current directory.
    """
    if not Path('build.log').is_file():
        capture_compile(args)
    clean_out_native_artifacts()
    replay_compile(args)
    install_for_distribution(args)
2018-06-20 18:54:47 +00:00
def make_parser(parser):
    """
    Configure ``parser`` for this tool and return it.

    When the script is invoked through one of the compiler symlinks, the
    parser's actions are cleared instead. Otherwise the description and the
    ``--cflags``, ``--ldflags``, ``--host`` and ``--target`` options are
    added, with defaults taken from ``common``.
    """
    if Path(sys.argv[0]).name in symlinks:
        # skip parsing of all arguments
        parser._actions = []
        return parser

    parser.description = (
        "Cross compile a Python distutils package. "
        "Run from the root directory of the package's source")
    options = (
        ('--cflags', common.DEFAULTCFLAGS, 'Extra compiling flags'),
        ('--ldflags', common.DEFAULTLDFLAGS, 'Extra linking flags'),
        ('--host', common.HOSTPYTHON,
         'The path to the host Python installation'),
        ('--target', common.TARGETPYTHON,
         'The path to the target Python installation'),
    )
    for flag, default, help_text in options:
        parser.add_argument(
            flag, type=str, nargs='?', default=default, help=help_text)
    return parser
2018-06-20 18:54:47 +00:00
2018-09-21 14:58:09 +00:00
def main(args):
    """
    Dispatch on the name this script was invoked as.

    Invoked through a compiler symlink, the call is handed to
    ``collect_args``; otherwise the full build is run via ``build_wrap``.
    """
    invoked_as = Path(sys.argv[0]).name
    if invoked_as not in symlinks:
        build_wrap(args)
        return
    collect_args(invoked_as)
if __name__ == '__main__':
    basename = Path(sys.argv[0]).name
    if basename not in symlinks:
        # Regular invocation: parse the command-line options
        parser = make_parser(argparse.ArgumentParser())
        args = parser.parse_args()
    else:
        # Invoked as a compiler symlink: the arguments belong to the compiler
        args = None
    main(args)