From 2ed49404e30f206894e8c25fb28f8135d0a69077 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 11 Feb 2020 17:46:18 -0500 Subject: [PATCH] Improve setup.py and call into Cython directly (#4952) * Improve setup.py and call into Cython directly * Add numpy to setup_requires * Improve clean helper * Update setup.cfg * Try if it builds without pyproject.toml * Update MANIFEST.in --- MANIFEST.in | 2 +- bin/cythonize.py | 169 ------------------------------------------ pyproject.toml | 3 - setup.cfg | 1 + setup.py | 165 ++++++++++++++++------------------------- spacy/tokenizer.pyx | 2 + spacy/tokens/span.pyx | 1 + 7 files changed, 67 insertions(+), 276 deletions(-) delete mode 100755 bin/cythonize.py delete mode 100644 pyproject.toml diff --git a/MANIFEST.in b/MANIFEST.in index 78655a5f4..266af1b0a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ recursive-include include *.h -recursive-include spacy *.txt +recursive-include spacy *.pyx *.pxd *.txt include LICENSE include README.md include bin/spacy diff --git a/bin/cythonize.py b/bin/cythonize.py deleted file mode 100755 index 554252294..000000000 --- a/bin/cythonize.py +++ /dev/null @@ -1,169 +0,0 @@ -#!/usr/bin/env python -""" cythonize.py - -Cythonize pyx files into C++ files as needed. - -Usage: cythonize.py [root] - -Checks pyx files to see if they have been changed relative to their -corresponding C++ files. If they have, then runs cython on these files to -recreate the C++ files. - -Additionally, checks pxd files and setup.py if they have been changed. If -they have, rebuilds everything. - -Change detection based on file hashes stored in JSON format. - -For now, this script should be run by developers when changing Cython files -and the resulting C++ files checked in, so that end-users (and Python-only -developers) do not get the Cython dependencies. - -Based upon: - -https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py -https://raw.githubusercontent.com/numpy/numpy/master/tools/cythonize.py - -Note: this script does not check any of the dependent C++ libraries. -""" -from __future__ import print_function - -import os -import sys -import json -import hashlib -import subprocess -import argparse - - -HASH_FILE = "cythonize.json" - - -def process_pyx(fromfile, tofile, language_level="-3"): - print("Processing %s" % fromfile) - try: - from Cython.Compiler.Version import version as cython_version - from distutils.version import LooseVersion - - if LooseVersion(cython_version) < LooseVersion("0.25"): - raise Exception("Require Cython >= 0.25") - - except ImportError: - pass - - flags = ["--fast-fail", language_level] - if tofile.endswith(".cpp"): - flags += ["--cplus"] - - try: - try: - r = subprocess.call( - ["cython"] + flags + ["-o", tofile, fromfile], env=os.environ - ) # See Issue #791 - if r != 0: - raise Exception("Cython failed") - except OSError: - # There are ways of installing Cython that don't result in a cython - # executable on the path, see gh-2397. - r = subprocess.call( - [ - sys.executable, - "-c", - "import sys; from Cython.Compiler.Main import " - "setuptools_main as main; sys.exit(main())", - ] - + flags - + ["-o", tofile, fromfile] - ) - if r != 0: - raise Exception("Cython failed") - except OSError: - raise OSError("Cython needs to be installed") - - -def preserve_cwd(path, func, *args): - orig_cwd = os.getcwd() - try: - os.chdir(path) - func(*args) - finally: - os.chdir(orig_cwd) - - -def load_hashes(filename): - try: - return json.load(open(filename)) - except (ValueError, IOError): - return {} - - -def save_hashes(hash_db, filename): - with open(filename, "w") as f: - f.write(json.dumps(hash_db)) - - -def get_hash(path): - return hashlib.md5(open(path, "rb").read()).hexdigest() - - -def hash_changed(base, path, db): - full_path = os.path.normpath(os.path.join(base, path)) - return not get_hash(full_path) == db.get(full_path) - - -def hash_add(base, path, db): - full_path = os.path.normpath(os.path.join(base, path)) - db[full_path] = get_hash(full_path) - - -def process(base, filename, db): - root, ext = os.path.splitext(filename) - if ext in [".pyx", ".cpp"]: - if hash_changed(base, filename, db) or not os.path.isfile( - os.path.join(base, root + ".cpp") - ): - preserve_cwd(base, process_pyx, root + ".pyx", root + ".cpp") - hash_add(base, root + ".cpp", db) - hash_add(base, root + ".pyx", db) - - -def check_changes(root, db): - res = False - new_db = {} - - setup_filename = "setup.py" - hash_add(".", setup_filename, new_db) - if hash_changed(".", setup_filename, db): - res = True - - for base, _, files in os.walk(root): - for filename in files: - if filename.endswith(".pxd"): - hash_add(base, filename, new_db) - if hash_changed(base, filename, db): - res = True - - if res: - db.clear() - db.update(new_db) - return res - - -def run(root): - db = load_hashes(HASH_FILE) - - try: - check_changes(root, db) - for base, _, files in os.walk(root): - for filename in files: - process(base, filename, db) - finally: - save_hashes(db, HASH_FILE) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Cythonize pyx files into C++ files as needed" - ) - parser.add_argument("root", help="root directory") - args = parser.parse_args() - run(args.root) diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index fed528d4a..000000000 --- a/pyproject.toml +++ /dev/null @@ -1,3 +0,0 @@ -[build-system] -requires = ["setuptools"] -build-backend = "setuptools.build_meta" diff --git a/setup.cfg b/setup.cfg index a3aede089..f360cac37 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,6 +31,7 @@ python_requires = >=3.6 setup_requires = wheel cython>=0.25 + numpy>=1.15.0 # We also need our Cython packages here to compile against cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 diff --git a/setup.py b/setup.py index 1afdc7ae4..31f22ba3f 100755 --- a/setup.py +++ b/setup.py @@ -1,34 +1,22 @@ #!/usr/bin/env python -import io -import os -import subprocess import sys -import contextlib from distutils.command.build_ext import build_ext from distutils.sysconfig import get_python_inc import distutils.util from distutils import ccompiler, msvccompiler from setuptools import Extension, setup, find_packages +import numpy +from pathlib import Path +from Cython.Build import cythonize +from Cython.Compiler import Options -def is_new_osx(): - """Check whether we're on OSX >= 10.10""" - name = distutils.util.get_platform() - if sys.platform != "darwin": - return False - elif name.startswith("macosx-10"): - minor_version = int(name.split("-")[1].split(".")[1]) - if minor_version >= 7: - return True - else: - return False - else: - return False +# Preserve `__doc__` on functions and classes +# http://docs.cython.org/en/latest/src/userguide/source_files_and_compilation.html#compiler-options +Options.docstrings = True PACKAGES = find_packages() - - MOD_NAMES = [ "spacy.parts_of_speech", "spacy.strings", @@ -61,16 +49,32 @@ MOD_NAMES = [ "spacy.symbols", "spacy.vectors", ] - - COMPILE_OPTIONS = { "msvc": ["/Ox", "/EHsc"], "mingw32": ["-O2", "-Wno-strict-prototypes", "-Wno-unused-function"], "other": ["-O2", "-Wno-strict-prototypes", "-Wno-unused-function"], } - - LINK_OPTIONS = {"msvc": [], "mingw32": [], "other": []} +COMPILER_DIRECTIVES = { + "language_level": -3, + "embedsignature": True, + "annotation_typing": False, +} + + +def is_new_osx(): + """Check whether we're on OSX >= 10.10""" + name = distutils.util.get_platform() + if sys.platform != "darwin": + return False + elif name.startswith("macosx-10"): + minor_version = int(name.split("-")[1].split(".")[1]) + if minor_version >= 7: + return True + else: + return False + else: + return False if is_new_osx(): @@ -103,95 +107,50 @@ class build_ext_subclass(build_ext, build_ext_options): build_ext.build_extensions(self) -def generate_cython(root, source): - print("Cythonizing sources") - p = subprocess.call( - [sys.executable, os.path.join(root, "bin", "cythonize.py"), source], - env=os.environ, - ) - if p != 0: - raise RuntimeError("Running cythonize failed") - - -def is_source_release(path): - return os.path.exists(os.path.join(path, "PKG-INFO")) - - def clean(path): - for name in MOD_NAMES: - name = name.replace(".", "/") - for ext in [".so", ".html", ".cpp", ".c"]: - file_path = os.path.join(path, name + ext) - if os.path.exists(file_path): - os.unlink(file_path) - - -@contextlib.contextmanager -def chdir(new_dir): - old_dir = os.getcwd() - try: - os.chdir(new_dir) - sys.path.insert(0, new_dir) - yield - finally: - del sys.path[0] - os.chdir(old_dir) + for path in path.glob("**/*"): + if path.is_file() and path.suffix in (".so", ".cpp"): + print(f"Deleting {path.name}") + path.unlink() def setup_package(): - root = os.path.abspath(os.path.dirname(__file__)) + root = Path(__file__).parent if len(sys.argv) > 1 and sys.argv[1] == "clean": - return clean(root) + return clean(root / "spacy") - with chdir(root): - with io.open(os.path.join(root, "spacy", "about.py"), encoding="utf8") as f: - about = {} - exec(f.read(), about) + with (root / "spacy" / "about.py").open("r") as f: + about = {} + exec(f.read(), about) - include_dirs = [ - get_python_inc(plat_specific=True), - os.path.join(root, "include"), - ] + include_dirs = [ + get_python_inc(plat_specific=True), + numpy.get_include(), + str(root / "include"), + ] + if ( + ccompiler.new_compiler().compiler_type == "msvc" + and msvccompiler.get_build_version() == 9 + ): + include_dirs.append(str(root / "include" / "msvc9")) + ext_modules = [] + for name in MOD_NAMES: + mod_path = name.replace(".", "/") + ".pyx" + ext = Extension(name, [mod_path], language="c++") + ext_modules.append(ext) + print("Cythonizing sources") + ext_modules = cythonize(ext_modules, compiler_directives=COMPILER_DIRECTIVES) - if ( - ccompiler.new_compiler().compiler_type == "msvc" - and msvccompiler.get_build_version() == 9 - ): - include_dirs.append(os.path.join(root, "include", "msvc9")) - - ext_modules = [] - for mod_name in MOD_NAMES: - mod_path = mod_name.replace(".", "/") + ".cpp" - extra_link_args = [] - # ??? - # Imported from patch from @mikepb - # See Issue #267. Running blind here... - if sys.platform == "darwin": - dylib_path = [".." for _ in range(mod_name.count("."))] - dylib_path = "/".join(dylib_path) - dylib_path = "@loader_path/%s/spacy/platform/darwin/lib" % dylib_path - extra_link_args.append("-Wl,-rpath,%s" % dylib_path) - ext_modules.append( - Extension( - mod_name, - [mod_path], - language="c++", - include_dirs=include_dirs, - extra_link_args=extra_link_args, - ) - ) - - if not is_source_release(root): - generate_cython(root, "spacy") - - setup( - name="spacy", - packages=PACKAGES, - version=about["__version__"], - ext_modules=ext_modules, - cmdclass={"build_ext": build_ext_subclass}, - ) + setup( + name="spacy", + packages=PACKAGES, + version=about["__version__"], + ext_modules=ext_modules, + cmdclass={"build_ext": build_ext_subclass}, + include_dirs=include_dirs, + package_data={"": ["*.pyx", "*.pxd", "*.pxi", "*.cpp"]}, + ) if __name__ == "__main__": diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index 7491a11fc..25d9f239d 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -1,5 +1,7 @@ # cython: embedsignature=True # cython: profile=True +from __future__ import unicode_literals + from cython.operator cimport dereference as deref from cython.operator cimport preincrement as preinc from libc.string cimport memcpy, memset diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 7ab1c1d18..d24a38029 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -1,3 +1,4 @@ +from __future__ import unicode_literals cimport numpy as np from libc.math cimport sqrt