refactor, listen on setup.py, *.pxd

This commit is contained in:
Henning Peters 2016-02-05 15:37:00 +01:00
parent d19f1a3254
commit 7627969aba
1 changed files with 95 additions and 139 deletions

View File

@ -1,62 +1,50 @@
#!/usr/bin/env python #!/usr/bin/env python
""" cythonize """ cythonize.py
Cythonize pyx files into C files as needed. Cythonize pyx files into C++ files as needed.
Usage: cythonize [root_dir] Usage: cythonize.py [root]
Default [root_dir] is 'spacy'.
Checks pyx files to see if they have been changed relative to their Checks pyx files to see if they have been changed relative to their
corresponding C files. If they have, then runs cython on these files to corresponding C++ files. If they have, then runs cython on these files to
recreate the C files. recreate the C++ files.
The script thinks that the pyx files have changed relative to the C files Additionally, checks pxd files and setup.py if they have been changed. If
by comparing hashes stored in a database file. they have, rebuilds everything.
Simple script to invoke Cython (and Tempita) on all .pyx (.pyx.in) Change detection based on file hashes stored in JSON format.
files; while waiting for a proper build system. Uses file hashes to
figure out if rebuild is needed.
For now, this script should be run by developers when changing Cython files For now, this script should be run by developers when changing Cython files
only, and the resulting C files checked in, so that end-users (and Python-only and the resulting C++ files checked in, so that end-users (and Python-only
developers) do not get the Cython/Tempita dependencies. developers) do not get the Cython dependencies.
Originally written by Dag Sverre Seljebotn, and copied here from: Based upon:
https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py https://raw.github.com/dagss/private-scipy-refactor/cythonize/cythonize.py
https://raw.githubusercontent.com/numpy/numpy/master/tools/cythonize.py
Note: this script does not check any of the dependent C libraries; it only Note: this script does not check any of the dependent C++ libraries.
operates on the Cython .pyx files.
""" """
from __future__ import print_function
from __future__ import division, print_function, absolute_import
import os import os
import re
import sys import sys
import json
import hashlib import hashlib
import subprocess import subprocess
import argparse
HASH_FILE = 'cythonize.dat'
DEFAULT_ROOT = 'spacy'
VENDOR = 'spaCy'
# WindowsError is not defined on unix systems HASH_FILE = 'cythonize.json'
# Probe for the name: merely referencing it raises NameError when it is not
# defined, in which case a None placeholder is bound so the name always exists.
try:
    WindowsError
except NameError:
    WindowsError = None
#
# Rules
#
def process_pyx(fromfile, tofile): def process_pyx(fromfile, tofile):
print('Processing %s' % fromfile)
try: try:
from Cython.Compiler.Version import version as cython_version from Cython.Compiler.Version import version as cython_version
from distutils.version import LooseVersion from distutils.version import LooseVersion
if LooseVersion(cython_version) < LooseVersion('0.19'): if LooseVersion(cython_version) < LooseVersion('0.19'):
raise Exception('Building %s requires Cython >= 0.19' % VENDOR) raise Exception('Require Cython >= 0.19')
except ImportError: except ImportError:
pass pass
@ -67,133 +55,101 @@ def process_pyx(fromfile, tofile):
try: try:
try: try:
r = subprocess.call(['cython'] + flags + ["-o", tofile, fromfile]) r = subprocess.call(['cython'] + flags + ['-o', tofile, fromfile])
if r != 0: if r != 0:
raise Exception('Cython failed') raise Exception('Cython failed')
except OSError: except OSError:
# There are ways of installing Cython that don't result in a cython # There are ways of installing Cython that don't result in a cython
# executable on the path, see gh-2397. # executable on the path, see gh-2397.
r = subprocess.call([sys.executable, '-c', r = subprocess.call([sys.executable, '-c',
'import sys; from Cython.Compiler.Main import ' 'import sys; from Cython.Compiler.Main import '
'setuptools_main as main; sys.exit(main())'] + flags + 'setuptools_main as main; sys.exit(main())'] + flags +
["-o", tofile, fromfile]) ['-o', tofile, fromfile])
if r != 0: if r != 0:
raise Exception('Cython failed') raise Exception('Cython failed')
except OSError: except OSError:
raise OSError('Cython needs to be installed') raise OSError('Cython needs to be installed')
def process_tempita_pyx(fromfile, tofile):
    """Expand a Tempita template into a .pyx file, then cythonize it.

    `fromfile` is read as a template and substituted with `tempita.sub`;
    the result is written next to it with the '.pyx.in' suffix replaced by
    '.pyx', and that file is handed to `process_pyx` together with `tofile`.

    Raises Exception when neither Cython's bundled Tempita nor the
    standalone `tempita` package can be imported.
    """
    try:
        try:
            # Prefer the copy bundled with Cython.
            from Cython import Tempita as tempita
        except ImportError:
            import tempita
    except ImportError:
        raise Exception('Building %s requires Tempita: '
                        'pip install --user Tempita' % VENDOR)

    with open(fromfile, "r") as template_file:
        template_text = template_file.read()
    rendered = tempita.sub(template_text)

    template_suffix = '.pyx.in'
    assert fromfile.endswith('.pyx.in')
    pyx_path = fromfile[:-len(template_suffix)] + '.pyx'

    with open(pyx_path, "w") as pyx_file:
        pyx_file.write(rendered)

    process_pyx(pyx_path, tofile)
# Dispatch table: maps a source-file suffix to the function that processes
# files with that suffix.
rules = {
    # fromext : function
    '.pyx' : process_pyx,
    '.pyx.in' : process_tempita_pyx
    }
#
# Hash db
#
def load_hashes(filename):
    """Load the hash database stored in `filename`.

    Each non-blank line holds three whitespace-separated fields:
    a path, the hash of the input file and the hash of the output file.

    Returns a dict ``{path: (input_hash, output_hash)}``; the dict is empty
    when `filename` is not an existing file. A non-blank line with fewer
    than three fields raises ValueError.
    """
    hashes = {}
    if not os.path.isfile(filename):
        return hashes
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline added by hand).
                continue
            # Split from the right so a path that itself contains whitespace
            # stays intact; use a separate name so the `filename` parameter
            # is not overwritten.
            path, inhash, outhash = line.rsplit(None, 2)
            hashes[path] = (inhash, outhash)
    return hashes
def save_hashes(hash_db, filename):
    """Write `hash_db` to `filename`, one "path inhash outhash" line per entry.

    Entries are written in sorted key order; each value must be indexable
    at positions 0 and 1.
    """
    with open(filename, 'w') as out:
        out.writelines(
            "%s %s %s\n" % (path, pair[0], pair[1])
            for path, pair in sorted(hash_db.items())
        )
def sha1_of_file(filename):
    """Return the hexadecimal SHA-1 digest of the bytes stored in `filename`."""
    with open(filename, "rb") as stream:
        payload = stream.read()
    return hashlib.sha1(payload).hexdigest()
#
# Main program
#
def normpath(path):
    """Return `path` with forward slashes and without a leading './'.

    Every occurrence of the platform separator is replaced by '/'; a single
    leading './' is then dropped if present.
    """
    unified = '/'.join(path.split(os.sep))
    return unified[2:] if unified.startswith('./') else unified
def get_hash(frompath, topath):
    """Return ``(hash of frompath, hash of topath)`` as computed by `sha1_of_file`.

    The second element is None when `topath` does not exist.
    """
    source_digest = sha1_of_file(frompath)
    if os.path.exists(topath):
        target_digest = sha1_of_file(topath)
    else:
        target_digest = None
    return (source_digest, target_digest)
def process(path, fromfile, tofile, processor_function, hash_db):
fullfrompath = os.path.join(path, fromfile)
fulltopath = os.path.join(path, tofile)
current_hash = get_hash(fullfrompath, fulltopath)
if current_hash == hash_db.get(normpath(fullfrompath), None):
print('%s has not changed' % fullfrompath)
return
def preserve_cwd(path, func, *args):
orig_cwd = os.getcwd() orig_cwd = os.getcwd()
try: try:
os.chdir(path) os.chdir(path)
print('Processing %s' % fullfrompath) func(*args)
processor_function(fromfile, tofile)
finally: finally:
os.chdir(orig_cwd) os.chdir(orig_cwd)
# changed target file, recompute hash
current_hash = get_hash(fullfrompath, fulltopath)
# store hash in db
hash_db[normpath(fullfrompath)] = current_hash
def find_process_files(root_dir): def load_hashes(filename):
hash_db = load_hashes(HASH_FILE)
for cur_dir, dirs, files in os.walk(root_dir):
for filename in files:
in_file = os.path.join(cur_dir, filename + ".in")
if filename.endswith('.pyx') and os.path.isfile(in_file):
continue
for fromext, function in rules.items():
if filename.endswith(fromext):
toext = ".cpp"
# with open(os.path.join(cur_dir, filename), 'rb') as f:
# data = f.read()
# m = re.search(br"^\s*#\s*distutils:\s*language\s*=\s*c\+\+\s*$", data, re.I|re.M)
# if m:
# toext = ".cxx"
fromfile = filename
tofile = filename[:-len(fromext)] + toext
process(cur_dir, fromfile, tofile, function, hash_db)
save_hashes(hash_db, HASH_FILE)
def main():
try: try:
root_dir = sys.argv[1] return json.load(open(filename))
except IndexError: except (ValueError, IOError):
root_dir = DEFAULT_ROOT return {}
find_process_files(root_dir)
def save_hashes(hash_db, filename):
    """Serialize `hash_db` as JSON into `filename`, overwriting it.

    The file is closed deterministically so the data is flushed to disk
    before this function returns, rather than whenever the file object
    happens to be garbage-collected.
    """
    with open(filename, 'w') as f:
        json.dump(hash_db, f)
def get_hash(path):
    """Return the hexadecimal MD5 digest of the raw bytes of the file at `path`.

    The file is read in binary mode: hashlib digests require bytes on
    Python 3, and binary mode keeps the digest independent of platform
    newline translation and text encoding.
    """
    with open(path, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest()
def hash_changed(base, path, db):
    """Tell whether the file `base`/`path` differs from its entry in `db`.

    The key is the normalized join of `base` and `path`. Returns True when
    the file's current hash is not equal to the stored value, which includes
    the case where `db` has no entry for the key.
    """
    key = os.path.normpath(os.path.join(base, path))
    current = get_hash(key)
    recorded = db.get(key)
    return current != recorded
def hash_add(base, path, db):
    """Store the current hash of the file `base`/`path` in `db`.

    The entry is keyed by the normalized join of `base` and `path`; an
    existing entry for that key is overwritten.
    """
    key = os.path.normpath(os.path.join(base, path))
    digest = get_hash(key)
    db[key] = digest
def process(base, filename, db):
    """Re-run Cython for `filename` in directory `base` if its hash changed.

    Only names ending in '.pyx' or '.cpp' are considered. When the file's
    hash differs from `db`, `process_pyx` is invoked (through `preserve_cwd`,
    with `base` as the directory) on the sibling '.pyx' -> '.cpp' pair, and
    the hashes of both files are then recorded in `db`.
    """
    stem, suffix = os.path.splitext(filename)
    if suffix not in ['.pyx', '.cpp']:
        return
    if not hash_changed(base, filename, db):
        return

    pyx_name = stem + '.pyx'
    cpp_name = stem + '.cpp'
    preserve_cwd(base, process_pyx, pyx_name, cpp_name)
    # Record the generated file first, then its source.
    for name in (cpp_name, pyx_name):
        hash_add(base, name, db)
def check_changes(root, db):
    """Detect changes to setup.py or to any .pxd file below `root`.

    Current hashes of './setup.py' and of every '.pxd' file found by walking
    `root` are collected. If any of them differs from `db`, `db` is emptied
    and refilled with just those collected hashes, which drops every other
    stored entry.

    Returns True if a change was detected, False otherwise.
    """
    fresh = {}

    setup_filename = 'setup.py'
    hash_add('.', setup_filename, fresh)
    dirty = False
    if hash_changed('.', setup_filename, db):
        dirty = True

    for base, _, files in os.walk(root):
        for filename in files:
            if not filename.endswith('.pxd'):
                continue
            hash_add(base, filename, fresh)
            if hash_changed(base, filename, db):
                dirty = True

    if dirty:
        db.clear()
        db.update(fresh)
    return dirty
def run(root):
    """Process every file below `root` and persist the hash database.

    The database is loaded from HASH_FILE, `check_changes` is applied to it,
    and `process` is then called for each file found by walking `root`.
    The database is written back to HASH_FILE even if processing raises.
    """
    db = load_hashes(HASH_FILE)
    try:
        check_changes(root, db)
        for directory, _subdirs, names in os.walk(root):
            for name in names:
                process(directory, name, db)
    finally:
        # Always save, so work completed before a failure is not redone.
        save_hashes(db, HASH_FILE)
if __name__ == '__main__': if __name__ == '__main__':
main() parser = argparse.ArgumentParser(description='Cythonize pyx files into C++ files as needed')
parser.add_argument('root', help='root directory')
args = parser.parse_args()
run(args.root)