2014-07-05 18:49:34 +00:00
|
|
|
#!/usr/bin/env python
|
2015-01-04 18:30:56 +00:00
|
|
|
from setuptools import setup
|
2015-03-09 05:46:35 +00:00
|
|
|
import shutil
|
2015-01-04 18:30:56 +00:00
|
|
|
|
2014-07-05 18:49:34 +00:00
|
|
|
import sys
|
|
|
|
import os
|
|
|
|
from os import path
|
2014-12-03 00:06:57 +00:00
|
|
|
|
2015-01-04 18:30:56 +00:00
|
|
|
from setuptools import Extension
|
2015-01-06 01:34:55 +00:00
|
|
|
from distutils import sysconfig
|
2015-10-12 22:31:59 +00:00
|
|
|
from distutils.core import setup, Extension
|
|
|
|
from distutils.command.build_ext import build_ext
|
|
|
|
|
2015-01-06 01:34:55 +00:00
|
|
|
import platform
|
|
|
|
|
2015-10-12 22:31:59 +00:00
|
|
|
# http://stackoverflow.com/questions/724664/python-distutils-how-to-get-a-compiler-that-is-going-to-be-used
|
|
|
|
# Extra compiler / linker flags, keyed by distutils compiler type.  The
# 'other' entry is used for every compiler type that has no entry of its own.
compile_options = {
    'msvc': ['/Ox', '/EHsc'],
    'other': ['-O3', '-Wno-strict-prototypes', '-Wno-unused-function'],
}
link_options = {
    'msvc': [],
    'other': [],
}
|
|
|
|
def _options_for(table, compiler_type):
    """Return the flags `table` holds for `compiler_type`.

    Falls back to the table's 'other' entry when the compiler type has no
    entry of its own, and returns None when neither key is present.
    """
    if compiler_type in table:
        return table[compiler_type]
    return table.get('other')


class build_ext_options:
    """Mixin for build_ext commands that applies per-compiler build flags.

    Expects the host command to provide `self.compiler` (with a
    `compiler_type` attribute) and `self.extensions`.
    """

    def build_options(self):
        """Set extra compile and link arguments on every extension.

        Flags are looked up in the module-level `compile_options` and
        `link_options` tables by `self.compiler.compiler_type`.  When a table
        has neither a matching entry nor an 'other' entry, the corresponding
        attribute of the extensions is left untouched.
        """
        compiler_type = self.compiler.compiler_type
        # Membership is tested with `in`: dict.has_key() does not exist on
        # Python 3.
        compile_args = _options_for(compile_options, compiler_type)
        link_args = _options_for(link_options, compiler_type)

        for ext in self.extensions:
            # Each extension gets its own copy so that a later in-place edit
            # of one extension's flags cannot leak into the others.
            if compile_args is not None:
                ext.extra_compile_args = list(compile_args)
            if link_args is not None:
                ext.extra_link_args = list(link_args)
|
|
|
|
|
|
|
|
class build_ext_subclass(build_ext, build_ext_options):
    """distutils build_ext command that applies the per-compiler flags."""

    def build_extensions(self):
        """Apply the compile/link options, then run the stock build."""
        # The compiler object exists by the time build_extensions() runs, so
        # this is where its type can be inspected.
        build_ext_options.build_options(self)
        build_ext.build_extensions(self)
|
|
|
|
|
|
|
|
|
|
|
|
|
2015-01-17 05:19:54 +00:00
|
|
|
# PyPy --- NB! PyPy doesn't really work, it segfaults all over the place. But
# this is necessary to get it to compile.
# We have to resort to monkey-patching to set the compiler, because PyPy broke
# all the everything.
#
# NOTE(review): names that other modules bound earlier with
# `from distutils.sysconfig import customize_compiler` keep pointing at the
# unpatched function -- confirm the patch reaches the build_ext command in use.

# Keep a handle on the stock implementation so the wrapper can delegate to it.
pre_patch_customize_compiler = sysconfig.customize_compiler


def my_customize_compiler(compiler):
    """Run the stock customize_compiler, then force the C++ driver to 'c++'."""
    pre_patch_customize_compiler(compiler)
    compiler.compiler_cxx = ['c++']


if platform.python_implementation() == 'PyPy':
    sysconfig.customize_compiler = my_customize_compiler
|
2015-01-03 10:02:10 +00:00
|
|
|
|
2015-01-25 03:49:10 +00:00
|
|
|
#def install_headers():
|
|
|
|
# dest_dir = path.join(sys.prefix, 'include', 'murmurhash')
|
|
|
|
# if not path.exists(dest_dir):
|
|
|
|
# shutil.copytree('murmurhash/headers/murmurhash', dest_dir)
|
|
|
|
#
|
|
|
|
# dest_dir = path.join(sys.prefix, 'include', 'numpy')
|
2015-01-17 05:19:54 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Header search path: the source tree itself plus the interpreter prefix's
# include directory.
includes = [
    '.',
    path.join(sys.prefix, 'include'),
]
|
|
|
|
|
|
|
|
|
2015-01-25 03:49:10 +00:00
|
|
|
# Best-effort copy of numpy's C headers into <sys.prefix>/include/numpy.
# Deliberately silent when numpy is not installed (ImportError) or when the
# copy cannot be made, e.g. because the destination already exists (OSError).
try:
    import numpy

    numpy_headers = path.join(numpy.get_include(), 'numpy')
    shutil.copytree(
        numpy_headers,
        path.join(sys.prefix, 'include', 'numpy')
    )
except (ImportError, OSError):
    pass
|
2015-01-25 03:49:10 +00:00
|
|
|
|
|
|
|
|
2015-03-09 05:46:35 +00:00
|
|
|
def clean(mod_names):
    """Delete the build artefacts that sit next to each module's source.

    For every dotted module name, the '.so', '.html', '.cpp' and '.c' files
    at the corresponding relative path are removed if they exist.  Files that
    are absent are skipped silently.
    """
    suffixes = ('.so', '.html', '.cpp', '.c')
    for mod_name in mod_names:
        stem = mod_name.replace('.', '/')
        for suffix in suffixes:
            artefact = stem + suffix
            if os.path.exists(artefact):
                os.unlink(artefact)
|
2014-07-05 18:49:34 +00:00
|
|
|
|
|
|
|
|
2015-01-04 18:30:56 +00:00
|
|
|
def name_to_path(mod_name, ext):
    """Turn a dotted module name into a relative file path ending in `.ext`.

    Example: ('spacy.syntax.parser', 'cpp') -> 'spacy/syntax/parser.cpp'.
    """
    stem = mod_name.replace('.', '/')
    return '{0}.{1}'.format(stem, ext)
|
2014-07-25 14:47:27 +00:00
|
|
|
|
|
|
|
|
2015-10-12 22:31:59 +00:00
|
|
|
def c_ext(mod_name, language, includes):
    """Build an Extension for a module from its already-generated source.

    `language` doubles as the source file suffix (e.g. 'cpp'), and `includes`
    is passed through as the extension's include directories.
    """
    source = name_to_path(mod_name, language)
    return Extension(mod_name, sources=[source], include_dirs=includes)
|
2015-01-04 10:14:07 +00:00
|
|
|
|
2014-07-25 14:47:27 +00:00
|
|
|
|
2015-10-12 22:31:59 +00:00
|
|
|
def cython_setup(mod_names, language, includes):
    """Run distutils setup() with extensions compiled from .pyx sources.

    mod_names -- dotted module names; each maps to '<path>.pyx'.
    language  -- target language; 'cpp' is translated to the 'c++' spelling
                 that is passed to Extension.
    includes  -- include directories handed to every extension.

    Cython is imported here rather than at module level so that the rest of
    the script can be used without it.
    """
    import Cython.Distutils
    import Cython.Build
    import distutils.core

    class build_ext_cython_subclass(Cython.Distutils.build_ext, build_ext_options):
        """Cython's build_ext with the per-compiler flags applied first."""

        def build_extensions(self):
            build_ext_options.build_options(self)
            Cython.Distutils.build_ext.build_extensions(self)

    if language == 'cpp':
        language = 'c++'

    exts = [
        Extension(mod_name, [mod_name.replace('.', '/') + '.pyx'],
                  language=language, include_dirs=includes)
        for mod_name in mod_names
    ]

    # Package metadata is kept in step with run_setup(): the licence and the
    # vocab data files had drifted out of date here.
    distutils.core.setup(
        name='spacy',
        packages=['spacy', 'spacy.tokens', 'spacy.en', 'spacy.serialize',
                  'spacy.syntax', 'spacy.munge'],
        description="Industrial-strength NLP",
        author='Matthew Honnibal',
        author_email='honnibal@gmail.com',
        version=VERSION,
        url="http://honnibal.github.io/spaCy/",
        package_data={"spacy": ["*.pxd"],
                      "spacy.en": ["*.pxd", "data/pos/*",
                                   "data/wordnet/*", "data/tokenizer/*",
                                   "data/vocab/lexemes.bin",
                                   "data/vocab/serializer.json",
                                   "data/vocab/oov_prob",
                                   "data/vocab/strings.txt"],
                      "spacy.syntax": ["*.pxd"]},
        ext_modules=exts,
        cmdclass={'build_ext': build_ext_cython_subclass},
        license="MIT",
    )
|
|
|
|
|
|
|
|
|
2015-01-04 18:30:56 +00:00
|
|
|
def run_setup(exts):
    """Call setup() for the package with the given pre-built extension list,
    then run the header-installation workaround.
    """
    package_data = {
        "spacy": ["*.pxd"],
        "spacy.en": [
            "*.pxd",
            "data/pos/*",
            "data/wordnet/*",
            "data/tokenizer/*",
            "data/vocab/lexemes.bin",
            "data/vocab/serializer.json",
            "data/vocab/oov_prob",
            "data/vocab/strings.txt",
        ],
        "spacy.syntax": ["*.pxd"],
    }
    requirements = [
        'numpy', 'murmurhash', 'cymem >= 1.11', 'preshed == 0.42',
        'thinc == 3.3', "text_unidecode", 'wget', 'plac', 'six',
        'ujson',
    ]

    setup(
        name='spacy',
        packages=['spacy', 'spacy.tokens', 'spacy.en', 'spacy.serialize',
                  'spacy.syntax', 'spacy.munge'],
        description="Industrial-strength NLP",
        author='Matthew Honnibal',
        author_email='honnibal@gmail.com',
        version=VERSION,
        url="http://honnibal.github.io/spaCy/",
        package_data=package_data,
        ext_modules=exts,
        license="MIT",
        install_requires=requirements,
        setup_requires=["headers_workaround"],
        cmdclass={'build_ext': build_ext_subclass},
    )

    # Imported only after setup() has returned.
    # NOTE(review): presumably because setup_requires is what makes the
    # package importable -- confirm.
    import headers_workaround

    headers_workaround.fix_venv_pypy_include()
    for package in ('murmurhash', 'numpy'):
        headers_workaround.install_headers(package)
|
2015-01-04 18:30:56 +00:00
|
|
|
|
|
|
|
|
2015-10-09 10:42:41 +00:00
|
|
|
# Release version; passed to setup() as the `version` argument.
VERSION = "0.94"
|
2015-01-04 18:30:56 +00:00
|
|
|
def main(modules, is_pypy):
    """Configure platform-specific flags and run the appropriate setup.

    modules -- dotted names of the extension modules to build.
    is_pypy -- despite its name, this carries the "use Cython" flag: the
               script entry point passes `use_cython` in this position.  When
               true the extensions are compiled from .pyx sources via
               cython_setup(); otherwise they are built from the generated
               C++ sources via run_setup().
    """
    language = "cpp"
    includes = ['.', path.join(sys.prefix, 'include')]

    if sys.platform.startswith('darwin'):
        # Add each flag individually (and only once, so that repeated calls
        # do not pile up duplicates).  Appending the list itself would nest a
        # list inside the flag list.
        for flag in ('-mmacosx-version-min=10.8', '-stdlib=libc++'):
            if flag not in compile_options['other']:
                compile_options['other'].append(flag)
        if '-lc++' not in link_options['other']:
            link_options['other'].append('-lc++')

    # Use the argument instead of the module global `use_cython`, which only
    # exists when the file is executed as a script.
    if is_pypy:
        cython_setup(modules, language, includes)
    else:
        exts = [c_ext(mn, language, includes) for mn in modules]
        run_setup(exts)
|
2015-01-04 18:30:56 +00:00
|
|
|
|
2015-01-25 05:32:48 +00:00
|
|
|
# Dotted names of every extension module that gets built (and cleaned).
MOD_NAMES = [
    'spacy.parts_of_speech',
    'spacy.strings',
    'spacy.lexeme',
    'spacy.vocab',
    'spacy.attrs',
    'spacy.morphology',
    'spacy.tagger',
    'spacy.syntax.stateclass',
    'spacy._ml',
    'spacy._theano',
    'spacy.tokenizer',
    'spacy.syntax.parser',
    'spacy.syntax.transition_system',
    'spacy.syntax.arc_eager',
    'spacy.syntax._parse_features',
    'spacy.gold',
    'spacy.orth',
    'spacy.tokens.doc',
    'spacy.tokens.spans',
    'spacy.tokens.token',
    'spacy.serialize.packer',
    'spacy.serialize.huffman',
    'spacy.serialize.bits',
    'spacy.cfile',
    'spacy.matcher',
    'spacy.syntax.ner',
]
|
2015-01-04 18:30:56 +00:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Read the command defensively: with no arguments sys.argv[1] would raise
    # IndexError before setup() gets the chance to print its usage message.
    command = sys.argv[1] if len(sys.argv) > 1 else None
    if command == 'clean':
        clean(MOD_NAMES)
    else:
        # Only `build_ext` compiles from the .pyx sources; every other
        # command builds from the already-generated C++ files.
        use_cython = command == 'build_ext'
        main(MOD_NAMES, use_cython)
|