new approach to dependency headers

Henning Peters 2015-12-13 11:49:17 +01:00
parent a9fc35d3bf
commit ac318b568c
11 changed files with 252 additions and 580 deletions


@@ -0,0 +1 @@
recursive-include include *.h
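This directive packs every header beneath include/ into the source distribution. The include/ tree itself is not checked in: the rewritten setup.py below syncs it from the installed numpy and murmurhash packages at build time. A quick way to inspect the result — a sketch, assuming a build has already populated include/ and run from the repo root; exact header names vary by numpy/murmurhash version:

# List the headers MANIFEST.in will pick up after setup_package()
# has copied them into include/ (paths below are typical, not guaranteed).
import os

for root, _, files in os.walk('include'):
    for name in files:
        if name.endswith('.h'):
            print(os.path.join(root, name))
# Expected output includes e.g.:
#   include/numpy/arrayobject.h
#   include/murmurhash/MurmurHash2.h
#   include/murmurhash/MurmurHash3.h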

setup.py

@@ -1,231 +1,261 @@
 #!/usr/bin/env python
-from setuptools import setup
-import shutil
-import sys
-import os
-from os import path
-
-from setuptools import Extension
-from distutils import sysconfig
-from distutils.core import setup, Extension
+from __future__ import division, print_function
+import os
+import shutil
+import subprocess
+import sys
 from distutils.command.build_ext import build_ext
+from distutils.sysconfig import get_python_inc
 
-import platform
+try:
+    from setuptools import Extension, setup
+except ImportError:
+    from distutils.core import Extension, setup
 
-PACKAGE_DATA = {
-    "spacy": ["*.pxd"],
-    "spacy.tokens": ["*.pxd"],
-    "spacy.serialize": ["*.pxd"],
-    "spacy.syntax": ["*.pxd"],
-    "spacy.en": [
-        "*.pxd",
-        "data/wordnet/*.exc",
-        "data/wordnet/index.*",
-        "data/tokenizer/*",
-        "data/vocab/serializer.json"
-    ]
-}
+MAJOR = 0
+MINOR = 100
+MICRO = 0
+ISRELEASED = False
+VERSION = '%d.%d.%d' % (MAJOR, MINOR, MICRO)
+
+PACKAGES = [
+    'spacy',
+    'spacy.tokens',
+    'spacy.en',
+    'spacy.serialize',
+    'spacy.syntax',
+    'spacy.munge',
+    'spacy.tests',
+    'spacy.tests.matcher',
+    'spacy.tests.morphology',
+    'spacy.tests.munge',
+    'spacy.tests.parser',
+    'spacy.tests.serialize',
+    'spacy.tests.spans',
+    'spacy.tests.tagger',
+    'spacy.tests.tokenizer',
+    'spacy.tests.tokens',
+    'spacy.tests.vectors',
+    'spacy.tests.vocab']
+
+MOD_NAMES = [
+    'spacy.parts_of_speech',
+    'spacy.strings',
+    'spacy.lexeme',
+    'spacy.vocab',
+    'spacy.attrs',
+    'spacy.morphology',
+    'spacy.tagger',
+    'spacy.syntax.stateclass',
+    'spacy.tokenizer',
+    'spacy.syntax.parser',
+    'spacy.syntax.transition_system',
+    'spacy.syntax.arc_eager',
+    'spacy.syntax._parse_features',
+    'spacy.gold',
+    'spacy.orth',
+    'spacy.tokens.doc',
+    'spacy.tokens.span',
+    'spacy.tokens.token',
+    'spacy.serialize.packer',
+    'spacy.serialize.huffman',
+    'spacy.serialize.bits',
+    'spacy.cfile',
+    'spacy.matcher',
+    'spacy.syntax.ner',
+    'spacy.symbols']
+
+if sys.version_info[:2] < (2, 7) or (3, 0) <= sys.version_info[0:2] < (3, 4):
+    raise RuntimeError('Python version 2.7 or >= 3.4 required.')
 
 # By subclassing build_extensions we have the actual compiler that will be used which is really known only after finalize_options
 # http://stackoverflow.com/questions/724664/python-distutils-how-to-get-a-compiler-that-is-going-to-be-used
-compile_options = {'msvc' : ['/Ox', '/EHsc'] ,
-                   'other' : ['-O3', '-Wno-strict-prototypes', '-Wno-unused-function'] }
-link_options = {'msvc' : [] ,
-                'other' : [] }
+compile_options = {'msvc' : ['/Ox', '/EHsc'],
+                   'other' : ['-O3', '-Wno-strict-prototypes', '-Wno-unused-function']}
+link_options = {'msvc' : [],
+                'other' : []}
+
+if sys.platform.startswith('darwin'):
+    compile_options['other'].append('-mmacosx-version-min=10.8')
+    compile_options['other'].append('-stdlib=libc++')
+    link_options['other'].append('-lc++')
 
 class build_ext_options:
     def build_options(self):
-        c_type = None
-        if self.compiler.compiler_type in compile_options:
-            c_type = self.compiler.compiler_type
-        elif 'other' in compile_options:
-            c_type = 'other'
-        if c_type is not None:
-            for e in self.extensions:
-                e.extra_compile_args = compile_options[c_type]
-        l_type = None
-        if self.compiler.compiler_type in link_options:
-            l_type = self.compiler.compiler_type
-        elif 'other' in link_options:
-            l_type = 'other'
-        if l_type is not None:
-            for e in self.extensions:
-                e.extra_link_args = link_options[l_type]
+        for e in self.extensions:
+            e.extra_compile_args = compile_options.get(
+                self.compiler.compiler_type, compile_options['other'])
+        for e in self.extensions:
+            e.extra_link_args = link_options.get(
+                self.compiler.compiler_type, link_options['other'])
 
-class build_ext_subclass( build_ext, build_ext_options ):
+class build_ext_subclass(build_ext, build_ext_options):
     def build_extensions(self):
         build_ext_options.build_options(self)
         build_ext.build_extensions(self)
 
-# PyPy --- NB! PyPy doesn't really work, it segfaults all over the place. But,
-# this is necessary to get it compile.
-# We have to resort to monkey-patching to set the compiler, because pypy broke
-# all the everything.
-pre_patch_customize_compiler = sysconfig.customize_compiler
-def my_customize_compiler(compiler):
-    pre_patch_customize_compiler(compiler)
-    compiler.compiler_cxx = ['c++']
-
-if platform.python_implementation() == 'PyPy':
-    sysconfig.customize_compiler = my_customize_compiler
-
-#def install_headers():
-#    dest_dir = path.join(sys.prefix, 'include', 'murmurhash')
-#    if not path.exists(dest_dir):
-#        shutil.copytree('murmurhash/headers/murmurhash', dest_dir)
-#
-#    dest_dir = path.join(sys.prefix, 'include', 'numpy')
-
-includes = ['.', path.join(sys.prefix, 'include')]
-
-try:
-    import numpy
-    numpy_headers = path.join(numpy.get_include(), 'numpy')
-    shutil.copytree(numpy_headers, path.join(sys.prefix, 'include', 'numpy'))
-except ImportError:
-    pass
-except OSError:
-    pass
+# Return the git revision as a string
+def git_version():
+    def _minimal_ext_cmd(cmd):
+        # construct minimal environment
+        env = {}
+        for k in ['SYSTEMROOT', 'PATH']:
+            v = os.environ.get(k)
+            if v is not None:
+                env[k] = v
+        # LANGUAGE is used on win32
+        env['LANGUAGE'] = 'C'
+        env['LANG'] = 'C'
+        env['LC_ALL'] = 'C'
+        out = subprocess.Popen(cmd, stdout=subprocess.PIPE, env=env).communicate()[0]
+        return out
+
+    try:
+        out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD'])
+        GIT_REVISION = out.strip().decode('ascii')
+    except OSError:
+        GIT_REVISION = 'Unknown'
+
+    return GIT_REVISION
+
+def get_version_info():
+    # Adding the git rev number needs to be done inside write_version_py(),
+    # otherwise the import of spacy.about messes up the build under Python 3.
+    FULLVERSION = VERSION
+    if os.path.exists('.git'):
+        GIT_REVISION = git_version()
+    elif os.path.exists('spacy/about.py'):
+        # must be a source distribution, use existing version file
+        try:
+            from spacy.about import git_revision as GIT_REVISION
+        except ImportError:
+            raise ImportError('Unable to import git_revision. Try removing '
+                              'spacy/about.py and the build directory '
+                              'before building.')
+    else:
+        GIT_REVISION = 'Unknown'
+
+    if not ISRELEASED:
+        FULLVERSION += '.dev0+' + GIT_REVISION[:7]
+
+    return FULLVERSION, GIT_REVISION
 
-def clean(mod_names):
-    for name in mod_names:
-        name = name.replace('.', '/')
-        so = name + '.so'
-        html = name + '.html'
-        cpp = name + '.cpp'
-        c = name + '.c'
-        for file_path in [so, html, cpp, c]:
-            if os.path.exists(file_path):
-                os.unlink(file_path)
+def write_version_py(filename='spacy/about.py'):
+    cnt = """# THIS FILE IS GENERATED FROM SPACY SETUP.PY
+short_version = '%(version)s'
+version = '%(version)s'
+full_version = '%(full_version)s'
+git_revision = '%(git_revision)s'
+release = %(isrelease)s
+if not release:
+    version = full_version
+"""
+    FULLVERSION, GIT_REVISION = get_version_info()
+
+    with open(filename, 'w') as f:
+        f.write(cnt % {'version': VERSION,
+                       'full_version' : FULLVERSION,
+                       'git_revision' : GIT_REVISION,
+                       'isrelease': str(ISRELEASED)})
 
-def name_to_path(mod_name, ext):
-    return '%s.%s' % (mod_name.replace('.', '/'), ext)
+def generate_cython():
+    cwd = os.path.abspath(os.path.dirname(__file__))
+    print('Cythonizing sources')
+    p = subprocess.call([sys.executable,
+                         os.path.join(cwd, 'bin', 'cythonize.py'),
+                         'spacy'],
+                        cwd=cwd)
+    if p != 0:
+        raise RuntimeError('Running cythonize failed')
 
-def c_ext(mod_name, language, includes):
-    mod_path = name_to_path(mod_name, language)
-    return Extension(mod_name, [mod_path], include_dirs=includes)
+def clean():
+    for name in MOD_NAMES:
+        name = name.replace('.', '/')
+        for ext in ['.so', '.html', '.cpp', '.c']:
+            if os.path.exists(name + ext):
+                os.unlink(name + ext)
 
-def cython_setup(mod_names, language, includes):
-    import Cython.Distutils
-    import Cython.Build
-    import distutils.core
-
-    class build_ext_cython_subclass( Cython.Distutils.build_ext, build_ext_options ):
-        def build_extensions(self):
-            build_ext_options.build_options(self)
-            Cython.Distutils.build_ext.build_extensions(self)
-
-    if language == 'cpp':
-        language = 'c++'
-    exts = []
-    for mod_name in mod_names:
-        mod_path = mod_name.replace('.', '/') + '.pyx'
-        e = Extension(mod_name, [mod_path], language=language, include_dirs=includes)
-        exts.append(e)
-    distutils.core.setup(
-        name='spacy',
-        packages=['spacy', 'spacy.tokens', 'spacy.en', 'spacy.serialize',
-                  'spacy.syntax', 'spacy.munge'],
-        description="Industrial-strength NLP",
-        author='Matthew Honnibal',
-        author_email='honnibal@gmail.com',
-        version=VERSION,
-        url="http://spacy.io",
-        package_data=PACKAGE_DATA,
-        ext_modules=exts,
-        cmdclass={'build_ext': build_ext_cython_subclass},
-        license="MIT",
-    )
-
-def run_setup(exts):
-    setup(
-        name='spacy',
-        packages=['spacy', 'spacy.tokens', 'spacy.en', 'spacy.serialize',
-                  'spacy.syntax', 'spacy.munge',
-                  'spacy.tests',
-                  'spacy.tests.matcher',
-                  'spacy.tests.morphology',
-                  'spacy.tests.munge',
-                  'spacy.tests.parser',
-                  'spacy.tests.serialize',
-                  'spacy.tests.spans',
-                  'spacy.tests.tagger',
-                  'spacy.tests.tokenizer',
-                  'spacy.tests.tokens',
-                  'spacy.tests.vectors',
-                  'spacy.tests.vocab'],
-        description="Industrial-strength NLP",
-        author='Matthew Honnibal',
-        author_email='honnibal@gmail.com',
-        version=VERSION,
-        url="http://honnibal.github.io/spaCy/",
-        package_data=PACKAGE_DATA,
-        ext_modules=exts,
-        license="MIT",
-        install_requires=['numpy', 'murmurhash == 0.24', 'cymem == 1.30', 'preshed == 0.44',
-                          'thinc == 4.0.0', "text_unidecode", 'plac', 'six',
-                          'ujson', 'cloudpickle', 'sputnik == 0.5.2'],
-        setup_requires=["headers_workaround"],
-        cmdclass = {'build_ext': build_ext_subclass },
-    )
-
-    import headers_workaround
-    headers_workaround.fix_venv_pypy_include()
-    headers_workaround.install_headers('murmurhash')
-    headers_workaround.install_headers('numpy')
-
-VERSION = '0.100'
-def main(modules, is_pypy):
-    language = "cpp"
-    includes = ['.', path.join(sys.prefix, 'include')]
-    if sys.platform.startswith('darwin'):
-        compile_options['other'].append('-mmacosx-version-min=10.8')
-        compile_options['other'].append('-stdlib=libc++')
-        link_options['other'].append('-lc++')
-    if use_cython:
-        cython_setup(modules, language, includes)
-    else:
-        exts = [c_ext(mn, language, includes)
-                for mn in modules]
-        run_setup(exts)
-
-MOD_NAMES = ['spacy.parts_of_speech', 'spacy.strings',
-             'spacy.lexeme', 'spacy.vocab', 'spacy.attrs',
-             'spacy.morphology', 'spacy.tagger',
-             'spacy.syntax.stateclass',
-             'spacy.tokenizer',
-             'spacy.syntax.parser',
-             'spacy.syntax.transition_system',
-             'spacy.syntax.arc_eager',
-             'spacy.syntax._parse_features',
-             'spacy.gold', 'spacy.orth',
-             'spacy.tokens.doc', 'spacy.tokens.span', 'spacy.tokens.token',
-             'spacy.serialize.packer', 'spacy.serialize.huffman', 'spacy.serialize.bits',
-             'spacy.cfile', 'spacy.matcher',
-             'spacy.syntax.ner',
-             'spacy.symbols']
+def setup_package():
+    src_path = os.path.dirname(os.path.abspath(sys.argv[0]))
+    old_path = os.getcwd()
+    os.chdir(src_path)
+    sys.path.insert(0, src_path)
+
+    # Rewrite the version file everytime
+    write_version_py()
+
+    include_dirs = [
+        get_python_inc(plat_specific=True),
+        os.path.join(src_path, 'include')]
+
+    ext_modules = []
+    for mod_name in MOD_NAMES:
+        mod_path = mod_name.replace('.', '/') + '.cpp'
+        ext_modules.append(
+            Extension(mod_name, [mod_path],
+                language='c++', include_dirs=include_dirs))
+
+    metadata = dict(
+        name='spacy',
+        packages=PACKAGES,
+        description='Industrial-strength NLP',
+        author='Matthew Honnibal',
+        author_email='matt@spacy.io',
+        version=VERSION,
+        url='https://spacy.io',
+        license='MIT',
+        ext_modules=ext_modules,
+        install_requires=['numpy', 'murmurhash == 0.24', 'cymem == 1.30', 'preshed == 0.44',
+                          'thinc == 4.0.0', 'text_unidecode', 'plac', 'six',
+                          'ujson', 'cloudpickle', 'sputnik == 0.5.2'],
+        cmdclass = {
+            'build_ext': build_ext_subclass},
+    )
+
+    # Run build
+    cwd = os.path.abspath(os.path.dirname(__file__))
+    if not os.path.exists(os.path.join(cwd, 'PKG-INFO')):
+        # Generate Cython sources, unless building from source release
+        generate_cython()
+
+    # sync include dirs from native dependencies
+    include_dir = os.path.join(src_path, 'include')
+    if os.path.exists(include_dir):
+        shutil.rmtree(include_dir)
+    os.mkdir(include_dir)
+
+    import numpy
+    shutil.copytree(
+        os.path.join(numpy.get_include(), 'numpy'),
+        os.path.join(include_dir, 'numpy'))
+
+    import murmurhash
+    shutil.copytree(
+        os.path.join(os.path.dirname(murmurhash.__file__), 'headers', 'murmurhash'),
+        os.path.join(include_dir, 'murmurhash'))
+
+    try:
+        setup(**metadata)
+    finally:
+        del sys.path[0]
+        os.chdir(old_path)
 
 if __name__ == '__main__':
     if sys.argv[1] == 'clean':
-        clean(MOD_NAMES)
+        clean()
     else:
-        use_cython = sys.argv[1] == 'build_ext'
-        main(MOD_NAMES, use_cython)
+        setup_package()
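For orientation, here is roughly what write_version_py() renders the cnt template into on an unreleased git checkout. The values below are illustrative only (the hash is truncated and not taken from a real build):

# THIS FILE IS GENERATED FROM SPACY SETUP.PY
short_version = '0.100.0'
version = '0.100.0'
full_version = '0.100.0.dev0+ac318b5'   # VERSION + '.dev0+' + first 7 hash chars
git_revision = 'ac318b568c'             # output of `git rev-parse HEAD` (truncated here)
release = False
if not release:
    version = full_version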


@@ -1,3 +0,0 @@
"""Feed-forward neural network, using Theano."""


@@ -1,146 +0,0 @@
"""Feed-forward neural network, using Theano."""
import os
import shutil
import sys
import time
from os import path

import numpy
import theano
import theano.tensor as T
import plac

from spacy.gold import read_json_file
from spacy.gold import GoldParse
from spacy.en.pos import POS_TEMPLATES, POS_TAGS, setup_model_dir


def build_model(n_classes, n_vocab, n_hidden, n_word_embed, n_tag_embed):
    # allocate symbolic variables for the data
    words = T.vector('words')
    tags = T.vector('tags')

    word_e = _init_embedding(n_words, n_word_embed)
    tag_e = _init_embedding(n_tags, n_tag_embed)
    label_e = _init_embedding(n_labels, n_label_embed)
    maxent_W, maxent_b = _init_maxent_weights(n_hidden, n_classes)
    hidden_W, hidden_b = _init_hidden_weights(28*28, n_hidden, T.tanh)
    params = [hidden_W, hidden_b, maxent_W, maxent_b, word_e, tag_e, label_e]

    x = T.concatenate([
        T.flatten(word_e[word_indices], outdim=1),
        T.flatten(tag_e[tag_indices], outdim=1)])

    p_y_given_x = feed_layer(
        T.nnet.softmax,
        maxent_W,
        maxent_b,
        feed_layer(
            T.tanh,
            hidden_W,
            hidden_b,
            x))[0]

    guess = T.argmax(p_y_given_x)

    cost = (
        -T.log(p_y_given_x[y])
        + L1(L1_reg, maxent_W, hidden_W, word_e, tag_e)
        + L2(L2_reg, maxent_W, hidden_W, word_e, tag_e)
    )

    train_model = theano.function(
        inputs=[words, tags, y],
        outputs=guess,
        updates=[update(learning_rate, param, cost) for param in params]
    )

    evaluate_model = theano.function(
        inputs=[x, y],
        outputs=T.neq(y, T.argmax(p_y_given_x[0])),
    )
    return train_model, evaluate_model


def _init_embedding(vocab_size, n_dim):
    embedding = 0.2 * numpy.random.uniform(-1.0, 1.0, (vocab_size+1, n_dim))
    return theano.shared(embedding).astype(theano.config.floatX)


def _init_maxent_weights(n_hidden, n_out):
    weights = numpy.zeros((n_hidden, 10), dtype=theano.config.floatX)
    bias = numpy.zeros((10,), dtype=theano.config.floatX)
    return (
        theano.shared(name='W', borrow=True, value=weights),
        theano.shared(name='b', borrow=True, value=bias)
    )


def _init_hidden_weights(n_in, n_out, activation=T.tanh):
    rng = numpy.random.RandomState(1234)
    weights = numpy.asarray(
        rng.uniform(
            low=-numpy.sqrt(6. / (n_in + n_out)),
            high=numpy.sqrt(6. / (n_in + n_out)),
            size=(n_in, n_out)
        ),
        dtype=theano.config.floatX
    )
    bias = numpy.zeros((n_out,), dtype=theano.config.floatX)
    return (
        theano.shared(value=weights, name='W', borrow=True),
        theano.shared(value=bias, name='b', borrow=True)
    )


def feed_layer(activation, weights, bias, input):
    return activation(T.dot(input, weights) + bias)


def L1(L1_reg, *weights):
    # the call sites pass four weight matrices, so accept any number
    return L1_reg * sum(abs(w).sum() for w in weights)


def L2(L2_reg, *weights):
    return L2_reg * sum((w ** 2).sum() for w in weights)


def update(eta, param, cost):
    return (param, param - (eta * T.grad(cost, param)))


def main(train_loc, eval_loc, model_dir):
    learning_rate = 0.01
    L1_reg = 0.00
    L2_reg = 0.0001

    print "... reading the data"
    gold_train = list(read_json_file(train_loc))

    print '... building the model'
    pos_model_dir = path.join(model_dir, 'pos')
    if path.exists(pos_model_dir):
        shutil.rmtree(pos_model_dir)
    os.mkdir(pos_model_dir)

    setup_model_dir(sorted(POS_TAGS.keys()), POS_TAGS, POS_TEMPLATES, pos_model_dir)

    train_model, evaluate_model = build_model(n_hidden, len(POS_TAGS), learning_rate,
                                              L1_reg, L2_reg)

    print '... training'
    loss = 0
    for epoch in range(1, n_epochs+1):
        for raw_text, sents in gold_train:
            for (ids, words, tags, ner, heads, deps), _ in sents:
                tokens = nlp.tokenizer.tokens_from_list(words)
                for t in tokens:
                    guess = train_model([t.orth], [t.tag])
                    loss += guess != t.tag
        print loss
        # compute zero-one loss on validation set
        #error = numpy.mean([evaluate_model(x, y) for x, y in dev_examples])
        #print('epoch %i, validation error %f %%' % (epoch, error * 100))


if __name__ == '__main__':
    plac.call(main)


@@ -1,13 +0,0 @@
from ._ml cimport Model
from thinc.nn cimport InputLayer


cdef class TheanoModel(Model):
    cdef InputLayer input_layer
    cdef object train_func
    cdef object predict_func
    cdef object debug

    cdef public float eta
    cdef public float mu
    cdef public float t


@@ -1,52 +0,0 @@
from thinc.api cimport Example, ExampleC
from thinc.typedefs cimport weight_t

from ._ml cimport arg_max_if_true
from ._ml cimport arg_max_if_zero

import numpy
from os import path


cdef class TheanoModel(Model):
    def __init__(self, n_classes, input_spec, train_func, predict_func, model_loc=None,
                 eta=0.001, mu=0.9, debug=None):
        if model_loc is not None and path.isdir(model_loc):
            model_loc = path.join(model_loc, 'model')

        self.eta = eta
        self.mu = mu
        self.t = 1
        initializer = lambda: 0.2 * numpy.random.uniform(-1.0, 1.0)
        self.input_layer = InputLayer(input_spec, initializer)
        self.train_func = train_func
        self.predict_func = predict_func
        self.debug = debug

        self.n_classes = n_classes
        self.n_feats = len(self.input_layer)
        self.model_loc = model_loc

    def predict(self, Example eg):
        self.input_layer.fill(eg.embeddings, eg.atoms, use_avg=True)
        theano_scores = self.predict_func(eg.embeddings)[0]
        cdef int i
        for i in range(self.n_classes):
            eg.c.scores[i] = theano_scores[i]
        eg.c.guess = arg_max_if_true(eg.c.scores, eg.c.is_valid, self.n_classes)

    def train(self, Example eg):
        self.input_layer.fill(eg.embeddings, eg.atoms, use_avg=False)
        theano_scores, update, y, loss = self.train_func(eg.embeddings, eg.costs,
                                                         self.eta, self.mu)
        self.input_layer.update(update, eg.atoms, self.t, self.eta, self.mu)
        for i in range(self.n_classes):
            eg.c.scores[i] = theano_scores[i]
        eg.c.guess = arg_max_if_true(eg.c.scores, eg.c.is_valid, self.n_classes)
        eg.c.best = arg_max_if_zero(eg.c.scores, eg.c.costs, self.n_classes)
        eg.c.cost = eg.c.costs[eg.c.guess]
        eg.c.loss = loss
        self.t += 1

    def end_training(self):
        pass


@@ -30,8 +30,12 @@ def main(data_size='all', force=False):
     path = os.path.dirname(os.path.abspath(__file__))
+    data_path = os.path.abspath(os.path.join(path, '..', 'data'))
+    if not os.path.isdir(data_path):
+        os.mkdir(data_path)
+
     command = sputnik.make_command(
-        data_path=os.path.abspath(os.path.join(path, '..', 'data')),
+        data_path=data_path,
         repository_url='https://index.spacy.io')
 
     if force:
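One caveat with the isdir/mkdir pair above: two concurrent downloads can race between the check and the create. A tolerant variant, shown only as a sketch (the commit itself inlines the simple guard):

import os

def ensure_dir(path):
    # Create `path` if missing, tolerating a concurrent creator;
    # re-raise if mkdir failed for any other reason.
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise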


@@ -1,62 +0,0 @@
# Enum of Wordnet supersenses
cimport parts_of_speech

from .typedefs cimport flags_t


cpdef enum:
    A_behavior
    A_body
    A_feeling
    A_mind
    A_motion
    A_perception
    A_quantity
    A_relation
    A_social
    A_spatial
    A_substance
    A_time
    A_weather
    N_act
    N_animal
    N_artifact
    N_attribute
    N_body
    N_cognition
    N_communication
    N_event
    N_feeling
    N_food
    N_group
    N_location
    N_motive
    N_object
    N_person
    N_phenomenon
    N_plant
    N_possession
    N_process
    N_quantity
    N_relation
    N_shape
    N_state
    N_substance
    N_time
    V_body
    V_change
    V_cognition
    V_communication
    V_competition
    V_consumption
    V_contact
    V_creation
    V_emotion
    V_motion
    V_perception
    V_possession
    V_social
    V_stative
    V_weather


cdef flags_t[<int>parts_of_speech.N_UNIV_TAGS] POS_SENSES


@@ -1,88 +0,0 @@
from __future__ import unicode_literals

cimport parts_of_speech


POS_SENSES[<int>parts_of_speech.NO_TAG] = 0
POS_SENSES[<int>parts_of_speech.ADJ] = 0
POS_SENSES[<int>parts_of_speech.ADV] = 0
POS_SENSES[<int>parts_of_speech.ADP] = 0
POS_SENSES[<int>parts_of_speech.CONJ] = 0
POS_SENSES[<int>parts_of_speech.DET] = 0
POS_SENSES[<int>parts_of_speech.NOUN] = 0
POS_SENSES[<int>parts_of_speech.NUM] = 0
POS_SENSES[<int>parts_of_speech.PRON] = 0
POS_SENSES[<int>parts_of_speech.PRT] = 0
POS_SENSES[<int>parts_of_speech.VERB] = 0
POS_SENSES[<int>parts_of_speech.X] = 0
POS_SENSES[<int>parts_of_speech.PUNCT] = 0
POS_SENSES[<int>parts_of_speech.EOL] = 0


cdef int _sense = 0

for _sense in range(A_behavior, N_act):
    POS_SENSES[<int>parts_of_speech.ADJ] |= 1 << _sense

for _sense in range(N_act, V_body):
    POS_SENSES[<int>parts_of_speech.NOUN] |= 1 << _sense

for _sense in range(V_body, V_weather+1):
    POS_SENSES[<int>parts_of_speech.VERB] |= 1 << _sense


STRINGS = (
    'A_behavior',
    'A_body',
    'A_feeling',
    'A_mind',
    'A_motion',
    'A_perception',
    'A_quantity',
    'A_relation',
    'A_social',
    'A_spatial',
    'A_substance',
    'A_time',
    'A_weather',
    'N_act',
    'N_animal',
    'N_artifact',
    'N_attribute',
    'N_body',
    'N_cognition',
    'N_communication',
    'N_event',
    'N_feeling',
    'N_food',
    'N_group',
    'N_location',
    'N_motive',
    'N_object',
    'N_person',
    'N_phenomenon',
    'N_plant',
    'N_possession',
    'N_process',
    'N_quantity',
    'N_relation',
    'N_shape',
    'N_state',
    'N_substance',
    'N_time',
    'V_body',
    'V_change',
    'V_cognition',
    'V_communication',
    'V_competition',
    'V_consumption',
    'V_contact',
    'V_creation',
    'V_emotion',
    'V_motion',
    'V_perception',
    'V_possession',
    'V_social',
    'V_stative',
    'V_weather'
)


@@ -1,12 +0,0 @@
# encoding: utf8
from __future__ import unicode_literals

import spacy.de


#def test_tokenizer():
#    lang = spacy.de.German()
#
#    doc = lang(u'Biografie: Ein Spiel ist ein Theaterstück des Schweizer Schriftstellers Max Frisch, das 1967 entstand und am 1. Februar 1968 im Schauspielhaus Zürich uraufgeführt wurde. 1984 legte Frisch eine überarbeitete Neufassung vor. Das von Frisch als Komödie bezeichnete Stück greift eines seiner zentralen Themen auf: die Möglichkeit oder Unmöglichkeit des Menschen, seine Identität zu verändern.')
#    for token in doc:
#        print(repr(token.string))

tox.ini

@@ -0,0 +1,13 @@
[tox]
envlist =
    py27
    py34
recreate = True

[testenv]
changedir = {envtmpdir}
deps =
    pytest
commands =
    python -m spacy.en.download
    python -m pytest {toxinidir}/spacy/ --models --vectors --slow