From 2a89d704291c6cb26c0df7ae30641c7cab8ce11c Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 21 Dec 2014 06:03:53 +1100 Subject: [PATCH] * Add vocab.pyx to setup, and ensure we can import spacy.en.lang --- setup.py | 39 +++++++++++++++++++++------------------ spacy/morphology.pyx | 1 - spacy/vocab.pyx | 6 +++--- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/setup.py b/setup.py index f8d9ad120..b1897851c 100644 --- a/setup.py +++ b/setup.py @@ -46,26 +46,29 @@ else: # If you're not using virtualenv, set your include dir here. pass +ext_args = {'language': "c++", "include_dirs": includes} exts = [ - Extension("spacy.lang", ["spacy/lang.pyx"], language="c++", include_dirs=includes), - Extension("spacy.lexeme", ["spacy/lexeme.pyx"], language="c++", include_dirs=includes), - Extension("spacy.en", ["spacy/en.pyx"], language="c++", include_dirs=includes), - Extension("spacy.tokens", ["spacy/tokens.pyx"], language="c++", include_dirs=includes), - Extension("spacy.utf8string", ["spacy/utf8string.pyx"], language="c++", include_dirs=includes), - Extension("spacy.index", ["spacy/index.pyx"], language="c++", include_dirs=includes), - Extension("spacy.tagger", ["spacy/tagger.pyx"], language="c++", include_dirs=includes), - Extension("spacy.morphology", ["spacy/morphology.pyx"], language="c++", - include_dirs=includes), - - Extension("spacy.syntax.parser", ["spacy/syntax/parser.pyx"], language="c++", - include_dirs=includes), - Extension("spacy.syntax.arc_eager", ["spacy/syntax/arc_eager.pyx"], language="c++", - include_dirs=includes), - Extension("spacy.syntax._state", ["spacy/syntax/_state.pyx"], language="c++", - include_dirs=includes), - Extension("spacy.syntax._parse_features", ["spacy/syntax/_parse_features.pyx"], language="c++", - include_dirs=includes), + Extension("spacy.strings", ["spacy/strings.pyx"], **ext_args), + Extension("spacy.lexeme", ["spacy/lexeme.pyx"], **ext_args), + Extension("spacy.vocab", ["spacy/vocab.pyx"], **ext_args), + Extension("spacy.tokens", ["spacy/tokens.pyx"], **ext_args), + Extension("spacy.morphology", ["spacy/morphology.pyx"], **ext_args), + Extension("spacy.tagger", ["spacy/tagger.pyx"], **ext_args), + Extension("spacy.tokenizer", ["spacy/tokenizer.pyx"], **ext_args), + Extension("spacy.en.lang", ["spacy/en/lang.pyx"], **ext_args), + Extension("spacy.en.pos", ["spacy/en/pos.pyx"], **ext_args), + Extension("spacy.syntax._state", ["spacy/syntax/_state.pyx"], **ext_args), + Extension("spacy.syntax.arc_eager", ["spacy/syntax/arc_eager.pyx"], **ext_args), + #Extension("spacy.syntax.parser", ["spacy/syntax/parser.pyx"], **ext_args), + #Extension("spacy.syntax.parser", ["spacy/syntax/parser.pyx"], language="c++", + # include_dirs=includes), + #Extension("spacy.syntax.arc_eager", ["spacy/syntax/arc_eager.pyx"], language="c++", + # include_dirs=includes), + #Extension("spacy.syntax._state", ["spacy/syntax/_state.pyx"], language="c++", + # include_dirs=includes), + # Extension("spacy.syntax._parse_features", ["spacy/syntax/_parse_features.pyx"], language="c++", + # include_dirs=includes), #Extension("spacy.pos_feats", ["spacy/pos_feats.pyx"], language="c++", include_dirs=includes), #Extension("spacy.ner._state", ["spacy/ner/_state.pyx"], language="c++", include_dirs=includes), diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index 5401de3ad..da4485960 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -3,7 +3,6 @@ from os import path import json -from .lemmatizer import Lemmatizer from .typedefs cimport id_t, univ_tag_t from .typedefs cimport NO_TAG, ADJ, ADV, ADP, CONJ, DET, NOUN, NUM, PRON, PRT from .typedefs cimport VERB, X, PUNCT, EOL diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index abcee19b8..3ab1005f6 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -12,12 +12,12 @@ cdef class Vocab: Also interns UTF-8 strings, and maps them to consecutive integer IDs. ''' - def __init__(self, object get_props): + def __init__(self, object get_lex_props): self.mem = Pool() self._map = PreshMap(2 ** 20) self.strings = StringStore() self.lexemes.push_back(&EMPTY_LEXEME) - self.get_lex_props = get_props + self.get_lex_props = get_lex_props @classmethod def from_dir(cls, object data_dir, object get_lex_props=None): @@ -25,7 +25,7 @@ cdef class Vocab: raise IOError("Directory %s not found -- cannot load Vocab." % data_dir) if not path.isdir(data_dir): raise IOError("Path %s is a file, not a dir -- cannot load Vocab." % data_dir) - cdef Vocab self = cls(get_props) + cdef Vocab self = cls(get_lex_props) self.strings.load(path.join(data_dir, 'strings')) self.load(path.join(data_dir, 'lexemes')) return self