From 27f988b1671854e2c08b6c6a5653eeb2f38bc3d6 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 15 Sep 2015 14:41:48 +1000 Subject: [PATCH] * Remove the vectors option to Vocab, preferring to either load vectors from disk, or set them on the Lexeme objects. --- spacy/language.py | 13 +++---------- spacy/vocab.pyx | 7 +++---- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/spacy/language.py b/spacy/language.py index c3a938458..ecce54e7a 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -137,21 +137,14 @@ class Language(object): return path.join(path.dirname(__file__), 'data') @classmethod - def default_vectors(cls, data_dir): - return None - - @classmethod - def default_vocab(cls, data_dir=None, get_lex_attr=None, vectors=None): + def default_vocab(cls, data_dir=None, get_lex_attr=None): if data_dir is None: data_dir = cls.default_data_dir() - if vectors is None: - vectors = cls.default_vectors(data_dir) if get_lex_attr is None: get_lex_attr = cls.default_lex_attrs(data_dir) return Vocab.from_dir( path.join(data_dir, 'vocab'), - get_lex_attr=get_lex_attr, - vectors=vectors) + get_lex_attr=get_lex_attr) @classmethod def default_tokenizer(cls, vocab, data_dir): @@ -214,7 +207,7 @@ class Language(object): self.entity = entity self.matcher = matcher - def __call__(self, text, tag=True, parse=True, entity=True, merge_mwes=False): + def __call__(self, text, tag=True, parse=True, entity=True): """Apply the pipeline to some text. The text can span multiple sentences, and can contain arbtrary whitespace. Alignment into the original string is preserved. diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 7a07bde7a..210fa8426 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -56,11 +56,10 @@ cdef class Vocab: self.load_lexemes(path.join(data_dir, 'strings.txt'), path.join(data_dir, 'lexemes.bin')) if vectors is None and path.exists(path.join(data_dir, 'vec.bin')): - self.repvec_length = self.load_rep_vectors(path.join(data_dir, 'vec.bin')) + self.vectors_length = self.load_vectors(path.join(data_dir, 'vec.bin')) return self - def __init__(self, get_lex_attr=None, tag_map=None, vectors=None, lemmatizer=None, - serializer_freqs=None): + def __init__(self, get_lex_attr=None, tag_map=None, lemmatizer=None, serializer_freqs=None): if tag_map is None: tag_map = {} if lemmatizer is None: @@ -262,7 +261,7 @@ cdef class Vocab: i += 1 fp.close() - def load_rep_vectors(self, loc): + def load_vectors(self, loc): cdef CFile file_ = CFile(loc, b'rb') cdef int32_t word_len cdef int32_t vec_len