diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index 5512279ae..cea583110 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -32,22 +32,20 @@ cdef class Vectors: cdef public object keys cdef public int i - def __init__(self, strings, data_or_width=0): + def __init__(self, strings, data=None, width=0): if isinstance(strings, StringStore): self.strings = strings else: self.strings = StringStore() for string in strings: self.strings.add(string) - if isinstance(data_or_width, int): - self.data = data = numpy.zeros((len(strings), data_or_width), - dtype='f') + if data is not None: + self.data = numpy.asarray(data, dtype='f') else: - data = data_or_width + self.data = numpy.zeros((len(self.strings), width), dtype='f') self.i = 0 - self.data = data self.key2row = {} - self.keys = np.ndarray((self.data.shape[0],), dtype='uint64') + self.keys = numpy.zeros((self.data.shape[0],), dtype='uint64') def __reduce__(self): return (Vectors, (self.strings, self.data)) diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 205e5a2af..e6ba9944b 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -62,12 +62,10 @@ cdef class Vocab: if strings: for string in strings: _ = self[string] - for name in tag_map.keys(): - if name: - self.strings.add(name) self.lex_attr_getters = lex_attr_getters + print("Create morphology", list(self.strings), tag_map) self.morphology = Morphology(self.strings, tag_map, lemmatizer) - self.vectors = Vectors(self.strings) + self.vectors = Vectors(self.strings, width=0) property lang: def __get__(self): @@ -338,7 +336,7 @@ cdef class Vocab: if self.vectors is None: return None else: - return self.vectors.to_bytes(exclude='strings.json') + return self.vectors.to_bytes() getters = OrderedDict(( ('strings', lambda: self.strings.to_bytes()), @@ -358,7 +356,7 @@ cdef class Vocab: if self.vectors is None: return None else: - return self.vectors.from_bytes(b, exclude='strings') + return self.vectors.from_bytes(b) setters = OrderedDict(( ('strings', lambda b: self.strings.from_bytes(b)), ('lexemes', lambda b: self.lexemes_from_bytes(b)),