mirror of https://github.com/explosion/spaCy.git
Fix initialization of vectors, to address serialization problem
This commit is contained in:
parent
61bc203f3f
commit
92ac9316b5
|
@ -32,22 +32,20 @@ cdef class Vectors:
|
|||
cdef public object keys
|
||||
cdef public int i
|
||||
|
||||
def __init__(self, strings, data_or_width=0):
|
||||
def __init__(self, strings, data=None, width=0):
|
||||
if isinstance(strings, StringStore):
|
||||
self.strings = strings
|
||||
else:
|
||||
self.strings = StringStore()
|
||||
for string in strings:
|
||||
self.strings.add(string)
|
||||
if isinstance(data_or_width, int):
|
||||
self.data = data = numpy.zeros((len(strings), data_or_width),
|
||||
dtype='f')
|
||||
if data is not None:
|
||||
self.data = numpy.asarray(data, dtype='f')
|
||||
else:
|
||||
data = data_or_width
|
||||
self.data = numpy.zeros((len(self.strings), width), dtype='f')
|
||||
self.i = 0
|
||||
self.data = data
|
||||
self.key2row = {}
|
||||
self.keys = np.ndarray((self.data.shape[0],), dtype='uint64')
|
||||
self.keys = numpy.zeros((self.data.shape[0],), dtype='uint64')
|
||||
|
||||
def __reduce__(self):
|
||||
return (Vectors, (self.strings, self.data))
|
||||
|
|
|
@ -62,12 +62,10 @@ cdef class Vocab:
|
|||
if strings:
|
||||
for string in strings:
|
||||
_ = self[string]
|
||||
for name in tag_map.keys():
|
||||
if name:
|
||||
self.strings.add(name)
|
||||
self.lex_attr_getters = lex_attr_getters
|
||||
print("Create morphology", list(self.strings), tag_map)
|
||||
self.morphology = Morphology(self.strings, tag_map, lemmatizer)
|
||||
self.vectors = Vectors(self.strings)
|
||||
self.vectors = Vectors(self.strings, width=0)
|
||||
|
||||
property lang:
|
||||
def __get__(self):
|
||||
|
@ -338,7 +336,7 @@ cdef class Vocab:
|
|||
if self.vectors is None:
|
||||
return None
|
||||
else:
|
||||
return self.vectors.to_bytes(exclude='strings.json')
|
||||
return self.vectors.to_bytes()
|
||||
|
||||
getters = OrderedDict((
|
||||
('strings', lambda: self.strings.to_bytes()),
|
||||
|
@ -358,7 +356,7 @@ cdef class Vocab:
|
|||
if self.vectors is None:
|
||||
return None
|
||||
else:
|
||||
return self.vectors.from_bytes(b, exclude='strings')
|
||||
return self.vectors.from_bytes(b)
|
||||
setters = OrderedDict((
|
||||
('strings', lambda b: self.strings.from_bytes(b)),
|
||||
('lexemes', lambda b: self.lexemes_from_bytes(b)),
|
||||
|
|
Loading…
Reference in New Issue