diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx index a20c9b6df..ce95b2752 100644 --- a/spacy/pipeline/pipes.pyx +++ b/spacy/pipeline/pipes.pyx @@ -202,7 +202,7 @@ class Pipe(object): serialize["cfg"] = lambda p: srsly.write_json(p, self.cfg) serialize["vocab"] = lambda p: self.vocab.to_disk(p) if self.model not in (None, True, False): - serialize["model"] = lambda p: p.open("wb").write(self.model.to_bytes()) + serialize["model"] = self.model.to_disk exclude = util.get_serialization_exclude(serialize, exclude, kwargs) util.to_disk(path, serialize, exclude) @@ -625,7 +625,7 @@ class Tagger(Pipe): serialize = OrderedDict(( ("vocab", lambda p: self.vocab.to_disk(p)), ("tag_map", lambda p: srsly.write_msgpack(p, tag_map)), - ("model", lambda p: p.open("wb").write(self.model.to_bytes())), + ("model", self.model.to_disk), ("cfg", lambda p: srsly.write_json(p, self.cfg)) )) exclude = util.get_serialization_exclude(serialize, exclude, kwargs) @@ -1394,7 +1394,7 @@ class EntityLinker(Pipe): serialize["vocab"] = lambda p: self.vocab.to_disk(p) serialize["kb"] = lambda p: self.kb.dump(p) if self.model not in (None, True, False): - serialize["model"] = lambda p: p.open("wb").write(self.model.to_bytes()) + serialize["model"] = self.model.to_disk exclude = util.get_serialization_exclude(serialize, exclude, kwargs) util.to_disk(path, serialize, exclude) diff --git a/spacy/tests/regression/test_issue5230.py b/spacy/tests/regression/test_issue5230.py index 9cfa3fc05..716a4624b 100644 --- a/spacy/tests/regression/test_issue5230.py +++ b/spacy/tests/regression/test_issue5230.py @@ -24,7 +24,6 @@ def test_language_to_disk_resource_warning(): assert len(w) == 0 -@pytest.mark.xfail def test_vectors_to_disk_resource_warning(): data = numpy.zeros((3, 300), dtype="f") keys = ["cat", "dog", "rat"] @@ -36,7 +35,6 @@ def test_vectors_to_disk_resource_warning(): assert len(w) == 0 -@pytest.mark.xfail def test_custom_pipes_to_disk_resource_warning(): # create dummy pipe partially implementing interface -- only want to test to_disk class SerializableDummy(object): @@ -76,7 +74,6 @@ def test_custom_pipes_to_disk_resource_warning(): assert len(w) == 0 -@pytest.mark.xfail def test_tagger_to_disk_resource_warning(): nlp = Language() nlp.add_pipe(nlp.create_pipe("tagger")) @@ -93,7 +90,6 @@ def test_tagger_to_disk_resource_warning(): assert len(w) == 0 -@pytest.mark.xfail def test_entity_linker_to_disk_resource_warning(): nlp = Language() nlp.add_pipe(nlp.create_pipe("entity_linker")) diff --git a/spacy/vectors.pyx b/spacy/vectors.pyx index f3c20fb7f..62d176c6c 100644 --- a/spacy/vectors.pyx +++ b/spacy/vectors.pyx @@ -376,8 +376,16 @@ cdef class Vectors: save_array = lambda arr, file_: xp.save(file_, arr, allow_pickle=False) else: save_array = lambda arr, file_: xp.save(file_, arr) + + def save_vectors(path): + # the source of numpy.save indicates that the file object is closed after use. + # but it seems that somehow this does not happen, as ResourceWarnings are raised here. + # in order to not rely on this, wrap in context manager. + with path.open("wb") as _file: + save_array(self.data, _file) + serializers = OrderedDict(( - ("vectors", lambda p: save_array(self.data, p.open("wb"))), + ("vectors", save_vectors), ("key2row", lambda p: srsly.write_msgpack(p, self.key2row)) )) return util.to_disk(path, serializers, [])