diff --git a/spacy/tests/regression/test_issue5230.py b/spacy/tests/regression/test_issue5230.py new file mode 100644 index 000000000..e3d7c7e82 --- /dev/null +++ b/spacy/tests/regression/test_issue5230.py @@ -0,0 +1,112 @@ +import warnings + +import numpy +import pytest +import srsly + +from spacy.kb import KnowledgeBase +from spacy.vectors import Vectors +from spacy.language import Language +from spacy.pipeline import Pipe +from spacy.tests.util import make_tempdir + + +@pytest.mark.xfail +def test_language_to_disk_resource_warning(): + nlp = Language() + with make_tempdir() as d: + with warnings.catch_warnings(record=True) as w: + # catch only warnings raised in spacy.language since there may be others from other components or pipelines + warnings.filterwarnings( + "always", module="spacy.language", category=ResourceWarning + ) + nlp.to_disk(d) + assert len(w) == 0 + + +@pytest.mark.xfail +def test_vectors_to_disk_resource_warning(): + data = numpy.zeros((3, 300), dtype="f") + keys = ["cat", "dog", "rat"] + vectors = Vectors(data=data, keys=keys) + with make_tempdir() as d: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always", category=ResourceWarning) + vectors.to_disk(d) + assert len(w) == 0 + + +@pytest.mark.xfail +def test_custom_pipes_to_disk_resource_warning(): + # create dummy pipe partially implementing interface -- only want to test to_disk + class SerializableDummy(object): + def __init__(self, **cfg): + if cfg: + self.cfg = cfg + else: + self.cfg = None + super(SerializableDummy, self).__init__() + + def to_bytes(self, exclude=tuple(), disable=None, **kwargs): + return srsly.msgpack_dumps({"dummy": srsly.json_dumps(None)}) + + def from_bytes(self, bytes_data, exclude): + return self + + def to_disk(self, path, exclude=tuple(), **kwargs): + pass + + def from_disk(self, path, exclude=tuple(), **kwargs): + return self + + class MyPipe(Pipe): + def __init__(self, vocab, model=True, **cfg): + if cfg: + self.cfg = cfg + else: + self.cfg = None + self.model = SerializableDummy() + self.vocab = SerializableDummy() + + pipe = MyPipe(None) + with make_tempdir() as d: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always", category=ResourceWarning) + pipe.to_disk(d) + assert len(w) == 0 + + +@pytest.mark.xfail +def test_tagger_to_disk_resource_warning(): + nlp = Language() + nlp.add_pipe(nlp.create_pipe("tagger")) + tagger = nlp.get_pipe("tagger") + # need to add model for two reasons: + # 1. no model leads to error in serialization, + # 2. the affected line is the one for model serialization + tagger.begin_training(pipeline=nlp.pipeline) + + with make_tempdir() as d: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always", category=ResourceWarning) + tagger.to_disk(d) + assert len(w) == 0 + + +@pytest.mark.xfail +def test_entity_linker_to_disk_resource_warning(): + nlp = Language() + nlp.add_pipe(nlp.create_pipe("entity_linker")) + entity_linker = nlp.get_pipe("entity_linker") + # need to add model for two reasons: + # 1. no model leads to error in serialization, + # 2. the affected line is the one for model serialization + kb = KnowledgeBase(nlp.vocab, entity_vector_length=1) + entity_linker.set_kb(kb) + entity_linker.begin_training(pipeline=nlp.pipeline) + + with make_tempdir() as d: + with warnings.catch_warnings(record=True) as w: + warnings.filterwarnings("always", category=ResourceWarning) + entity_linker.to_disk(d) + assert len(w) == 0