spaCy/spacy/tests/regression/test_issue5230.py

112 lines
3.6 KiB
Python
Raw Normal View History

import warnings
import numpy
import pytest
import srsly
from spacy.kb import KnowledgeBase
from spacy.vectors import Vectors
from spacy.language import Language
from spacy.pipeline import Pipe
from spacy.tests.util import make_tempdir
def test_language_to_disk_resource_warning():
nlp = Language()
with make_tempdir() as d:
with warnings.catch_warnings(record=True) as w:
# catch only warnings raised in spacy.language since there may be others from other components or pipelines
warnings.filterwarnings(
"always", module="spacy.language", category=ResourceWarning
)
nlp.to_disk(d)
assert len(w) == 0
@pytest.mark.xfail
def test_vectors_to_disk_resource_warning():
data = numpy.zeros((3, 300), dtype="f")
keys = ["cat", "dog", "rat"]
vectors = Vectors(data=data, keys=keys)
with make_tempdir() as d:
with warnings.catch_warnings(record=True) as w:
warnings.filterwarnings("always", category=ResourceWarning)
vectors.to_disk(d)
assert len(w) == 0
@pytest.mark.xfail
def test_custom_pipes_to_disk_resource_warning():
# create dummy pipe partially implementing interface -- only want to test to_disk
class SerializableDummy(object):
def __init__(self, **cfg):
if cfg:
self.cfg = cfg
else:
self.cfg = None
super(SerializableDummy, self).__init__()
def to_bytes(self, exclude=tuple(), disable=None, **kwargs):
return srsly.msgpack_dumps({"dummy": srsly.json_dumps(None)})
def from_bytes(self, bytes_data, exclude):
return self
def to_disk(self, path, exclude=tuple(), **kwargs):
pass
def from_disk(self, path, exclude=tuple(), **kwargs):
return self
class MyPipe(Pipe):
def __init__(self, vocab, model=True, **cfg):
if cfg:
self.cfg = cfg
else:
self.cfg = None
self.model = SerializableDummy()
self.vocab = SerializableDummy()
pipe = MyPipe(None)
with make_tempdir() as d:
with warnings.catch_warnings(record=True) as w:
warnings.filterwarnings("always", category=ResourceWarning)
pipe.to_disk(d)
assert len(w) == 0
@pytest.mark.xfail
def test_tagger_to_disk_resource_warning():
nlp = Language()
nlp.add_pipe(nlp.create_pipe("tagger"))
tagger = nlp.get_pipe("tagger")
# need to add model for two reasons:
# 1. no model leads to error in serialization,
# 2. the affected line is the one for model serialization
tagger.begin_training(pipeline=nlp.pipeline)
with make_tempdir() as d:
with warnings.catch_warnings(record=True) as w:
warnings.filterwarnings("always", category=ResourceWarning)
tagger.to_disk(d)
assert len(w) == 0
@pytest.mark.xfail
def test_entity_linker_to_disk_resource_warning():
nlp = Language()
nlp.add_pipe(nlp.create_pipe("entity_linker"))
entity_linker = nlp.get_pipe("entity_linker")
# need to add model for two reasons:
# 1. no model leads to error in serialization,
# 2. the affected line is the one for model serialization
kb = KnowledgeBase(nlp.vocab, entity_vector_length=1)
entity_linker.set_kb(kb)
entity_linker.begin_training(pipeline=nlp.pipeline)
with make_tempdir() as d:
with warnings.catch_warnings(record=True) as w:
warnings.filterwarnings("always", category=ResourceWarning)
entity_linker.to_disk(d)
assert len(w) == 0