From daac8ebacd87a5008ad4b905efd9795ffc288252 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Thu, 27 Aug 2020 16:44:36 +0200
Subject: [PATCH] Don't interpolate config on Language deserialization

---
 spacy/language.py                              |  8 ++++++--
 spacy/tests/serialize/test_serialize_config.py | 14 ++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/spacy/language.py b/spacy/language.py
index bac90af85..90d2cf81a 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1542,7 +1542,9 @@ class Language:
         path = util.ensure_path(path)
         deserializers = {}
         if Path(path / "config.cfg").exists():
-            deserializers["config.cfg"] = lambda p: self.config.from_disk(p)
+            deserializers["config.cfg"] = lambda p: self.config.from_disk(
+                p, interpolate=False
+            )
         deserializers["meta.json"] = deserialize_meta
         deserializers["vocab"] = deserialize_vocab
         deserializers["tokenizer"] = lambda p: self.tokenizer.from_disk(
@@ -1605,7 +1607,9 @@ class Language:
             self.vocab.vectors.name = data.get("vectors", {}).get("name")
 
         deserializers = {}
-        deserializers["config.cfg"] = lambda b: self.config.from_bytes(b)
+        deserializers["config.cfg"] = lambda b: self.config.from_bytes(
+            b, interpolate=False
+        )
         deserializers["meta.json"] = deserialize_meta
         deserializers["vocab"] = self.vocab.from_bytes
         deserializers["tokenizer"] = lambda b: self.tokenizer.from_bytes(
diff --git a/spacy/tests/serialize/test_serialize_config.py b/spacy/tests/serialize/test_serialize_config.py
index 9eae7c775..e425d370d 100644
--- a/spacy/tests/serialize/test_serialize_config.py
+++ b/spacy/tests/serialize/test_serialize_config.py
@@ -209,6 +209,20 @@ def test_config_nlp_roundtrip():
     assert new_nlp._factory_meta == nlp._factory_meta
 
 
+def test_config_nlp_roundtrip_bytes_disk():
+    """Test that the config is serialized correctly and not interpolated
+    by mistake."""
+    nlp = English()
+    nlp_bytes = nlp.to_bytes()
+    new_nlp = English().from_bytes(nlp_bytes)
+    assert new_nlp.config == nlp.config
+    nlp = English()
+    with make_tempdir() as d:
+        nlp.to_disk(d)
+        new_nlp = spacy.load(d)
+    assert new_nlp.config == nlp.config
+
+
 def test_serialize_config_language_specific():
     """Test that config serialization works as expected with language-specific
     factories."""