From 253ba5ef14a42b42684628204ed358981de104db Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Tue, 15 Sep 2020 13:25:34 +0200
Subject: [PATCH] Raise for bad Vocab values

Validate the vocab argument in Language.__init__: anything that is neither
a Vocab instance nor True now raises a ValueError (E918) that reports the
offending value and its type.

---
 spacy/errors.py              |  3 +++
 spacy/language.py            |  3 +++
 spacy/tests/test_language.py | 12 ++++++++++++
 3 files changed, 18 insertions(+)

diff --git a/spacy/errors.py b/spacy/errors.py
index f857bea52..84593bede 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -480,6 +480,9 @@ class Errors:
     E201 = ("Span index out of range.")
 
     # TODO: fix numbering after merging develop into master
+    E918 = ("Received invalid value for vocab: {vocab} ({vocab_type}). Valid "
+            "values are an instance of spacy.vocab.Vocab or True to create one"
+            " (default).")
     E919 = ("A textcat 'positive_label' '{pos_label}' was provided for training "
             "data that does not appear to be a binary classification problem "
             "with two labels. Labels found: {labels}")
diff --git a/spacy/language.py b/spacy/language.py
index e71f4215f..543bcd8bc 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -144,6 +144,9 @@ class Language:
         self._pipe_meta: Dict[str, "FactoryMeta"] = {}  # meta by component
         self._pipe_configs: Dict[str, Config] = {}  # config by component
 
+        # Fail early on anything that is neither a Vocab instance nor True
+        if not isinstance(vocab, Vocab) and vocab is not True:
+            raise ValueError(Errors.E918.format(vocab=vocab, vocab_type=type(vocab)))
         if vocab is True:
             vectors_name = meta.get("vectors", {}).get("name")
             vocab = create_vocab(
diff --git a/spacy/tests/test_language.py b/spacy/tests/test_language.py
index 1156d26b0..4c689e524 100644
--- a/spacy/tests/test_language.py
+++ b/spacy/tests/test_language.py
@@ -277,3 +277,15 @@ def test_spacy_blank():
     nlp = spacy.blank("en", config=config, meta=meta)
     assert nlp.config["training"]["dropout"] == 0.2
     assert nlp.meta["name"] == "my_custom_model"
+
+
+@pytest.mark.parametrize(
+    "value",
+    [False, None, ["x", "y"], Language, Vocab],
+)
+def test_language_init_invalid_vocab(value):
+    """Language should raise for a vocab that is not a Vocab instance or True."""
+    err_fragment = "invalid value"
+    with pytest.raises(ValueError) as e:
+        Language(value)
+    assert err_fragment in str(e.value)