diff --git a/spacy/errors.py b/spacy/errors.py index ff71b60eb..79ed5ecdb 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -127,13 +127,12 @@ class Warnings(object): "this, download a newer compatible model or retrain your custom " "model with the current spaCy version. For more details and " "available updates, run: python -m spacy validate") - W033 = ("Training a new {model} using a model with no lexeme normalization " - "table. This may degrade the performance of the model to some " - "degree. If this is intentional or the language you're using " - "doesn't have a normalization table, please ignore this warning. " - "If this is surprising, make sure you have the spacy-lookups-data " - "package installed. The languages with lexeme normalization tables " - "are currently: da, de, el, en, id, lb, pt, ru, sr, ta, th.") + W033 = ("Training a new {model} using a model with an empty lexeme " + "normalization table. This may degrade the performance to some " + "degree. If this is intentional or this language doesn't have a " + "normalization table, please ignore this warning.") + W034 = ("Please install the package spacy-lookups-data in order to include " + "the default lexeme normalization table for the language '{lang}'.") @add_codes diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx index b28f34a7a..ea40e3ae0 100644 --- a/spacy/pipeline/pipes.pyx +++ b/spacy/pipeline/pipes.pyx @@ -519,6 +519,12 @@ class Tagger(Pipe): warnings.warn(Warnings.W022) if len(self.vocab.lookups.get_table("lexeme_norm", {})) == 0: warnings.warn(Warnings.W033.format(model="part-of-speech tagger")) + try: + import spacy_lookups_data + except ImportError: + if self.vocab.lang in ("da", "de", "el", "en", "id", "lb", "pt", + "ru", "sr", "ta", "th"): + warnings.warn(Warnings.W034.format(lang=self.vocab.lang)) orig_tag_map = dict(self.vocab.morphology.tag_map) new_tag_map = OrderedDict() for raw_text, annots_brackets in get_gold_tuples(): diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 6944e9113..145c382a5 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -604,6 +604,12 @@ cdef class Parser: def begin_training(self, get_gold_tuples, pipeline=None, sgd=None, **cfg): if len(self.vocab.lookups.get_table("lexeme_norm", {})) == 0: warnings.warn(Warnings.W033.format(model="parser or NER")) + try: + import spacy_lookups_data + except ImportError: + if self.vocab.lang in ("da", "de", "el", "en", "id", "lb", "pt", + "ru", "sr", "ta", "th"): + warnings.warn(Warnings.W034.format(lang=self.vocab.lang)) if 'model' in cfg: self.model = cfg['model'] if not hasattr(get_gold_tuples, '__call__'):