From 5e8e8525f02656917c95cd74afa4aa61af818edf Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Tue, 12 Oct 2021 19:56:44 +0200 Subject: [PATCH] fix W108 filter (#9438) * remove text argument from W108 to enable 'once' filtering * include the option of partial POS annotation * fix typo * Update spacy/errors.py Co-authored-by: Adriane Boyd --- spacy/errors.py | 6 +++--- spacy/pipeline/lemmatizer.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/spacy/errors.py b/spacy/errors.py index b6659a041..fc44f6ba3 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -25,7 +25,7 @@ def setup_default_warnings(): filter_warning("once", error_msg=Warnings.W036.format(name=pipe)) # warn once about lemmatizer without required POS - filter_warning("once", error_msg="[W108]") + filter_warning("once", error_msg=Warnings.W108) def filter_warning(action: str, error_msg: str): @@ -170,8 +170,8 @@ class Warnings: "call the {matcher} on each Doc object.") W107 = ("The property `Doc.{prop}` is deprecated. Use " "`Doc.has_annotation(\"{attr}\")` instead.") - W108 = ("The rule-based lemmatizer did not find POS annotation for the " - "token '{text}'. Check that your pipeline includes components that " + W108 = ("The rule-based lemmatizer did not find POS annotation for one or " + "more tokens. Check that your pipeline includes components that " "assign token.pos, typically 'tagger'+'attribute_ruler' or " "'morphologizer'.") W109 = ("Unable to save user hooks while serializing the doc. Re-add any " diff --git a/spacy/pipeline/lemmatizer.py b/spacy/pipeline/lemmatizer.py index 2f436c57a..b2338724d 100644 --- a/spacy/pipeline/lemmatizer.py +++ b/spacy/pipeline/lemmatizer.py @@ -184,7 +184,7 @@ class Lemmatizer(Pipe): univ_pos = token.pos_.lower() if univ_pos in ("", "eol", "space"): if univ_pos == "": - warnings.warn(Warnings.W108.format(text=string)) + warnings.warn(Warnings.W108) return [string.lower()] # See Issue #435 for example of where this logic is requied. if self.is_base_form(token):