fix W108 filter (#9438)

* remove text argument from W108 to enable 'once' filtering

* include the option of partial POS annotation

* fix typo

* Update spacy/errors.py

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
Sofie Van Landeghem 2021-10-12 19:56:44 +02:00 committed by GitHub
parent 6425b9a1c4
commit 5e8e8525f0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 4 additions and 4 deletions

View File

@ -25,7 +25,7 @@ def setup_default_warnings():
filter_warning("once", error_msg=Warnings.W036.format(name=pipe)) filter_warning("once", error_msg=Warnings.W036.format(name=pipe))
# warn once about lemmatizer without required POS # warn once about lemmatizer without required POS
filter_warning("once", error_msg="[W108]") filter_warning("once", error_msg=Warnings.W108)
def filter_warning(action: str, error_msg: str): def filter_warning(action: str, error_msg: str):
@ -170,8 +170,8 @@ class Warnings:
"call the {matcher} on each Doc object.") "call the {matcher} on each Doc object.")
W107 = ("The property `Doc.{prop}` is deprecated. Use " W107 = ("The property `Doc.{prop}` is deprecated. Use "
"`Doc.has_annotation(\"{attr}\")` instead.") "`Doc.has_annotation(\"{attr}\")` instead.")
W108 = ("The rule-based lemmatizer did not find POS annotation for the " W108 = ("The rule-based lemmatizer did not find POS annotation for one or "
"token '{text}'. Check that your pipeline includes components that " "more tokens. Check that your pipeline includes components that "
"assign token.pos, typically 'tagger'+'attribute_ruler' or " "assign token.pos, typically 'tagger'+'attribute_ruler' or "
"'morphologizer'.") "'morphologizer'.")
W109 = ("Unable to save user hooks while serializing the doc. Re-add any " W109 = ("Unable to save user hooks while serializing the doc. Re-add any "

View File

@ -184,7 +184,7 @@ class Lemmatizer(Pipe):
univ_pos = token.pos_.lower() univ_pos = token.pos_.lower()
if univ_pos in ("", "eol", "space"): if univ_pos in ("", "eol", "space"):
if univ_pos == "": if univ_pos == "":
warnings.warn(Warnings.W108.format(text=string)) warnings.warn(Warnings.W108)
return [string.lower()] return [string.lower()]
# See Issue #435 for example of where this logic is required. # See Issue #435 for example of where this logic is required.
if self.is_base_form(token): if self.is_base_form(token):