Replace lexeme_norm warning with logging

This commit is contained in:
Ines Montani 2020-08-14 15:00:52 +02:00
parent 37814b608d
commit 8128e5eb35
14 changed files with 30 additions and 31 deletions

View File

@ -14,7 +14,7 @@ from . import pipeline # noqa: F401
from .cli.info import info # noqa: F401
from .glossary import explain # noqa: F401
from .about import __version__ # noqa: F401
from .util import registry # noqa: F401
from .util import registry, logger # noqa: F401
from .errors import Errors
from .language import Language

View File

@ -9,6 +9,7 @@ from thinc.api import use_pytorch_for_gpu_memory, require_gpu, fix_random_seed
from thinc.api import Config, Optimizer
import random
import typer
import logging
from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
from ._util import import_code, get_sourced_components
@ -17,7 +18,6 @@ from .. import util
from ..gold.example import Example
from ..errors import Errors
# Don't remove - required to load the built-in architectures
from ..ml import models # noqa: F401
@ -48,6 +48,7 @@ def train_cli(
used to register custom functions and architectures that can then be
referenced in the config.
"""
util.logger.setLevel(logging.DEBUG if verbose else logging.ERROR)
verify_cli_args(config_path, output_path)
overrides = parse_config_overrides(ctx.args)
import_code(code_path)

View File

@ -409,7 +409,7 @@ cdef class Parser(Pipe):
lexeme_norms = self.vocab.lookups.get_table("lexeme_norm", {})
if len(lexeme_norms) == 0 and self.vocab.lang in util.LEXEME_NORM_LANGS:
langs = ", ".join(util.LEXEME_NORM_LANGS)
warnings.warn(Warnings.W033.format(model="parser or NER", langs=langs))
util.logger.debug(Warnings.W033.format(model="parser or NER", langs=langs))
actions = self.moves.get_actions(
examples=get_examples(),
min_freq=self.cfg['min_action_freq'],

View File

@ -1,17 +1,17 @@
import pytest
from spacy import util
from spacy.lang.en import English
from spacy.language import Language
from spacy.lookups import Lookups
from spacy.pipeline._parser_internals.ner import BiluoPushDown
from spacy.gold import Example
from spacy.tokens import Doc
from spacy.vocab import Vocab
import logging
from ..util import make_tempdir
TRAIN_DATA = [
("Who is Shaka Khan?", {"entities": [(7, 17, "PERSON")]}),
("I like London and Berlin.", {"entities": [(7, 13, "LOC"), (18, 24, "LOC")]}),
@ -56,6 +56,7 @@ def test_get_oracle_moves(tsys, doc, entity_annots):
assert names == ["U-PERSON", "O", "O", "B-GPE", "L-GPE", "O"]
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_get_oracle_moves_negative_entities(tsys, doc, entity_annots):
entity_annots = [(s, e, "!" + label) for s, e, label in entity_annots]
example = Example.from_dict(doc, {"entities": entity_annots})
@ -332,19 +333,21 @@ def test_overfitting_IO():
assert ents2[0].label_ == "LOC"
def test_ner_warns_no_lookups():
def test_ner_warns_no_lookups(caplog):
nlp = English()
assert nlp.lang in util.LEXEME_NORM_LANGS
nlp.vocab.lookups = Lookups()
assert not len(nlp.vocab.lookups)
nlp.add_pipe("ner")
with pytest.warns(UserWarning):
with caplog.at_level(logging.DEBUG):
nlp.begin_training()
assert "W033" in caplog.text
caplog.clear()
nlp.vocab.lookups.add_table("lexeme_norm")
nlp.vocab.lookups.get_table("lexeme_norm")["a"] = "A"
with pytest.warns(None) as record:
with caplog.at_level(logging.DEBUG):
nlp.begin_training()
assert not record.list
assert "W033" not in caplog.text
@Language.factory("blocker")

View File

@ -25,7 +25,6 @@ def test_issue2070():
assert len(doc) == 11
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue2179():
"""Test that spurious 'extra_labels' aren't created when initializing NER."""
nlp = Italian()
@ -135,7 +134,6 @@ def test_issue2464(en_vocab):
assert len(matches) == 3
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue2482():
"""Test we can serialize and deserialize a blank NER or parser model."""
nlp = Italian()

View File

@ -136,7 +136,6 @@ def test_issue2782(text, lang_cls):
assert doc[0].like_num
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue2800():
"""Test issue that arises when too many labels are added to NER model.
Used to cause segfault.

View File

@ -90,7 +90,6 @@ def test_issue3199():
assert list(doc[0:3].noun_chunks) == []
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue3209():
"""Test issue that occurred in spaCy nightly where NER labels were being
mapped to classes incorrectly after loading the model, when the labels

View File

@ -91,7 +91,6 @@ def test_issue_3526_3(en_vocab):
assert new_ruler.overwrite is not ruler.overwrite
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue_3526_4(en_vocab):
nlp = Language(vocab=en_vocab)
patterns = [{"label": "ORG", "pattern": "Apple"}]
@ -252,7 +251,6 @@ def test_issue3803():
assert [t.like_num for t in doc] == [True, True, True, True, True, True]
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue3830_no_subtok():
"""Test that the parser doesn't have subtok label if not learn_tokens"""
config = {
@ -270,7 +268,6 @@ def test_issue3830_no_subtok():
assert "subtok" not in parser.labels
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue3830_with_subtok():
"""Test that the parser does have subtok label if learn_tokens=True."""
config = {
@ -333,7 +330,6 @@ def test_issue3879(en_vocab):
assert len(matcher(doc)) == 2 # fails because of a FP match 'is a test'
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue3880():
"""Test that `nlp.pipe()` works when an empty string ends the batch.

View File

@ -81,7 +81,6 @@ def test_issue4030():
assert doc.cats["inoffensive"] == 0.0
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue4042():
"""Test that serialization of an EntityRuler before NER works fine."""
nlp = English()
@ -110,7 +109,6 @@ def test_issue4042():
assert doc2.ents[0].label_ == "MY_ORG"
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue4042_bug2():
"""
Test that serialization of an NER works fine when new labels were added.
@ -242,7 +240,6 @@ def test_issue4190():
assert result_1b == result_2
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue4267():
""" Test that running an entity_ruler after ner gives consistent results"""
nlp = English()
@ -324,7 +321,6 @@ def test_issue4313():
entity_scores[(start, end, label)] += score
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue4348():
"""Test that training the tagger with empty data, doesn't throw errors"""
nlp = English()

View File

@ -179,7 +179,6 @@ def test_issue4707():
assert "entity_ruler" in new_nlp.pipe_names
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue4725_1():
""" Ensure the pickling of the NER goes well"""
vocab = Vocab(vectors_name="test_vocab_add_vector")
@ -198,7 +197,6 @@ def test_issue4725_1():
assert ner2.cfg["update_with_oracle_cut_size"] == 111
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue4725_2():
# ensures that this runs correctly and doesn't hang or crash because of the global vectors
# if it does crash, it's usually because of calling 'spawn' for multiprocessing (e.g. on Windows),

View File

@ -1,8 +1,7 @@
import pytest
from spacy.lang.en import English
import pytest
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_issue5152():
# Test that the comparison between a Span and a Token, goes well
# There was a bug when the number of tokens in the span equaled the number of characters in the token (!)
@ -14,6 +13,8 @@ def test_issue5152():
span_2 = text[0:3] # Talk about being
span_3 = text_var[0:3] # Talk of being
token = y[0] # Let
assert span.similarity(token) == 0.0
with pytest.warns(UserWarning):
assert span.similarity(token) == 0.0
assert span.similarity(span_2) == 1.0
assert span_2.similarity(span_3) < 1.0
with pytest.warns(UserWarning):
assert span_2.similarity(span_3) < 1.0

View File

@ -154,6 +154,7 @@ def test_example_from_dict_some_ner(en_vocab):
assert ner_tags == ["U-LOC", None, None, None]
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_json2docs_no_ner(en_vocab):
data = [
{
@ -506,6 +507,7 @@ def test_roundtrip_docs_to_docbin(doc):
assert cats["BAKING"] == reloaded_example.reference.cats["BAKING"]
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_make_orth_variants(doc):
nlp = English()
with make_tempdir() as tmpdir:
@ -586,7 +588,7 @@ def test_tuple_format_implicit():
("Uber blew through $1 million a week", {"entities": [(0, 4, "ORG")]}),
(
"Spotify steps up Asia expansion",
{"entities": [(0, 8, "ORG"), (17, 21, "LOC")]},
{"entities": [(0, 7, "ORG"), (17, 21, "LOC")]},
),
("Google rebrands its business apps", {"entities": [(0, 6, "ORG")]}),
]
@ -601,7 +603,7 @@ def test_tuple_format_implicit_invalid():
("Uber blew through $1 million a week", {"frumble": [(0, 4, "ORG")]}),
(
"Spotify steps up Asia expansion",
{"entities": [(0, 8, "ORG"), (17, 21, "LOC")]},
{"entities": [(0, 7, "ORG"), (17, 21, "LOC")]},
),
("Google rebrands its business apps", {"entities": [(0, 6, "ORG")]}),
]

View File

@ -46,6 +46,7 @@ def test_Example_from_dict_with_tags(pred_words, annots):
assert aligned_tags == ["NN" for _ in predicted]
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_aligned_tags():
pred_words = ["Apply", "some", "sunscreen", "unless", "you", "can", "not"]
gold_words = ["Apply", "some", "sun", "screen", "unless", "you", "cannot"]
@ -198,8 +199,8 @@ def test_Example_from_dict_with_entities(annots):
def test_Example_from_dict_with_entities_invalid(annots):
vocab = Vocab()
predicted = Doc(vocab, words=annots["words"])
example = Example.from_dict(predicted, annots)
# TODO: shouldn't this throw some sort of warning ?
with pytest.warns(UserWarning):
example = Example.from_dict(predicted, annots)
assert len(list(example.reference.ents)) == 0

View File

@ -24,6 +24,7 @@ import tempfile
import shutil
import shlex
import inspect
import logging
try:
import cupy.random
@ -58,6 +59,10 @@ OOV_RANK = numpy.iinfo(numpy.uint64).max
LEXEME_NORM_LANGS = ["da", "de", "el", "en", "id", "lb", "pt", "ru", "sr", "ta", "th"]
logging.basicConfig()
logger = logging.getLogger("spacy")
class registry(thinc.registry):
languages = catalogue.create("spacy", "languages", entry_points=True)
architectures = catalogue.create("spacy", "architectures", entry_points=True)