Adding case-insensitive lemmatisation test

2017-04-25 18:07:02 +02:00 · 2017-04-25 18:07:02 +02:00 · ed5f094451
parent 26e31afc18
commit ed5f094451
2 changed files with 12 additions and 1 deletions
--- a/spacy/fr/init.py
+++ b/spacy/fr/init.py
@ -19,6 +19,7 @@ class FrenchDefaults(BaseDefaults):
    token_match = TOKEN_MATCH
    @classmethod
    def create_tokenizer(cls, nlp=None):
        cls.tokenizer_exceptions = get_tokenizer_exceptions()
--- a/spacy/tests/fr/test_lemmatization.py
+++ b/spacy/tests/fr/test_lemmatization.py
@ -28,4 +28,14 @@ def test_tokenizer_verb_noun(fr_tokenizer):
    tokens = fr_tokenizer(text)
    assert len(tokens) == 11
    assert tokens[1].lemma_ == "valider"
-    assert tokens[3].lemma_ == "notes"
+    assert tokens[3].lemma_ == "notes"
@pytest.mark.xfail
def test_tokenizer_verb_noun_insensitive(fr_tokenizer):
    """Check that a capitalised and a lower-case form get the same lemma.

    The test is marked ``xfail``: it is expected to fail for now.
    NOTE(review): presumably because the French lemma lookup is still
    case-sensitive -- confirm against the lemmatizer implementation.
    """
    # "Costaricaines" (capitalised) and "costaricains" (lower-case) should
    # both be reduced to the same lower-case lemma.
    text = "Les Costaricaines et les costaricains sont jolies"
    tokens = fr_tokenizer(text)
    # The sentence is seven plain words separated by spaces; the indices
    # used below (1 and 4) rely on that one-token-per-word split.
    assert len(tokens) == 7
    assert tokens[1].lemma_ == "costaricain"
    assert tokens[4].lemma_ == "costaricain"