From 8bd85fd9d51b8961e6d264554908e1f1b80d8e02 Mon Sep 17 00:00:00 2001
From: foufaster
Date: Sun, 27 Jan 2019 06:01:30 +0100
Subject: [PATCH] Fix french lemmatization (#3180)

---
Reviewer notes (text between the three-dash line and the first diff is not
part of the commit message):
- Line breaks and indentation were restored from a whitespace-collapsed copy
  of this patch; confirm the context-line indentation against the target tree
  before applying.
- The change makes lemmatize() append LOOKUP[string][0] instead of
  LOOKUP[string]; this presumes LOOKUP values are sequences of lemmas --
  TODO confirm against the LOOKUP table.
- The new regression test is named test_issue1959 although it lives in
  test_issue3178.py and the subject refers to #3180; consider renaming it in
  a follow-up.

 spacy/lang/fr/lemmatizer/lemmatizer.py   |  2 +-
 spacy/tests/regression/test_issue3178.py | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 spacy/tests/regression/test_issue3178.py

diff --git a/spacy/lang/fr/lemmatizer/lemmatizer.py b/spacy/lang/fr/lemmatizer/lemmatizer.py
index 4103fdfd2..e572f84a0 100644
--- a/spacy/lang/fr/lemmatizer/lemmatizer.py
+++ b/spacy/lang/fr/lemmatizer/lemmatizer.py
@@ -131,7 +131,7 @@ def lemmatize(string, index, exceptions, rules):
     if not forms:
         forms.extend(oov_forms)
     if not forms and string in LOOKUP.keys():
-        forms.append(LOOKUP[string])
+        forms.append(LOOKUP[string][0])
     if not forms:
         forms.append(string)
     return list(set(forms))
diff --git a/spacy/tests/regression/test_issue3178.py b/spacy/tests/regression/test_issue3178.py
new file mode 100644
index 000000000..fb07f5c03
--- /dev/null
+++ b/spacy/tests/regression/test_issue3178.py
@@ -0,0 +1,10 @@
+from __future__ import unicode_literals
+import pytest
+import spacy
+
+
+@pytest.mark.models('fr')
+def test_issue1959(FR):
+    texts = ['Je suis la mauvaise herbe', "Me, myself and moi"]
+    for text in texts:
+        FR(text)