From 40509e8bca5f317761f248b5428d33fa805bb437 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Tue, 27 Sep 2016 14:01:16 +0200
Subject: [PATCH] Tweak the new is_base_form logic, because we can expect the 'pos' key in the morphology we're passed.

---
 spacy/lemmatizer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py
index f2e425e93..f5ce5ae6f 100644
--- a/spacy/lemmatizer.py
+++ b/spacy/lemmatizer.py
@@ -55,9 +55,10 @@ class Lemmatizer(object):
     def is_base_form(self, pos, **morphology):
         '''Check whether we're dealing with an uninflected paradigm, so we can
         avoid lemmatization entirely.'''
-        if pos == 'noun' and morphology.get('number') == 'sing' and len(morphology) == 1:
+        others = [key for key in morphology if key not in ('number', 'pos', 'verbform')]
+        if pos == 'noun' and morphology.get('number') == 'sing' and not others:
             return True
-        elif pos == 'verb' and morphology.get('verbform') == 'inf' and len(morphology) == 1:
+        elif pos == 'verb' and morphology.get('verbform') == 'inf' and not others:
             return True
         else:
             return False