From bfddf50081557b8ad5815a60a3fd8625ee5fe728 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 4 Sep 2017 15:18:41 +0200
Subject: [PATCH] Fix #1296: Incorrect lemmatization of base form verbs

---
 spacy/lemmatizer.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py
index d7541c56b..4d534b50f 100644
--- a/spacy/lemmatizer.py
+++ b/spacy/lemmatizer.py
@@ -44,6 +44,11 @@ class Lemmatizer(object):
             return True
         elif univ_pos == 'verb' and morphology.get('VerbForm') == 'inf':
             return True
+        # This maps 'VBP' to base form -- probably just need 'IS_BASE'
+        # morphology
+        elif univ_pos == 'verb' and (morphology.get('VerbForm') == 'fin' and \
+                                     morphology.get('Tense') == 'pres'):
+            return True
         elif univ_pos == 'adj' and morphology.get('Degree') == 'pos':
             return True
         elif VerbForm_inf in morphology: