Fix #1296: Incorrect lemmatization of base form verbs

This commit is contained in:
Matthew Honnibal 2017-09-04 15:18:41 +02:00
parent b29e6bff46
commit bfddf50081
1 changed file with 5 additions and 0 deletions

View File

@@ -44,6 +44,11 @@ class Lemmatizer(object):
return True return True
elif univ_pos == 'verb' and morphology.get('VerbForm') == 'inf': elif univ_pos == 'verb' and morphology.get('VerbForm') == 'inf':
return True return True
# This maps 'VBP' to base form -- probably just need 'IS_BASE'
# morphology
elif univ_pos == 'verb' and (morphology.get('VerbForm') == 'fin' and \
morphology.get('Tense') == 'pres'):
return True
elif univ_pos == 'adj' and morphology.get('Degree') == 'pos': elif univ_pos == 'adj' and morphology.get('Degree') == 'pos':
return True return True
elif VerbForm_inf in morphology: elif VerbForm_inf in morphology: