Tweak the new is_base_form logic, because we can expect the 'pos' key in the morphology we're passed.

This commit is contained in:
Matthew Honnibal 2016-09-27 14:01:16 +02:00
parent 9c8ac91d72
commit 40509e8bca
1 changed file with 3 additions and 2 deletions

View File

@@ -55,9 +55,10 @@ class Lemmatizer(object):
     def is_base_form(self, pos, **morphology):
         '''Check whether we're dealing with an uninflected paradigm, so we can
         avoid lemmatization entirely.'''
-        if pos == 'noun' and morphology.get('number') == 'sing' and len(morphology) == 1:
+        others = [key for key in morphology if key not in ('number', 'pos', 'verbform')]
+        if pos == 'noun' and morphology.get('number') == 'sing' and not others:
             return True
-        elif pos == 'verb' and morphology.get('verbform') == 'inf' and len(morphology) == 1:
+        elif pos == 'verb' and morphology.get('verbform') == 'inf' and not others:
             return True
         else:
             return False