mirror of https://github.com/explosion/spaCy.git
Tweak the new is_base_form logic, because we can expect the 'pos' key in the morphology we're passed.
This commit is contained in:
parent
9c8ac91d72
commit
40509e8bca
|
@@ -55,9 +55,10 @@ class Lemmatizer(object):
|
||||||
def is_base_form(self, pos, **morphology):
|
def is_base_form(self, pos, **morphology):
|
||||||
'''Check whether we're dealing with an uninflected paradigm, so we can
|
'''Check whether we're dealing with an uninflected paradigm, so we can
|
||||||
avoid lemmatization entirely.'''
|
avoid lemmatization entirely.'''
|
||||||
if pos == 'noun' and morphology.get('number') == 'sing' and len(morphology) == 1:
|
others = [key for key in morphology if key not in ('number', 'pos', 'verbform')]
|
||||||
|
if pos == 'noun' and morphology.get('number') == 'sing' and not others:
|
||||||
return True
|
return True
|
||||||
elif pos == 'verb' and morphology.get('verbform') == 'inf' and len(morphology) == 1:
|
elif pos == 'verb' and morphology.get('verbform') == 'inf' and not others:
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
return False
|
return False
|
||||||
|
|
Loading…
Reference in New Issue