Avoid lemmatizing inappropriate tags in English lemmatizer

This commit is contained in:
Matthew Honnibal 2017-10-11 03:23:23 +02:00
parent d528b6e36d
commit c15d8278cb
1 changed file with 2 additions and 0 deletions

View File

@@ -24,6 +24,8 @@ class Lemmatizer(object):
univ_pos = 'adj'
elif univ_pos == PUNCT:
univ_pos = 'punct'
else:
return set([string.lower()])
# See Issue #435 for an example of where this logic is required.
if self.is_base_form(univ_pos, morphology):
return set([string.lower()])