* Allow punctuation to be lemmatized

This commit is contained in:
Matthew Honnibal 2015-10-09 19:02:42 +11:00
parent 5332c0b697
commit 2d9e5bf566
1 changed file with 2 additions and 2 deletions

View File

@@ -7,7 +7,7 @@ except ImportError:
import json import json
from .parts_of_speech import UNIV_POS_NAMES from .parts_of_speech import UNIV_POS_NAMES
from .parts_of_speech cimport ADJ, VERB, NOUN from .parts_of_speech cimport ADJ, VERB, NOUN, PUNCT
cdef class Morphology: cdef class Morphology:
@@ -81,7 +81,7 @@ cdef class Morphology:
if self.lemmatizer is None: if self.lemmatizer is None:
return orth return orth
cdef unicode py_string = self.strings[orth] cdef unicode py_string = self.strings[orth]
if pos != NOUN and pos != VERB and pos != ADJ: if pos != NOUN and pos != VERB and pos != ADJ and pos != PUNCT:
return orth return orth
cdef set lemma_strings cdef set lemma_strings
cdef unicode lemma_string cdef unicode lemma_string