mirror of https://github.com/explosion/spaCy.git
Handle lemmatization for unknown string IDs
This commit is contained in:
parent
204b58c864
commit
72bbcc0871
|
@ -146,6 +146,8 @@ cdef class Morphology:
|
||||||
self.add_special_case(tag_str, form_str, attrs)
|
self.add_special_case(tag_str, form_str, attrs)
|
||||||
|
|
||||||
def lemmatize(self, const univ_pos_t univ_pos, attr_t orth, morphology):
|
def lemmatize(self, const univ_pos_t univ_pos, attr_t orth, morphology):
|
||||||
|
if orth not in self.strings:
|
||||||
|
return orth
|
||||||
cdef unicode py_string = self.strings[orth]
|
cdef unicode py_string = self.strings[orth]
|
||||||
if self.lemmatizer is None:
|
if self.lemmatizer is None:
|
||||||
return self.strings.add(py_string.lower())
|
return self.strings.add(py_string.lower())
|
||||||
|
|
Loading…
Reference in New Issue