From 72bbcc0871568fc6944a45e1aa4907735c743453 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 24 Sep 2017 05:01:31 -0500
Subject: [PATCH] Handle lemmatization for unknown string IDs

---
 spacy/morphology.pyx | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx
index 13a0ed8e3..5ee11c151 100644
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@@ -146,6 +146,8 @@ cdef class Morphology:
                 self.add_special_case(tag_str, form_str, attrs)
 
     def lemmatize(self, const univ_pos_t univ_pos, attr_t orth, morphology):
+        if orth not in self.strings:
+            return orth
         cdef unicode py_string = self.strings[orth]
         if self.lemmatizer is None:
             return self.strings.add(py_string.lower())