From 17c467e0ab143eb89c45917740b5d32be303f56a Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 11 Oct 2017 03:33:06 -0500 Subject: [PATCH] Avoid clobbering existing lemmas --- spacy/morphology.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index 5a4399698..b8dbb83ba 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -55,7 +55,7 @@ cdef class Morphology: # Add a 'null' tag, which we can reference when assign morphology to # untagged tokens. self.rich_tags[self.n_tags].id = self.n_tags - + self._cache = PreshMapArray(self.n_tags) self.exc = {} if exc is not None: @@ -68,7 +68,8 @@ cdef class Morphology: cdef int assign_untagged(self, TokenC* token) except -1: '''Set morphological attributes on a token without a POS tag.''' - token.lemma = self.lemmatize(0, token.lex.orth, {}) + if token.lemma == 0: + token.lemma = self.lemmatize(0, token.lex.orth, {}) cdef int assign_tag(self, TokenC* token, tag) except -1: if isinstance(tag, basestring):