From 6dd14dc3427167045c63b9cba8f24bcde87cd765 Mon Sep 17 00:00:00 2001
From: ines
Date: Wed, 11 Oct 2017 13:27:10 +0200
Subject: [PATCH] Add lookup lemmas to tokens without POS tags

---
 spacy/morphology.pyx | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx
index b8dbb83ba..4a1a0aa54 100644
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@@ -67,9 +67,13 @@ cdef class Morphology:
                 self.exc), None, None)
 
     cdef int assign_untagged(self, TokenC* token) except -1:
-        '''Set morphological attributes on a token without a POS tag.'''
+        """Set morphological attributes on a token without a POS tag. Uses
+        the lemmatizer's lookup() method, which looks up the string in the
+        table provided by the language data as lemma_lookup (if available)."""
         if token.lemma == 0:
-            token.lemma = self.lemmatize(0, token.lex.orth, {})
+            orth_str = self.strings[token.lex.orth]
+            lemma = self.lemmatizer.lookup(orth_str)
+            token.lemma = self.strings.add(lemma)
 
     cdef int assign_tag(self, TokenC* token, tag) except -1:
         if isinstance(tag, basestring):