From 813249f82699b89f32634a063ba88a67a1a1da80 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 18 Dec 2016 17:35:22 +0100
Subject: [PATCH] Work on morphology class. Still not fully consistent with rest of library.

---
 spacy/morphology.pyx | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx
index bd02d0489..c13ce1920 100644
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@@ -1,3 +1,6 @@
+# cython: infer_types
+from __future__ import unicode_literals
+
 from os import path
 
 from libc.string cimport memset
@@ -13,7 +16,7 @@ from .parts_of_speech cimport ADJ, VERB, NOUN, PUNCT
 from .attrs cimport POS, IS_SPACE
 from .parts_of_speech import IDS as POS_IDS
 from .lexeme cimport Lexeme
-from .attrs import intify_attrs
+from .attrs import LEMMA, intify_attrs
 
 
 def _normalize_props(props):
@@ -106,9 +109,8 @@ cdef class Morphology:
         tag = self.strings[tag_str]
         tag_id = self.reverse_index[tag]
         orth = self.strings[orth_str]
-        rich_tag = self.rich_tags[tag_id]
+        cdef RichTagC rich_tag = self.rich_tags[tag_id]
         attrs = intify_attrs(attrs, self.strings, _do_deprecated=True)
-
         cached = self._cache.get(tag_id, orth)
         if cached is NULL:
             cached = self.mem.alloc(1, sizeof(MorphAnalysisC))
@@ -121,8 +123,12 @@ cdef class Morphology:
             raise ValueError(msg)
 
         cached.tag = rich_tag
+        # TODO: Refactor this to take arbitrary attributes.
         for name_id, value_id in attrs.items():
-            self.assign_feature(&cached.tag.morph, name_id, value_id)
+            if name_id == LEMMA:
+                cached.lemma = value_id
+            else:
+                self.assign_feature(&cached.tag.morph, name_id, value_id)
         if cached.lemma == 0:
             cached.lemma = self.lemmatize(rich_tag.pos, orth,
                                           self.tag_map.get(tag_str, {}))