mirror of https://github.com/explosion/spaCy.git
Work on morphology class. Still not fully consistent with rest of library.
This commit is contained in:
parent
3679fb43a3
commit
813249f826
|
@ -1,3 +1,6 @@
|
||||||
|
# cython: infer_types
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from os import path
|
from os import path
|
||||||
|
|
||||||
from libc.string cimport memset
|
from libc.string cimport memset
|
||||||
|
@ -13,7 +16,7 @@ from .parts_of_speech cimport ADJ, VERB, NOUN, PUNCT
|
||||||
from .attrs cimport POS, IS_SPACE
|
from .attrs cimport POS, IS_SPACE
|
||||||
from .parts_of_speech import IDS as POS_IDS
|
from .parts_of_speech import IDS as POS_IDS
|
||||||
from .lexeme cimport Lexeme
|
from .lexeme cimport Lexeme
|
||||||
from .attrs import intify_attrs
|
from .attrs import LEMMA, intify_attrs
|
||||||
|
|
||||||
|
|
||||||
def _normalize_props(props):
|
def _normalize_props(props):
|
||||||
|
@ -106,9 +109,8 @@ cdef class Morphology:
|
||||||
tag = self.strings[tag_str]
|
tag = self.strings[tag_str]
|
||||||
tag_id = self.reverse_index[tag]
|
tag_id = self.reverse_index[tag]
|
||||||
orth = self.strings[orth_str]
|
orth = self.strings[orth_str]
|
||||||
rich_tag = self.rich_tags[tag_id]
|
cdef RichTagC rich_tag = self.rich_tags[tag_id]
|
||||||
attrs = intify_attrs(attrs, self.strings, _do_deprecated=True)
|
attrs = intify_attrs(attrs, self.strings, _do_deprecated=True)
|
||||||
|
|
||||||
cached = <MorphAnalysisC*>self._cache.get(tag_id, orth)
|
cached = <MorphAnalysisC*>self._cache.get(tag_id, orth)
|
||||||
if cached is NULL:
|
if cached is NULL:
|
||||||
cached = <MorphAnalysisC*>self.mem.alloc(1, sizeof(MorphAnalysisC))
|
cached = <MorphAnalysisC*>self.mem.alloc(1, sizeof(MorphAnalysisC))
|
||||||
|
@ -121,8 +123,12 @@ cdef class Morphology:
|
||||||
raise ValueError(msg)
|
raise ValueError(msg)
|
||||||
|
|
||||||
cached.tag = rich_tag
|
cached.tag = rich_tag
|
||||||
|
# TODO: Refactor this to take arbitrary attributes.
|
||||||
for name_id, value_id in attrs.items():
|
for name_id, value_id in attrs.items():
|
||||||
self.assign_feature(&cached.tag.morph, name_id, value_id)
|
if name_id == LEMMA:
|
||||||
|
cached.lemma = value_id
|
||||||
|
else:
|
||||||
|
self.assign_feature(&cached.tag.morph, name_id, value_id)
|
||||||
if cached.lemma == 0:
|
if cached.lemma == 0:
|
||||||
cached.lemma = self.lemmatize(rich_tag.pos, orth,
|
cached.lemma = self.lemmatize(rich_tag.pos, orth,
|
||||||
self.tag_map.get(tag_str, {}))
|
self.tag_map.get(tag_str, {}))
|
||||||
|
|
Loading…
Reference in New Issue