Update morphology API

This commit is contained in:
Matthew Honnibal 2018-09-25 20:53:24 +02:00
parent 9998d9b9ff
commit 34cab8cc49
2 changed files with 17 additions and 15 deletions

View File

@ -28,7 +28,7 @@ cdef class Morphology:
cdef int assign_untagged(self, TokenC* token) except -1 cdef int assign_untagged(self, TokenC* token) except -1
cdef int assign_tag(self, TokenC* token, tag) except -1 cdef int assign_tag(self, TokenC* token, tag) except -1
cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1 cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1
cdef update_morph(self, hash_t morph, features) cpdef update_morph_key(self, hash_t morph, features)
cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1 cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1

View File

@ -96,11 +96,24 @@ cdef class Morphology:
"""Insert a morphological analysis in the morphology table, if not already """Insert a morphological analysis in the morphology table, if not already
present. Returns the hash of the new analysis. present. Returns the hash of the new analysis.
""" """
features = intify_features(self.strings, features) features = intify_features(features)
cdef RichTagC tag = create_rich_tag(features) cdef RichTagC tag = create_rich_tag(features)
cdef hash_t key = self.insert(tag) cdef hash_t key = self.insert(tag)
return key return key
cpdef update_morph_key(self, hash_t morph, features):
    """Update a morphological analysis with new feature values.

    Looks up the analysis stored under `morph`, copies it, calls
    `set_feature(..., 1)` on the copy for every key of `features`, and
    inserts the modified copy via `self.insert_tag`.

    morph (hash_t): Key of an analysis already stored in `self.tags`.
    features (dict): Mapping keyed by feature; each key is converted with
        `self.strings.as_int`. The values must be coercible to a C int but
        are otherwise ignored: every listed feature is set with the flag 1.
        NOTE(review): confirm whether the value was meant to be passed on.
    RETURNS: Whatever `self.insert_tag` returns for the updated copy.
    RAISES (KeyError): If `morph` has no entry in `self.tags`.
    """
    cdef RichTagC* stored = <RichTagC*>self.tags.get(morph)
    # Guard the dereference below: presumably the table returns NULL for an
    # unknown key, and reading through it would crash the process.
    if stored is NULL:
        raise KeyError(morph)
    # Work on a by-value copy; the features are applied to this local only.
    cdef RichTagC tag = stored[0]
    cdef univ_morph_t feature
    cdef int value
    for feature_, value in features.items():
        feature = self.strings.as_int(feature_)
        set_feature(&tag, feature, 1)
    morph = self.insert_tag(tag)
    return morph
def lemmatize(self, const univ_pos_t univ_pos, attr_t orth, morphology): def lemmatize(self, const univ_pos_t univ_pos, attr_t orth, morphology):
if orth not in self.strings: if orth not in self.strings:
return orth return orth
@ -188,17 +201,6 @@ cdef class Morphology:
token.pos = attrs.get(POS, token.pos) token.pos = attrs.get(POS, token.pos)
token.lemma = attrs.get(LEMMA, token.lemma) token.lemma = attrs.get(LEMMA, token.lemma)
cdef update_morph(self, hash_t morph, features):
    """Update a morphological analysis with new feature values.

    Looks up the analysis stored under `morph`, copies it, calls
    `set_feature(..., 1)` on the copy for every key of `features`, and
    inserts the modified copy via `self.insert_tag`.

    morph (hash_t): Key of an analysis already stored in `self.tags`.
    features (dict): Mapping keyed by feature; each key is converted with
        `self.strings.as_int`. The values must be coercible to a C int but
        are otherwise ignored: every listed feature is set with the flag 1.
        NOTE(review): confirm whether the value was meant to be passed on.
    RETURNS: Whatever `self.insert_tag` returns for the updated copy.
    RAISES (KeyError): If `morph` has no entry in `self.tags`.
    """
    cdef RichTagC* stored = <RichTagC*>self.tags.get(morph)
    # Guard the dereference below: presumably the table returns NULL for an
    # unknown key, and reading through it would crash the process.
    if stored is NULL:
        raise KeyError(morph)
    # Work on a by-value copy; the features are applied to this local only.
    cdef RichTagC tag = stored[0]
    cdef univ_morph_t feature
    cdef int value
    for feature_, value in features.items():
        feature = self.strings.as_int(feature_)
        set_feature(&tag, feature, 1)
    morph = self.insert_tag(tag)
    return morph
def load_morph_exceptions(self, dict exc): def load_morph_exceptions(self, dict exc):
# Map (form, pos) to attributes # Map (form, pos) to attributes
for tag_str, entries in exc.items(): for tag_str, entries in exc.items():
@ -226,8 +228,8 @@ cdef class Morphology:
cpdef univ_pos_t get_int_tag(pos_): cpdef univ_pos_t get_int_tag(pos_):
return <univ_pos_t>0 return <univ_pos_t>0
cpdef intify_features(StringStore strings, features): cpdef intify_features(features):
return {strings.as_int(feature) for feature in features} return {IDS.get(feature, feature) for feature in features}
cdef hash_t hash_tag(RichTagC tag) nogil: cdef hash_t hash_tag(RichTagC tag) nogil:
return mrmr.hash64(&tag, sizeof(tag), 0) return mrmr.hash64(&tag, sizeof(tag), 0)