mirror of https://github.com/explosion/spaCy.git
Update morphology API
This commit is contained in:
parent
9998d9b9ff
commit
34cab8cc49
|
@ -28,7 +28,7 @@ cdef class Morphology:
|
||||||
cdef int assign_untagged(self, TokenC* token) except -1
|
cdef int assign_untagged(self, TokenC* token) except -1
|
||||||
cdef int assign_tag(self, TokenC* token, tag) except -1
|
cdef int assign_tag(self, TokenC* token, tag) except -1
|
||||||
cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1
|
cdef int assign_tag_id(self, TokenC* token, int tag_id) except -1
|
||||||
cdef update_morph(self, hash_t morph, features)
|
cpdef update_morph_key(self, hash_t morph, features)
|
||||||
|
|
||||||
cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1
|
cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1
|
||||||
|
|
||||||
|
|
|
@ -96,11 +96,24 @@ cdef class Morphology:
|
||||||
"""Insert a morphological analysis in the morphology table, if not already
|
"""Insert a morphological analysis in the morphology table, if not already
|
||||||
present. Returns the hash of the new analysis.
|
present. Returns the hash of the new analysis.
|
||||||
"""
|
"""
|
||||||
features = intify_features(self.strings, features)
|
features = intify_features(features)
|
||||||
cdef RichTagC tag = create_rich_tag(features)
|
cdef RichTagC tag = create_rich_tag(features)
|
||||||
cdef hash_t key = self.insert(tag)
|
cdef hash_t key = self.insert(tag)
|
||||||
return key
|
return key
|
||||||
|
|
||||||
|
cpdef update_morph_key(self, hash_t morph, features):
|
||||||
|
"""Update a morphological analysis with new feature values."""
|
||||||
|
tag = (<RichTagC*>self.tags.get(morph))[0]
|
||||||
|
cdef univ_morph_t feature
|
||||||
|
cdef int value
|
||||||
|
for feature_, value in features.items():
|
||||||
|
feature = self.strings.as_int(feature_)
|
||||||
|
set_feature(&tag, feature, 1)
|
||||||
|
morph = self.insert_tag(tag)
|
||||||
|
return morph
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def lemmatize(self, const univ_pos_t univ_pos, attr_t orth, morphology):
|
def lemmatize(self, const univ_pos_t univ_pos, attr_t orth, morphology):
|
||||||
if orth not in self.strings:
|
if orth not in self.strings:
|
||||||
return orth
|
return orth
|
||||||
|
@ -188,17 +201,6 @@ cdef class Morphology:
|
||||||
token.pos = attrs.get(POS, token.pos)
|
token.pos = attrs.get(POS, token.pos)
|
||||||
token.lemma = attrs.get(LEMMA, token.lemma)
|
token.lemma = attrs.get(LEMMA, token.lemma)
|
||||||
|
|
||||||
cdef update_morph(self, hash_t morph, features):
|
|
||||||
"""Update a morphological analysis with new feature values."""
|
|
||||||
tag = (<RichTagC*>self.tags.get(morph))[0]
|
|
||||||
cdef univ_morph_t feature
|
|
||||||
cdef int value
|
|
||||||
for feature_, value in features.items():
|
|
||||||
feature = self.strings.as_int(feature_)
|
|
||||||
set_feature(&tag, feature, 1)
|
|
||||||
morph = self.insert_tag(tag)
|
|
||||||
return morph
|
|
||||||
|
|
||||||
def load_morph_exceptions(self, dict exc):
|
def load_morph_exceptions(self, dict exc):
|
||||||
# Map (form, pos) to attributes
|
# Map (form, pos) to attributes
|
||||||
for tag_str, entries in exc.items():
|
for tag_str, entries in exc.items():
|
||||||
|
@ -226,8 +228,8 @@ cdef class Morphology:
|
||||||
cpdef univ_pos_t get_int_tag(pos_):
|
cpdef univ_pos_t get_int_tag(pos_):
|
||||||
return <univ_pos_t>0
|
return <univ_pos_t>0
|
||||||
|
|
||||||
cpdef intify_features(StringStore strings, features):
|
cpdef intify_features(features):
|
||||||
return {strings.as_int(feature) for feature in features}
|
return {IDS.get(feature, feature) for feature in features}
|
||||||
|
|
||||||
cdef hash_t hash_tag(RichTagC tag) nogil:
|
cdef hash_t hash_tag(RichTagC tag) nogil:
|
||||||
return mrmr.hash64(&tag, sizeof(tag), 0)
|
return mrmr.hash64(&tag, sizeof(tag), 0)
|
||||||
|
|
Loading…
Reference in New Issue