mirror of https://github.com/explosion/spaCy.git
Add MorphAnalysisC struct
This commit is contained in:
parent
b69013e2d7
commit
b9ade7d4e0
|
@ -3,7 +3,7 @@ from preshed.maps cimport PreshMap, PreshMapArray
|
|||
from libc.stdint cimport uint64_t
|
||||
from murmurhash cimport mrmr
|
||||
|
||||
from .structs cimport TokenC
|
||||
from .structs cimport TokenC, MorphAnalysisC
|
||||
from .strings cimport StringStore
|
||||
from .typedefs cimport hash_t, attr_t, flags_t
|
||||
from .parts_of_speech cimport univ_pos_t
|
||||
|
@ -24,7 +24,7 @@ cdef class Morphology:
|
|||
cdef readonly int n_tags
|
||||
|
||||
cpdef update(self, hash_t morph, features)
|
||||
cdef hash_t insert(self, RichTagC tag) except 0
|
||||
cdef hash_t insert(self, MorphAnalysisC tag) except 0
|
||||
|
||||
cdef int assign_untagged(self, TokenC* token) except -1
|
||||
cdef int assign_tag(self, TokenC* token, tag) except -1
|
||||
|
@ -416,50 +416,3 @@ cdef enum univ_morph_t:
|
|||
Voice_int # hb
|
||||
end_Voice
|
||||
|
||||
|
||||
cdef struct RichTagC:
|
||||
univ_pos_t pos
|
||||
|
||||
univ_morph_t abbr
|
||||
univ_morph_t adp_type
|
||||
univ_morph_t adv_type
|
||||
univ_morph_t animacy
|
||||
univ_morph_t aspect
|
||||
univ_morph_t case
|
||||
univ_morph_t conj_type
|
||||
univ_morph_t connegative
|
||||
univ_morph_t definite
|
||||
univ_morph_t degree
|
||||
univ_morph_t derivation
|
||||
univ_morph_t echo
|
||||
univ_morph_t foreign
|
||||
univ_morph_t gender
|
||||
univ_morph_t hyph
|
||||
univ_morph_t inf_form
|
||||
univ_morph_t mood
|
||||
univ_morph_t negative
|
||||
univ_morph_t number
|
||||
univ_morph_t name_type
|
||||
univ_morph_t noun_type
|
||||
univ_morph_t num_form
|
||||
univ_morph_t num_type
|
||||
univ_morph_t num_value
|
||||
univ_morph_t part_form
|
||||
univ_morph_t part_type
|
||||
univ_morph_t person
|
||||
univ_morph_t polite
|
||||
univ_morph_t polarity
|
||||
univ_morph_t poss
|
||||
univ_morph_t prefix
|
||||
univ_morph_t prep_case
|
||||
univ_morph_t pron_type
|
||||
univ_morph_t punct_side
|
||||
univ_morph_t punct_type
|
||||
univ_morph_t reflex
|
||||
univ_morph_t style
|
||||
univ_morph_t style_variant
|
||||
univ_morph_t tense
|
||||
univ_morph_t typo
|
||||
univ_morph_t verb_form
|
||||
univ_morph_t voice
|
||||
univ_morph_t verb_type
|
||||
|
|
|
@ -111,13 +111,13 @@ cdef class Morphology:
|
|||
print(list(NAMES.keys())[:10])
|
||||
print(NAMES.get(feature-1), NAMES.get(feature+1))
|
||||
raise KeyError("Unknown feature: %d" % feature)
|
||||
cdef RichTagC tag
|
||||
cdef MorphAnalysisC tag
|
||||
tag = create_rich_tag(features)
|
||||
cdef hash_t key = self.insert(tag)
|
||||
return key
|
||||
|
||||
def get(self, hash_t morph):
|
||||
tag = <RichTagC*>self.tags.get(morph)
|
||||
tag = <MorphAnalysisC*>self.tags.get(morph)
|
||||
if tag == NULL:
|
||||
return []
|
||||
else:
|
||||
|
@ -125,7 +125,7 @@ cdef class Morphology:
|
|||
|
||||
cpdef update(self, hash_t morph, features):
|
||||
"""Update a morphological analysis with new feature values."""
|
||||
tag = (<RichTagC*>self.tags.get(morph))[0]
|
||||
tag = (<MorphAnalysisC*>self.tags.get(morph))[0]
|
||||
features = intify_features(features)
|
||||
cdef univ_morph_t feature
|
||||
for feature in features:
|
||||
|
@ -168,10 +168,10 @@ cdef class Morphology:
|
|||
attrs = intify_attrs(attrs, self.strings, _do_deprecated=True)
|
||||
self.exc[(tag_str, self.strings.add(orth_str))] = attrs
|
||||
|
||||
cdef hash_t insert(self, RichTagC tag) except 0:
|
||||
cdef hash_t insert(self, MorphAnalysisC tag) except 0:
|
||||
cdef hash_t key = hash_tag(tag)
|
||||
if self.tags.get(key) == NULL:
|
||||
tag_ptr = <RichTagC*>self.mem.alloc(1, sizeof(RichTagC))
|
||||
tag_ptr = <MorphAnalysisC*>self.mem.alloc(1, sizeof(MorphAnalysisC))
|
||||
tag_ptr[0] = tag
|
||||
self.tags.set(key, <void*>tag_ptr)
|
||||
return key
|
||||
|
@ -240,7 +240,7 @@ cdef class Morphology:
|
|||
def to_bytes(self):
|
||||
json_tags = []
|
||||
for key in self.tags:
|
||||
tag_ptr = <RichTagC*>self.tags.get(key)
|
||||
tag_ptr = <MorphAnalysisC*>self.tags.get(key)
|
||||
if tag_ptr != NULL:
|
||||
json_tags.append(tag_to_json(tag_ptr[0]))
|
||||
return srsly.json_dumps(json_tags)
|
||||
|
@ -261,18 +261,18 @@ cpdef univ_pos_t get_int_tag(pos_):
|
|||
cpdef intify_features(features):
|
||||
return {IDS.get(feature, feature) for feature in features}
|
||||
|
||||
cdef hash_t hash_tag(RichTagC tag) nogil:
|
||||
cdef hash_t hash_tag(MorphAnalysisC tag) nogil:
|
||||
return mrmr.hash64(&tag, sizeof(tag), 0)
|
||||
|
||||
cdef RichTagC create_rich_tag(features) except *:
|
||||
cdef RichTagC tag
|
||||
cdef MorphAnalysisC create_rich_tag(features) except *:
|
||||
cdef MorphAnalysisC tag
|
||||
cdef univ_morph_t feature
|
||||
memset(&tag, 0, sizeof(tag))
|
||||
for feature in features:
|
||||
set_feature(&tag, feature, 1)
|
||||
return tag
|
||||
|
||||
cdef tag_to_json(RichTagC tag):
|
||||
cdef tag_to_json(MorphAnalysisC tag):
|
||||
features = []
|
||||
if tag.abbr != 0:
|
||||
features.append(NAMES[tag.abbr])
|
||||
|
@ -360,11 +360,11 @@ cdef tag_to_json(RichTagC tag):
|
|||
features.append(NAMES[tag.verb_type])
|
||||
return features
|
||||
|
||||
cdef RichTagC tag_from_json(json_tag):
|
||||
cdef RichTagC tag
|
||||
cdef MorphAnalysisC tag_from_json(json_tag):
|
||||
cdef MorphAnalysisC tag
|
||||
return tag
|
||||
|
||||
cdef int set_feature(RichTagC* tag, univ_morph_t feature, int value) except -1:
|
||||
cdef int set_feature(MorphAnalysisC* tag, univ_morph_t feature, int value) except -1:
|
||||
if value == True:
|
||||
value_ = feature
|
||||
else:
|
||||
|
|
|
@ -74,4 +74,50 @@ cdef struct TokenC:
|
|||
hash_t ent_id
|
||||
|
||||
|
||||
cdef struct MorphAnalysisC:
|
||||
univ_pos_t pos
|
||||
|
||||
attr_t abbr
|
||||
attr_t adp_type
|
||||
attr_t adv_type
|
||||
attr_t animacy
|
||||
attr_t aspect
|
||||
attr_t case
|
||||
attr_t conj_type
|
||||
attr_t connegative
|
||||
attr_t definite
|
||||
attr_t degree
|
||||
attr_t derivation
|
||||
attr_t echo
|
||||
attr_t foreign
|
||||
attr_t gender
|
||||
attr_t hyph
|
||||
attr_t inf_form
|
||||
attr_t mood
|
||||
attr_t negative
|
||||
attr_t number
|
||||
attr_t name_type
|
||||
attr_t noun_type
|
||||
attr_t num_form
|
||||
attr_t num_type
|
||||
attr_t num_value
|
||||
attr_t part_form
|
||||
attr_t part_type
|
||||
attr_t person
|
||||
attr_t polite
|
||||
attr_t polarity
|
||||
attr_t poss
|
||||
attr_t prefix
|
||||
attr_t prep_case
|
||||
attr_t pron_type
|
||||
attr_t punct_side
|
||||
attr_t punct_type
|
||||
attr_t reflex
|
||||
attr_t style
|
||||
attr_t style_variant
|
||||
attr_t tense
|
||||
attr_t typo
|
||||
attr_t verb_form
|
||||
attr_t voice
|
||||
attr_t verb_type
|
||||
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
from ..vocab cimport Vocab
|
||||
from ..typedefs cimport hash_t
|
||||
|
||||
|
||||
cdef class Morphanalysis:
|
||||
"""Control access to morphological features for a token."""
|
||||
def __init__(self, Vocab vocab, features=None):
|
||||
pass
|
||||
def __init__(self, Vocab vocab, features=tuple()):
|
||||
self.vocab = vocab
|
||||
self.key = self.vocab.morphology.add(features)
|
||||
analysis = <const MorphAnalysisC*>self.vocab.morphology.tags.get(self.key)
|
||||
self.c = analysis[0]
|
||||
|
||||
@classmethod
|
||||
def from_id(self, Vocab vocab, hash_t key):
|
||||
|
@ -28,6 +32,12 @@ cdef class Morphanalysis:
|
|||
def __hash__(self):
|
||||
pass
|
||||
|
||||
def get(self, name):
|
||||
pass
|
||||
|
||||
def to_json(self):
|
||||
pass
|
||||
|
||||
@property
|
||||
def is_base_form(self):
|
||||
pass
|
||||
|
@ -44,17 +54,354 @@ cdef class Morphanalysis:
|
|||
def id(self):
|
||||
pass
|
||||
|
||||
def get(self, name):
|
||||
pass
|
||||
property abbr:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
def set(self, name, value):
|
||||
pass
|
||||
property adp_type:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
def add(self, feature):
|
||||
pass
|
||||
property adv_type:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
def remove(self, feature):
|
||||
pass
|
||||
property animacy:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
def to_json(self):
|
||||
pass
|
||||
property aspect:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property case:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property conj_type:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property connegative:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property definite:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property degree:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property derivation:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property echo:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property foreign:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property gender:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property hyph:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property inf_form:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property name_type:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property negative:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property mood:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property name_type:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property negative:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property number:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property num_form:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property num_type:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property num_value:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property part_form:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property part_type:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property person:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property polite:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property polarity:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property poss:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property prefix:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property prep_case:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property pron_type:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property punct_side:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property punct_type:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property reflex:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property style:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property style_variant:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property tense:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property typo:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property verb_form:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property voice:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property verb_type:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property abbr_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property adp_type_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property adv_type_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property animacy_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property aspect_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property case_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property conj_type_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property connegative_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property definite_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property degree_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property derivation_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property echo_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property foreign_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property gender_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property hyph_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property inf_form_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property name_type_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property negative_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property mood_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property name_type_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property negative_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property number_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property num_form_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property num_type_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property num_value_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property part_form_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property part_type_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property person_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property polite_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property polarity_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property poss_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property prefix_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property prep_case_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property pron_type_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property punct_side_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property punct_type_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property reflex_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property style_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property style_variant_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property tense_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property typo_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property verb_form_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property voice_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
||||
property verb_type_:
|
||||
def __get__(self):
|
||||
pass
|
||||
|
|
Loading…
Reference in New Issue