mirror of https://github.com/explosion/spaCy.git
Normalize props for morph exceptions
This commit is contained in:
parent
e585b50458
commit
2669190b85
|
@ -15,6 +15,7 @@ from .parts_of_speech import IDS as POS_IDS
|
||||||
from .lexeme cimport Lexeme
|
from .lexeme cimport Lexeme
|
||||||
from .errors import Errors
|
from .errors import Errors
|
||||||
|
|
||||||
|
|
||||||
cdef enum univ_field_t:
|
cdef enum univ_field_t:
|
||||||
Field_Abbr
|
Field_Abbr
|
||||||
Field_AdpType
|
Field_AdpType
|
||||||
|
@ -138,6 +139,7 @@ cdef class Morphology:
|
||||||
self.exc = {}
|
self.exc = {}
|
||||||
if exc is not None:
|
if exc is not None:
|
||||||
for (tag, orth), attrs in exc.items():
|
for (tag, orth), attrs in exc.items():
|
||||||
|
attrs = _normalize_props(attrs)
|
||||||
self.add_special_case(
|
self.add_special_case(
|
||||||
self.strings.as_string(tag), self.strings.as_string(orth), attrs)
|
self.strings.as_string(tag), self.strings.as_string(orth), attrs)
|
||||||
|
|
||||||
|
@ -149,11 +151,13 @@ cdef class Morphology:
|
||||||
"""Insert a morphological analysis in the morphology table, if not already
|
"""Insert a morphological analysis in the morphology table, if not already
|
||||||
present. Returns the hash of the new analysis.
|
present. Returns the hash of the new analysis.
|
||||||
"""
|
"""
|
||||||
|
for f in features:
|
||||||
|
self.strings.add(f)
|
||||||
features = intify_features(features)
|
features = intify_features(features)
|
||||||
cdef attr_t feature
|
cdef attr_t feature
|
||||||
for feature in features:
|
for feature in features:
|
||||||
if feature != 0 and feature not in FEATURE_NAMES:
|
if feature != 0 and feature not in FEATURE_NAMES:
|
||||||
raise KeyError("Unknown feature: %d" % feature)
|
raise KeyError("Unknown feature: %s" % self.strings[feature])
|
||||||
cdef MorphAnalysisC tag
|
cdef MorphAnalysisC tag
|
||||||
tag = create_rich_tag(features)
|
tag = create_rich_tag(features)
|
||||||
cdef hash_t key = self.insert(tag)
|
cdef hash_t key = self.insert(tag)
|
||||||
|
@ -263,8 +267,7 @@ cdef class Morphology:
|
||||||
token.lemma = lemma
|
token.lemma = lemma
|
||||||
token.pos = <univ_pos_t>pos
|
token.pos = <univ_pos_t>pos
|
||||||
token.tag = self.strings[tag_str]
|
token.tag = self.strings[tag_str]
|
||||||
#token.morph = self.add(features)
|
token.morph = self.add(features)
|
||||||
token.morph = 0
|
|
||||||
if (self.tag_names[tag_id], token.lex.orth) in self.exc:
|
if (self.tag_names[tag_id], token.lex.orth) in self.exc:
|
||||||
self._assign_tag_from_exceptions(token, tag_id)
|
self._assign_tag_from_exceptions(token, tag_id)
|
||||||
|
|
||||||
|
@ -412,9 +415,101 @@ cdef tag_to_json(MorphAnalysisC tag):
|
||||||
features.append(FEATURE_NAMES[tag.verb_type])
|
features.append(FEATURE_NAMES[tag.verb_type])
|
||||||
return features
|
return features
|
||||||
|
|
||||||
|
|
||||||
cdef MorphAnalysisC tag_from_json(json_tag):
    # Stub: `json_tag` is not read. A MorphAnalysisC is declared locally and
    # returned as-is, with no field assigned in between.
    # NOTE(review): the struct is never explicitly initialised before the
    # return -- confirm whether callers expect a zeroed analysis.
    cdef MorphAnalysisC analysis
    return analysis
|
||||||
|
|
||||||
|
|
||||||
|
cdef int check_feature(const MorphAnalysisC* tag, attr_t feature) nogil:
    # Report whether `feature` is stored in any field of the analysis.
    #
    # Returns 1 as soon as one field of `tag` compares equal to `feature`,
    # and 0 when none does. The fields are tested in the order listed below;
    # `or` short-circuits, so evaluation stops at the first match.
    #
    # NOTE(review): a `feature` of 0 matches any field that currently holds 0.
    # Elsewhere in this file 0 is special-cased ahead of the FEATURE_NAMES
    # lookup, so it may mean "unset" -- confirm callers never pass 0 here.
    return (
        tag.abbr == feature
        or tag.adp_type == feature
        or tag.adv_type == feature
        or tag.animacy == feature
        or tag.aspect == feature
        or tag.case == feature
        or tag.conj_type == feature
        or tag.connegative == feature
        or tag.definite == feature
        or tag.degree == feature
        or tag.derivation == feature
        or tag.echo == feature
        or tag.foreign == feature
        or tag.gender == feature
        or tag.hyph == feature
        or tag.inf_form == feature
        or tag.mood == feature
        or tag.negative == feature
        or tag.number == feature
        or tag.name_type == feature
        or tag.noun_type == feature
        or tag.num_form == feature
        or tag.num_type == feature
        or tag.num_value == feature
        or tag.part_form == feature
        or tag.part_type == feature
        or tag.person == feature
        or tag.polite == feature
        or tag.polarity == feature
        or tag.poss == feature
        or tag.prefix == feature
        or tag.prep_case == feature
        or tag.pron_type == feature
        or tag.punct_side == feature
        or tag.punct_type == feature
        or tag.reflex == feature
        or tag.style == feature
        or tag.style_variant == feature
        or tag.tense == feature
        or tag.typo == feature
        or tag.verb_form == feature
        or tag.voice == feature
        or tag.verb_type == feature
    )
|
||||||
|
|
||||||
cdef int set_feature(MorphAnalysisC* tag,
|
cdef int set_feature(MorphAnalysisC* tag,
|
||||||
univ_field_t field, attr_t feature, int value) except -1:
|
univ_field_t field, attr_t feature, int value) except -1:
|
||||||
|
|
Loading…
Reference in New Issue