From 022dcda9643c3d07b0e8bfa824bf873ffd2247b4 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 26 Sep 2018 21:03:44 +0200 Subject: [PATCH] Fix morphology enum --- spacy/morphology.pxd | 319 +++++++++++++++++++++++++------------------ 1 file changed, 183 insertions(+), 136 deletions(-) diff --git a/spacy/morphology.pxd b/spacy/morphology.pxd index bc8c44417..adc5e5574 100644 --- a/spacy/morphology.pxd +++ b/spacy/morphology.pxd @@ -32,18 +32,22 @@ cdef class Morphology: cdef int _assign_tag_from_exceptions(self, TokenC* token, int tag_id) except -1 + cdef enum univ_morph_t: NIL = 0 + begin_Abbr - Abbr_yes # cz, fi, sl, U + Abbr_yes end_Abbr + begin_AdpType - AdpType_circ # U - AdpType_comprep # cz - AdpType_prep # cz, U - AdpType_post # U - AdpType_voc # cz + AdpType_circ + AdpType_comprep + AdpType_prep + AdpType_post + AdpType_voc end_AdpType + begin_AdvType AdvType_adadj AdvType_cau @@ -55,12 +59,14 @@ cdef enum univ_morph_t: AdvType_sta AdvType_tim end_AdvType + begin_Animacy - Animacy_anim = symbols.Animacy_anim + Animacy_anim Animacy_hum Animacy_inan Animacy_nhum end_Animacy + begin_Aspect Aspect_freq Aspect_imp @@ -68,6 +74,7 @@ cdef enum univ_morph_t: Aspect_none Aspect_perf end_Aspect + begin_Case Case_abe Case_abl @@ -97,6 +104,7 @@ cdef enum univ_morph_t: Case_tra Case_voc end_Case + begin_ConjType ConjType_comp # cz, U ConjType_oper # cz, U @@ -104,6 +112,7 @@ cdef enum univ_morph_t: begin_Connegative Connegative_yes # fi end_Connegative + begin_Definite Definite_cons # U20 Definite_def @@ -111,6 +120,7 @@ cdef enum univ_morph_t: Definite_red Definite_two end_Definite + begin_Degree Degree_abs Degree_cmp @@ -121,6 +131,31 @@ cdef enum univ_morph_t: Degree_com Degree_dim # du end_Degree + + begin_Derivation + Derivation_minen # fi + Derivation_sti # fi + Derivation_inen # fi + Derivation_lainen # fi + Derivation_ja # fi + Derivation_ton # fi + Derivation_vs # fi + Derivation_ttain # fi + Derivation_ttaa # fi + end_Derivation + + begin_Echo + Echo_rdp # U + Echo_ech # U + end_Echo + + begin_Foreign + Foreign_foreign # cz, fi, U + Foreign_fscript # cz, fi, U + Foreign_tscript # cz, U + Foreign_yes # sl + end_Foreign + begin_Gender Gender_com Gender_fem @@ -133,8 +168,18 @@ cdef enum univ_morph_t: Gender_psor_masc # cz, sl, U Gender_psor_fem # cz, sl, U Gender_psor_neut # sl - end_Gender + + begin_Hyph + Hyph_yes # cz, U + end_Hyph + + begin_InfForm + InfForm_one # fi + InfForm_two # fi + InfForm_three # fi + end_InfForm + begin_Mood Mood_cnd Mood_imp @@ -144,15 +189,30 @@ cdef enum univ_morph_t: Mood_sub Mood_opt end_Mood + + begin_NameType + NameType_geo # U, cz + NameType_prs # U, cz + NameType_giv # U, cz + NameType_sur # U, cz + NameType_nat # U, cz + NameType_com # U, cz + NameType_pro # U, cz + NameType_oth # U, cz + end_NameType + begin_Negative Negative_neg Negative_pos Negative_yes end_Negative - begin_Polarity - Polarity_neg # U20 - Polarity_pos # U20 - end_Polarity + + begin_NounType + NounType_com # U + NounType_prop # U + NounType_class # U + end_NounType + begin_Number Number_com Number_dual @@ -171,8 +231,14 @@ cdef enum univ_morph_t: Number_psee_plur # U Number_psor_sing # cz, fi, sl, U Number_psor_plur # cz, fi, sl, U - end_Number + + begin_NumForm + NumForm_digit # cz, sl, U + NumForm_roman # cz, sl, U + NumForm_word # cz, sl, U + end_NumForm + begin_NumType NumType_card NumType_dist @@ -183,7 +249,29 @@ cdef enum univ_morph_t: NumType_ord NumType_sets end_NumType - begin_Person + + begin_NumValue + NumValue_one # cz, U + NumValue_two # cz, U + NumValue_three # cz, U + end_NumValue + + begin_PartForm + PartForm_pres # fi + PartForm_past # fi + PartForm_agt # fi + PartForm_neg # fi + end_PartForm + + begin_PartType + PartType_mod # U + PartType_emp # U + PartType_res # U + PartType_inf # U + PartType_vbp # U + end_PartType + + begin_Person Person_one Person_two Person_three @@ -201,9 +289,36 @@ cdef enum univ_morph_t: Person_psor_two # fi, U Person_psor_three # fi, U end_Person + + begin_Polarity + Polarity_neg # U20 + Polarity_pos # U20 + end_Polarity + + begin_Polite + Polite_inf # bq, U + Polite_pol # bq, U + Polite_abs_inf # bq, U + Polite_abs_pol # bq, U + Polite_erg_inf # bq, U + Polite_erg_pol # bq, U + Polite_dat_inf # bq, U + Polite_dat_pol # bq, U + end_Polite + begin_Poss Poss_yes end_Poss + + begin_Prefix + Prefix_yes # U + end_Prefix + + begin_PrepCase + PrepCase_npr # cz + PrepCase_pre # U + end_PrepCase + begin_PronType PronType_advPart PronType_art @@ -219,15 +334,58 @@ cdef enum univ_morph_t: PronType_clit PronType_exc # es, ca, it, fa end_PronType + + begin_PunctSide + PunctSide_ini # U + PunctSide_fin # U + end_PunctSide + + begin_PunctType + PunctType_peri # U + PunctType_qest # U + PunctType_excl # U + PunctType_quot # U + PunctType_brck # U + PunctType_comm # U + PunctType_colo # U + PunctType_semi # U + PunctType_dash # U + end_PunctType + begin_Reflex Reflex_yes end_Reflex + + begin_Style + Style_arch # cz, fi, U + Style_rare # cz, fi, U + Style_poet # cz, U + Style_norm # cz, U + Style_coll # cz, U + Style_vrnc # cz, U + Style_sing # cz, U + Style_expr # cz, U + Style_derg # cz, U + Style_vulg # cz, U + Style_yes # fi, U + end_Style + + begin_StyleVariant + StyleVariant_styleShort # cz + StyleVariant_styleBound # cz, sl + end_StyleVariant + begin_Tense Tense_fut Tense_imp Tense_past Tense_pres end_Tense + + begin_Typo + Typo_yes + end_Typo + begin_VerbForm VerbForm_fin VerbForm_ger @@ -242,6 +400,14 @@ cdef enum univ_morph_t: VerbForm_conv # U20 VerbForm_gdv # la end_VerbForm + + begin_VerbType + VerbType_aux # U + VerbType_cop # U + VerbType_mod # U + VerbType_light # U + end_VerbType + begin_Voice Voice_act Voice_cau @@ -249,128 +415,7 @@ cdef enum univ_morph_t: Voice_mid # gkc Voice_int # hb end_Voice - begin_Derivation - Derivation_minen # fi - Derivation_sti # fi - Derivation_inen # fi - Derivation_lainen # fi - Derivation_ja # fi - Derivation_ton # fi - Derivation_vs # fi - Derivation_ttain # fi - Derivation_ttaa # fi - end_Derivation - begin_Echo - Echo_rdp # U - Echo_ech # U - end_Echo - begin_Foreign - Foreign_foreign # cz, fi, U - Foreign_fscript # cz, fi, U - Foreign_tscript # cz, U - Foreign_yes # sl - end_Foreign - begin_Hyph - Hyph_yes # cz, U - end_Hyph - begin_InfForm - InfForm_one # fi - InfForm_two # fi - InfForm_three # fi - end_InfForm - begin_NameType - NameType_geo # U, cz - NameType_prs # U, cz - NameType_giv # U, cz - NameType_sur # U, cz - NameType_nat # U, cz - NameType_com # U, cz - NameType_pro # U, cz - NameType_oth # U, cz - end_NameType - begin_NounType - NounType_com # U - NounType_prop # U - NounType_class # U - end_NounType - begin_NumForm - NumForm_digit # cz, sl, U - NumForm_roman # cz, sl, U - NumForm_word # cz, sl, U - end_NumForm - begin_NumValue - NumValue_one # cz, U - NumValue_two # cz, U - NumValue_three # cz, U - end_NumValue - begin_PartForm - PartForm_pres # fi - PartForm_past # fi - PartForm_agt # fi - PartForm_neg # fi - end_PartForm - begin_PartType - PartType_mod # U - PartType_emp # U - PartType_res # U - PartType_inf # U - PartType_vbp # U - end_PartType - begin_Polite - Polite_inf # bq, U - Polite_pol # bq, U - Polite_abs_inf # bq, U - Polite_abs_pol # bq, U - Polite_erg_inf # bq, U - Polite_erg_pol # bq, U - Polite_dat_inf # bq, U - Polite_dat_pol # bq, U - end_Polite - begin_Prefix - Prefix_yes # U - end_Prefix - begin_PrepCase - PrepCase_npr # cz - PrepCase_pre # U - end_PrepCase - begin_PunctSide - PunctSide_ini # U - PunctSide_fin # U - end_PunctSide - begin_PunctType - PunctType_peri # U - PunctType_qest # U - PunctType_excl # U - PunctType_quot # U - PunctType_brck # U - PunctType_comm # U - PunctType_colo # U - PunctType_semi # U - PunctType_dash # U - end_PunctType - begin_Style - Style_arch # cz, fi, U - Style_rare # cz, fi, U - Style_poet # cz, U - Style_norm # cz, U - Style_coll # cz, U - Style_vrnc # cz, U - Style_sing # cz, U - Style_expr # cz, U - Style_derg # cz, U - Style_vulg # cz, U - Style_yes # fi, U - end_Style - begin_StyleVariant - StyleVariant_styleShort # cz - StyleVariant_styleBound # cz, sl - end_StyleVariant - begin_VerbType - VerbType_aux # U - VerbType_cop # U - VerbType_mod # U - VerbType_light # U - end_VerbType + cdef struct RichTagC: univ_pos_t pos @@ -395,6 +440,7 @@ cdef struct RichTagC: univ_morph_t negative univ_morph_t number univ_morph_t name_type + univ_morph_t noun_type univ_morph_t num_form univ_morph_t num_type univ_morph_t num_value @@ -413,6 +459,7 @@ cdef struct RichTagC: univ_morph_t style univ_morph_t style_variant univ_morph_t tense + univ_morph_t typo univ_morph_t verb_form univ_morph_t voice univ_morph_t verb_type