From 6fe7c7256053aea202bfef155d4034149051fc1d Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 25 Sep 2018 17:28:13 +0200 Subject: [PATCH] Reorder morphology enum, and add begin and end markers --- spacy/morphology.pxd | 190 +++++++++++++++++++++++++++++++------------ 1 file changed, 138 insertions(+), 52 deletions(-) diff --git a/spacy/morphology.pxd b/spacy/morphology.pxd index 7ba84d40c..96bba5260 100644 --- a/spacy/morphology.pxd +++ b/spacy/morphology.pxd @@ -34,15 +34,41 @@ cdef class Morphology: cdef enum univ_morph_t: NIL = 0 + begin_Abbr + Abbr_yes # cz, fi, sl, U + end_Abbr + begin_AdpType + AdpType_circ # U + AdpType_comprep # cz + AdpType_prep # cz, U + AdpType_post # U + AdpType_voc # cz + end_AdpType + begin_AdvType + AdvType_adadj + AdvType_cau + AdvType_deg + AdvType_ex + AdvType_loc + AdvType_man + AdvType_mod + AdvType_sta + AdvType_tim + end_AdvType + begin_Animacy Animacy_anim = symbols.Animacy_anim - Animacy_inan Animacy_hum + Animacy_inan Animacy_nhum + end_Animacy + begin_Aspect Aspect_freq Aspect_imp Aspect_mod Aspect_none Aspect_perf + end_Aspect + begin_Case Case_abe Case_abl Case_abs @@ -70,23 +96,46 @@ cdef enum univ_morph_t: Case_ter Case_tra Case_voc - Definite_two - Definite_def - Definite_red + end_Case + begin_ConjType + ConjType_comp # cz, U + ConjType_oper # cz, U + end_ConjType + begin_Connegative + Connegative_yes # fi + end_Connegative + begin_Definite Definite_cons # U20 + Definite_def Definite_ind + Definite_red + Definite_two + end_Definite + begin_Degree + Degree_abs Degree_cmp Degree_comp Degree_none Degree_pos Degree_sup - Degree_abs Degree_com Degree_dim # du + end_Degree + begin_Gender Gender_com Gender_fem Gender_masc Gender_neut + Gender_dat_masc # bq, U + Gender_dat_fem # bq, U + Gender_erg_masc # bq + Gender_erg_fem # bq + Gender_psor_masc # cz, sl, U + Gender_psor_fem # cz, sl, U + Gender_psor_neut # sl + + end_Gender + begin_Mood Mood_cnd Mood_imp Mood_ind @@ -94,11 +143,17 @@ cdef enum univ_morph_t: Mood_pot Mood_sub Mood_opt + end_Mood + begin_Negative Negative_neg Negative_pos Negative_yes + end_Negative + begin_Polarity Polarity_neg # U20 Polarity_pos # U20 + end_Polarity + begin_Number Number_com Number_dual Number_none @@ -106,6 +161,19 @@ cdef enum univ_morph_t: Number_sing Number_ptan # bg Number_count # bg + Number_abs_sing # bq, U + Number_abs_plur # bq, U + Number_dat_sing # bq, U + Number_dat_plur # bq, U + Number_erg_sing # bq, U + Number_erg_plur # bq, U + Number_psee_sing # U + Number_psee_plur # U + Number_psor_sing # cz, fi, sl, U + Number_psor_plur # cz, fi, sl, U + + end_Number + begin_NumType NumType_card NumType_dist NumType_frac @@ -114,11 +182,29 @@ cdef enum univ_morph_t: NumType_none NumType_ord NumType_sets + end_NumType + begin_Person Person_one Person_two Person_three Person_none + Person_abs_one # bq, U + Person_abs_two # bq, U + Person_abs_three # bq, U + Person_dat_one # bq, U + Person_dat_two # bq, U + Person_dat_three # bq, U + Person_erg_one # bq, U + Person_erg_two # bq, U + Person_erg_three # bq, U + Person_psor_one # fi, U + Person_psor_two # fi, U + Person_psor_three # fi, U + end_Person + begin_Poss Poss_yes + end_Poss + begin_PronType PronType_advPart PronType_art PronType_default @@ -132,11 +218,17 @@ cdef enum univ_morph_t: PronType_tot PronType_clit PronType_exc # es, ca, it, fa + end_PronType + begin_Reflex Reflex_yes + end_Reflex + begin_Tense Tense_fut Tense_imp Tense_past Tense_pres + end_Tense + begin_VerbForm VerbForm_fin VerbForm_ger VerbForm_inf @@ -149,29 +241,15 @@ cdef enum univ_morph_t: VerbForm_trans VerbForm_conv # U20 VerbForm_gdv # la + end_VerbForm + begin_Voice Voice_act Voice_cau Voice_pass Voice_mid # gkc Voice_int # hb - Abbr_yes # cz, fi, sl, U - AdpType_prep # cz, U - AdpType_post # U - AdpType_voc # cz - AdpType_comprep # cz - AdpType_circ # U - AdvType_man - AdvType_loc - AdvType_tim - AdvType_deg - AdvType_cau - AdvType_mod - AdvType_sta - AdvType_ex - AdvType_adadj - ConjType_oper # cz, U - ConjType_comp # cz, U - Connegative_yes # fi + end_Voice + begin_Derivation Derivation_minen # fi Derivation_sti # fi Derivation_inen # fi @@ -181,23 +259,26 @@ cdef enum univ_morph_t: Derivation_vs # fi Derivation_ttain # fi Derivation_ttaa # fi + end_Derivation + begin_Echo Echo_rdp # U Echo_ech # U + end_Echo + begin_Foreign Foreign_foreign # cz, fi, U Foreign_fscript # cz, fi, U Foreign_tscript # cz, U Foreign_yes # sl - Gender_dat_masc # bq, U - Gender_dat_fem # bq, U - Gender_erg_masc # bq - Gender_erg_fem # bq - Gender_psor_masc # cz, sl, U - Gender_psor_fem # cz, sl, U - Gender_psor_neut # sl + end_Foreign + begin_Hyph Hyph_yes # cz, U + end_Hyph + begin_InfForm InfForm_one # fi InfForm_two # fi InfForm_three # fi + end_InfForm + begin_NameType NameType_geo # U, cz NameType_prs # U, cz NameType_giv # U, cz @@ -206,46 +287,36 @@ cdef enum univ_morph_t: NameType_com # U, cz NameType_pro # U, cz NameType_oth # U, cz + end_NameType + begin_NounType NounType_com # U NounType_prop # U NounType_class # U - Number_abs_sing # bq, U - Number_abs_plur # bq, U - Number_dat_sing # bq, U - Number_dat_plur # bq, U - Number_erg_sing # bq, U - Number_erg_plur # bq, U - Number_psee_sing # U - Number_psee_plur # U - Number_psor_sing # cz, fi, sl, U - Number_psor_plur # cz, fi, sl, U + end_NounType + begin_NumForm NumForm_digit # cz, sl, U NumForm_roman # cz, sl, U NumForm_word # cz, sl, U + end_NumForm + begin_NumValue NumValue_one # cz, U NumValue_two # cz, U NumValue_three # cz, U + end_NumValue + begin_PartForm PartForm_pres # fi PartForm_past # fi PartForm_agt # fi PartForm_neg # fi + end_PartForm + begin_PartType PartType_mod # U PartType_emp # U PartType_res # U PartType_inf # U PartType_vbp # U - Person_abs_one # bq, U - Person_abs_two # bq, U - Person_abs_three # bq, U - Person_dat_one # bq, U - Person_dat_two # bq, U - Person_dat_three # bq, U - Person_erg_one # bq, U - Person_erg_two # bq, U - Person_erg_three # bq, U - Person_psor_one # fi, U - Person_psor_two # fi, U - Person_psor_three # fi, U + end_PartType + begin_Polite Polite_inf # bq, U Polite_pol # bq, U Polite_abs_inf # bq, U @@ -254,11 +325,19 @@ cdef enum univ_morph_t: Polite_erg_pol # bq, U Polite_dat_inf # bq, U Polite_dat_pol # bq, U + end_Polite + begin_Prefix Prefix_yes # U + end_Prefix + begin_PrepCase PrepCase_npr # cz PrepCase_pre # U + end_PrepCase + begin_PunctSide PunctSide_ini # U PunctSide_fin # U + end_PunctSide + begin_PunctType PunctType_peri # U PunctType_qest # U PunctType_excl # U @@ -268,6 +347,8 @@ cdef enum univ_morph_t: PunctType_colo # U PunctType_semi # U PunctType_dash # U + end_PunctType + begin_Style Style_arch # cz, fi, U Style_rare # cz, fi, U Style_poet # cz, U @@ -279,12 +360,17 @@ cdef enum univ_morph_t: Style_derg # cz, U Style_vulg # cz, U Style_yes # fi, U + end_Style + begin_StyleVariant StyleVariant_styleShort # cz StyleVariant_styleBound # cz, sl + end_StyleVariant + begin_VerbType VerbType_aux # U VerbType_cop # U VerbType_mod # U VerbType_light # U + end_VerbType cdef struct RichTagC: univ_pos_t pos