spaCy/spacy/morphology.pxd

756 lines
11 KiB
Cython

from cymem.cymem cimport Pool
from preshed.maps cimport PreshMapArray
from libc.stdint cimport uint64_t
from .structs cimport TokenC
from .strings cimport StringStore
from .typedefs cimport attr_t
from .parts_of_speech cimport univ_pos_t
# C-level record describing one tag: a 64-bit morphology value, an integer
# id, a universal part-of-speech value and an attribute-typed name.
# Field order defines the struct layout; do not reorder.
cdef struct RichTagC:
    # 64-bit morphology value. `Morphology.assign_feature` takes a `uint64_t*`,
    # presumably pointing at a field like this one — TODO confirm in the .pyx.
    uint64_t morph
    # Integer identifier of the tag (presumably its index in the tag table —
    # verify against the implementation).
    int id
    # Coarse part-of-speech, typed with `univ_pos_t` from `.parts_of_speech`.
    univ_pos_t pos
    # Tag name as an `attr_t` (presumably a StringStore key — TODO confirm).
    attr_t name
# C-level record pairing a full `RichTagC` (stored by value, not by pointer)
# with a lemma attribute.
cdef struct MorphAnalysisC:
    # Embedded tag record.
    RichTagC tag
    # Lemma as an `attr_t` (presumably a StringStore key — TODO confirm).
    attr_t lemma
# Declaration of the `Morphology` extension type. This .pxd only declares the
# C-level attributes and methods; the implementation lives in the matching
# .pyx and must agree with these signatures exactly.
cdef class Morphology:
    # `readonly` attributes are readable from Python but not assignable there.
    cdef readonly Pool mem
    cdef readonly StringStore strings
    # `public` attributes are readable and writable from Python; all four are
    # untyped Python objects.
    cdef public object lemmatizer
    cdef public object n_tags
    cdef public object reverse_index
    cdef public object tag_names
    # C-only members (not visible from Python).
    # Pointer to `RichTagC` records (presumably an array allocated from
    # `mem` — TODO confirm in the .pyx).
    cdef RichTagC* rich_tags
    cdef PreshMapArray _cache

    # C-level method taking a token pointer and a tag object.
    # `except -1`: a return value of -1 signals that a Python exception was
    # raised, so callers propagate it.
    cdef int assign_tag(self, TokenC* token, tag) except -1

    # C-level method taking a pointer to a 64-bit morphology value plus a
    # feature and a value object. Same `except -1` error convention as above.
    cdef int assign_feature(self, uint64_t* morph, feature, value) except -1
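# NOTE: The per-feature enums below are commented out and are not compiled.
# The active definition is the flat `Value_t` enum that follows this block.
#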
#cpdef enum Feature_t:
# Abbr
# AdpType
# AdvType
# ConjType
# Connegative
# Derivation
# Echo
# Foreign
# Gender_dat
# Gender_erg
# Gender_psor
# Hyph
# InfForm
# NameType
# NounType
# NumberAbs
# NumberDat
# NumberErg
# NumberPsee
# NumberPsor
# NumForm
# NumValue
# PartForm
# PartType
# Person_abs
# Person_dat
# Person_psor
# Polite
# Polite_abs
# Polite_dat
# Prefix
# PrepCase
# PunctSide
# PunctType
# Style
# Typo
# Variant
# VerbType
#
#
#cpdef enum Animacy:
# Anim
# Inam
#
#
#cpdef enum Aspect:
# Freq
# Imp
# Mod
# None_
# Perf
#
#
#cpdef enum Case1:
# Nom
# Gen
# Acc
# Dat
# Voc
# Abl
#
#cdef enum Case2:
# Abe
# Abs
# Ade
# All
# Cau
# Com
# Del
# Dis
#
#cdef enum Case3:
# Ela
# Ess
# Ill
# Ine
# Ins
# Loc
# Lat
# Par
#
#cdef enum Case4:
# Sub
# Sup
# Tem
# Ter
# Tra
#
#
#cpdef enum Definite:
# Two
# Def
# Red
# Ind
#
#
#cpdef enum Degree:
# Cmp
# Comp
# None_
# Pos
# Sup
# Abs
# Com
# Dim # du
#
#
#cpdef enum Gender:
# Com
# Fem
# Masc
# Neut
#
#
#cpdef enum Mood:
# Cnd
# Imp
# Ind
# N
# Pot
# Sub
# Opt
#
#
#cpdef enum Negative:
# Neg
# Pos
# Yes
#
#
#cpdef enum Number:
# Com
# Dual
# None_
# Plur
# Sing
# Ptan # bg
# Count # bg
#
#
#cpdef enum NumType:
# Card
# Dist
# Frac
# Gen
# Mult
# None_
# Ord
# Sets
#
#
#cpdef enum Person:
# One
# Two
# Three
# None_
#
#
#cpdef enum Poss:
# Yes
#
#
#cpdef enum PronType1:
# AdvPart
# Art
# Default
# Dem
# Ind
# Int
# Neg
#
#cpdef enum PronType2:
# Prs
# Rcp
# Rel
# Tot
# Clit
# Exc # es, ca, it, fa
# Clit # it
#
#
#cpdef enum Reflex:
# Yes
#
#
#cpdef enum Tense:
# Fut
# Imp
# Past
# Pres
#
#cpdef enum VerbForm1:
# Fin
# Ger
# Inf
# None_
# Part
# PartFut
# PartPast
#
#cpdef enum VerbForm2:
# PartPres
# Sup
# Trans
# Gdv # la
#
#
#cpdef enum Voice:
# Act
# Cau
# Pass
# Mid # gkc
# Int # hb
#
#
#cpdef enum Abbr:
# Yes # cz, fi, sl, U
#
#cpdef enum AdpType:
# Prep # cz, U
# Post # U
# Voc # cz
# Comprep # cz
# Circ # U
# Voc # U
#
#
#cpdef enum AdvType1:
# # U
# Man
# Loc
# Tim
# Deg
# Cau
# Mod
# Sta
# Ex
#
#cpdef enum AdvType2:
# Adadj
#
#cpdef enum ConjType:
# Oper # cz, U
# Comp # cz, U
#
#cpdef enum Connegative:
# Yes # fi
#
#
#cpdef enum Derivation1:
# Minen # fi
# Sti # fi
# Inen # fi
# Lainen # fi
# Ja # fi
# Ton # fi
# Vs # fi
# Ttain # fi
#
#cpdef enum Derivation2:
# Ttaa
#
#
#cpdef enum Echo:
# Rdp # U
# Ech # U
#
#
#cpdef enum Foreign:
# Foreign # cz, fi, U
# Fscript # cz, fi, U
# Tscript # cz, U
# Yes # sl
#
#
#cpdef enum Gender_dat:
# Masc # bq, U
# Fem # bq, U
#
#
#cpdef enum Gender_erg:
# Masc # bq
# Fem # bq
#
#
#cpdef enum Gender_psor:
# Masc # cz, sl, U
# Fem # cz, sl, U
# Neut # sl
#
#
#cpdef enum Hyph:
# Yes # cz, U
#
#
#cpdef enum InfForm:
# One # fi
# Two # fi
# Three # fi
#
#
#cpdef enum NameType:
# Geo # U, cz
# Prs # U, cz
# Giv # U, cz
# Sur # U, cz
# Nat # U, cz
# Com # U, cz
# Pro # U, cz
# Oth # U, cz
#
#
#cpdef enum NounType:
# Com # U
# Prop # U
# Class # U
#
#cpdef enum Number_abs:
# Sing # bq, U
# Plur # bq, U
#
#cpdef enum Number_dat:
# Sing # bq, U
# Plur # bq, U
#
#cpdef enum Number_erg:
# Sing # bq, U
# Plur # bq, U
#
#cpdef enum Number_psee:
# Sing # U
# Plur # U
#
#
#cpdef enum Number_psor:
# Sing # cz, fi, sl, U
# Plur # cz, fi, sl, U
#
#
#cpdef enum NumForm:
# Digit # cz, sl, U
# Roman # cz, sl, U
# Word # cz, sl, U
#
#
#cpdef enum NumValue:
# One # cz, U
# Two # cz, U
# Three # cz, U
#
#
#cpdef enum PartForm:
# Pres # fi
# Past # fi
# Agt # fi
# Neg # fi
#
#
#cpdef enum PartType:
# Mod # U
# Emp # U
# Res # U
# Inf # U
# Vbp # U
#
#cpdef enum Person_abs:
# One # bq, U
# Two # bq, U
# Three # bq, U
#
#
#cpdef enum Person_dat:
# One # bq, U
# Two # bq, U
# Three # bq, U
#
#
#cpdef enum Person_erg:
# One # bq, U
# Two # bq, U
# Three # bq, U
#
#
#cpdef enum Person_psor:
# One # fi, U
# Two # fi, U
# Three # fi, U
#
#
#cpdef enum Polite:
# Inf # bq, U
# Pol # bq, U
#
#
#cpdef enum Polite_abs:
# Inf # bq, U
# Pol # bq, U
#
#
#cpdef enum Polite_erg:
# Inf # bq, U
# Pol # bq, U
#
#
#cpdef enum Polite_dat:
# Inf # bq, U
# Pol # bq, U
#
#
#cpdef enum Prefix:
# Yes # U
#
#
#cpdef enum PrepCase:
# Npr # cz
# Pre # U
#
#
#cpdef enum PunctSide:
# Ini # U
# Fin # U
#
#cpdef enum PunctType1:
# Peri # U
# Qest # U
# Excl # U
# Quot # U
# Brck # U
# Comm # U
# Colo # U
# Semi # U
#
#cpdef enum PunctType2:
# Dash # U
#
#
#cpdef enum Style1:
# Arch # cz, fi, U
# Rare # cz, fi, U
# Poet # cz, U
# Norm # cz, U
# Coll # cz, U
# Vrnc # cz, U
# Sing # cz, U
# Expr # cz, U
#
#
#cpdef enum Style2:
# Derg # cz, U
# Vulg # cz, U
#
#
#cpdef enum Typo:
# Yes # fi, U
#
#
#cpdef enum Variant:
# Short # cz
# Bound # cz, sl
#
#
#cpdef enum VerbType:
# Aux # U
# Cop # U
# Mod # U
# Light # U
#
# Flat enumeration of morphological feature values. Each member is named
# `<Feature>_<Value>`. Because this is a `cpdef enum`, the members are
# available both at C level and from Python.
#
# Members carry no explicit values, so their ordinals follow declaration
# order: do NOT reorder, insert in the middle, or remove members without
# auditing every user of the numeric values.
#
# Trailing comments such as `# cz, fi, U` appear to name the languages or
# tag sets the value was taken from — TODO confirm the abbreviations.
cpdef enum Value_t:
    Animacy_Anim
    Animacy_Inam
    Aspect_Freq
    Aspect_Imp
    Aspect_Mod
    Aspect_None_
    Aspect_Perf
    Case_Abe
    Case_Abl
    Case_Abs
    Case_Acc
    Case_Ade
    Case_All
    Case_Cau
    Case_Com
    Case_Dat
    Case_Del
    Case_Dis
    Case_Ela
    Case_Ess
    Case_Gen
    Case_Ill
    Case_Ine
    Case_Ins
    Case_Loc
    Case_Lat
    Case_Nom
    Case_Par
    Case_Sub
    Case_Sup
    Case_Tem
    Case_Ter
    Case_Tra
    Case_Voc
    Definite_Two
    Definite_Def
    Definite_Red
    Definite_Ind
    Degree_Cmp
    Degree_Comp
    Degree_None
    Degree_Pos
    Degree_Sup
    Degree_Abs
    Degree_Com
    Degree_Dim # du
    Gender_Com
    Gender_Fem
    Gender_Masc
    Gender_Neut
    Mood_Cnd
    Mood_Imp
    Mood_Ind
    Mood_N
    Mood_Pot
    Mood_Sub
    Mood_Opt
    Negative_Neg
    Negative_Pos
    Negative_Yes
    Number_Com
    Number_Dual
    Number_None
    Number_Plur
    Number_Sing
    Number_Ptan # bg
    Number_Count # bg
    NumType_Card
    NumType_Dist
    NumType_Frac
    NumType_Gen
    NumType_Mult
    NumType_None
    NumType_Ord
    NumType_Sets
    Person_One
    Person_Two
    Person_Three
    Person_None
    Poss_Yes
    PronType_AdvPart
    PronType_Art
    PronType_Default
    PronType_Dem
    PronType_Ind
    PronType_Int
    PronType_Neg
    PronType_Prs
    PronType_Rcp
    PronType_Rel
    PronType_Tot
    PronType_Clit
    PronType_Exc # es, ca, it, fa
    Reflex_Yes
    Tense_Fut
    Tense_Imp
    Tense_Past
    Tense_Pres
    VerbForm_Fin
    VerbForm_Ger
    VerbForm_Inf
    VerbForm_None
    VerbForm_Part
    VerbForm_PartFut
    VerbForm_PartPast
    VerbForm_PartPres
    VerbForm_Sup
    VerbForm_Trans
    VerbForm_Gdv # la
    Voice_Act
    Voice_Cau
    Voice_Pass
    Voice_Mid # gkc
    Voice_Int # hb
    Abbr_Yes # cz, fi, sl, U
    AdpType_Prep # cz, U
    AdpType_Post # U
    AdpType_Voc # cz
    AdpType_Comprep # cz
    AdpType_Circ # U
    AdvType_Man
    AdvType_Loc
    AdvType_Tim
    AdvType_Deg
    AdvType_Cau
    AdvType_Mod
    AdvType_Sta
    AdvType_Ex
    AdvType_Adadj
    ConjType_Oper # cz, U
    ConjType_Comp # cz, U
    Connegative_Yes # fi
    Derivation_Minen # fi
    Derivation_Sti # fi
    Derivation_Inen # fi
    Derivation_Lainen # fi
    Derivation_Ja # fi
    Derivation_Ton # fi
    Derivation_Vs # fi
    Derivation_Ttain # fi
    Derivation_Ttaa # fi
    Echo_Rdp # U
    Echo_Ech # U
    Foreign_Foreign # cz, fi, U
    Foreign_Fscript # cz, fi, U
    Foreign_Tscript # cz, U
    Foreign_Yes # sl
    Gender_dat_Masc # bq, U
    Gender_dat_Fem # bq, U
    Gender_erg_Masc # bq
    Gender_erg_Fem # bq
    Gender_psor_Masc # cz, sl, U
    Gender_psor_Fem # cz, sl, U
    Gender_psor_Neut # sl
    Hyph_Yes # cz, U
    InfForm_One # fi
    InfForm_Two # fi
    InfForm_Three # fi
    NameType_Geo # U, cz
    NameType_Prs # U, cz
    NameType_Giv # U, cz
    NameType_Sur # U, cz
    NameType_Nat # U, cz
    NameType_Com # U, cz
    NameType_Pro # U, cz
    NameType_Oth # U, cz
    NounType_Com # U
    NounType_Prop # U
    NounType_Class # U
    Number_abs_Sing # bq, U
    Number_abs_Plur # bq, U
    Number_dat_Sing # bq, U
    Number_dat_Plur # bq, U
    Number_erg_Sing # bq, U
    Number_erg_Plur # bq, U
    Number_psee_Sing # U
    Number_psee_Plur # U
    Number_psor_Sing # cz, fi, sl, U
    Number_psor_Plur # cz, fi, sl, U
    NumForm_Digit # cz, sl, U
    NumForm_Roman # cz, sl, U
    NumForm_Word # cz, sl, U
    NumValue_One # cz, U
    NumValue_Two # cz, U
    NumValue_Three # cz, U
    PartForm_Pres # fi
    PartForm_Past # fi
    PartForm_Agt # fi
    PartForm_Neg # fi
    PartType_Mod # U
    PartType_Emp # U
    PartType_Res # U
    PartType_Inf # U
    PartType_Vbp # U
    Person_abs_One # bq, U
    Person_abs_Two # bq, U
    Person_abs_Three # bq, U
    Person_dat_One # bq, U
    Person_dat_Two # bq, U
    Person_dat_Three # bq, U
    Person_erg_One # bq, U
    Person_erg_Two # bq, U
    Person_erg_Three # bq, U
    Person_psor_One # fi, U
    Person_psor_Two # fi, U
    Person_psor_Three # fi, U
    Polite_Inf # bq, U
    Polite_Pol # bq, U
    Polite_abs_Inf # bq, U
    Polite_abs_Pol # bq, U
    Polite_erg_Inf # bq, U
    Polite_erg_Pol # bq, U
    Polite_dat_Inf # bq, U
    Polite_dat_Pol # bq, U
    Prefix_Yes # U
    PrepCase_Npr # cz
    PrepCase_Pre # U
    PunctSide_Ini # U
    PunctSide_Fin # U
    PunctType_Peri # U
    PunctType_Qest # U
    PunctType_Excl # U
    PunctType_Quot # U
    PunctType_Brck # U
    PunctType_Comm # U
    PunctType_Colo # U
    PunctType_Semi # U
    PunctType_Dash # U
    Style_Arch # cz, fi, U
    Style_Rare # cz, fi, U
    Style_Poet # cz, U
    Style_Norm # cz, U
    Style_Coll # cz, U
    Style_Vrnc # cz, U
    Style_Sing # cz, U
    Style_Expr # cz, U
    Style_Derg # cz, U
    Style_Vulg # cz, U
    # NOTE(review): the next three names do not follow the `<Feature>_<Value>`
    # pattern of the commented-out listing earlier in this file, which has
    # `Typo: Yes # fi, U` and `Variant: Short # cz / Bound # cz, sl` at this
    # position. They look like `Typo_Yes`, `Variant_Short` and `Variant_Bound`
    # mis-named — confirm before relying on them. The names are kept as-is
    # because renaming would break any code that references them.
    Style_Yes # fi, U
    StyleVariant_StyleShort # cz
    StyleVariant_StyleBound # cz, sl
    VerbType_Aux # U
    VerbType_Cop # U
    VerbType_Mod # U
    VerbType_Light # U