spaCy/spacy/lexeme.pxd

150 lines
2.4 KiB
Cython

from .typedefs cimport hash_t, flags_t, id_t, len_t, tag_t, attr_t
from .utf8string cimport StringStore
# Reserve 64 values for flag features
cpdef enum attr_id_t:
FLAG0
FLAG1
FLAG2
FLAG3
FLAG4
FLAG5
FLAG6
FLAG7
FLAG8
FLAG9
FLAG10
FLAG11
FLAG12
FLAG13
FLAG14
FLAG15
FLAG16
FLAG17
FLAG18
FLAG19
FLAG20
FLAG21
FLAG22
FLAG23
FLAG24
FLAG25
FLAG26
FLAG27
FLAG28
FLAG29
FLAG30
FLAG31
FLAG32
FLAG33
FLAG34
FLAG35
FLAG36
FLAG37
FLAG38
FLAG39
FLAG40
FLAG41
FLAG42
FLAG43
FLAG44
FLAG45
FLAG46
FLAG47
FLAG48
FLAG49
FLAG50
FLAG51
FLAG52
FLAG53
FLAG54
FLAG55
FLAG56
FLAG57
FLAG58
FLAG59
FLAG60
FLAG61
FLAG62
FLAG63
ID
SIC
STEM
DENSE
SHAPE
ASCIIED
PREFIX
SUFFIX
LENGTH
CLUSTER
POS_TYPE
SENSE_TYPE
cdef struct Lexeme:
flags_t flags
attr_t id
attr_t sic
attr_t stem
attr_t dense
attr_t shape
attr_t asciied
attr_t prefix
attr_t suffix
attr_t length
attr_t cluster
attr_t pos_type
attr_t sense_type
float prob
float upper_pc
float title_pc
cdef Lexeme EMPTY_LEXEME
cpdef Lexeme init(id_t i, unicode string, hash_t hashed, StringStore store,
dict props) except *
cdef inline bint check_flag(const Lexeme* lexeme, attr_id_t flag_id) nogil:
return lexeme.flags & (1 << flag_id)
cdef inline attr_t get_attr(const Lexeme* lex, attr_id_t feat_name) nogil:
if feat_name < (sizeof(flags_t) * 8):
return check_flag(lex, feat_name)
elif feat_name == ID:
return lex.id
elif feat_name == SIC:
return lex.sic
elif feat_name == DENSE:
return lex.dense
elif feat_name == STEM:
return lex.stem
elif feat_name == SHAPE:
return lex.shape
elif feat_name == ASCIIED:
return lex.asciied
elif feat_name == PREFIX:
return lex.prefix
elif feat_name == SUFFIX:
return lex.suffix
elif feat_name == LENGTH:
return lex.length
elif feat_name == CLUSTER:
return lex.cluster
elif feat_name == POS_TYPE:
return lex.pos_type
elif feat_name == SENSE_TYPE:
return lex.sense_type
else:
return 0