spaCy/spacy/lexeme.pxd

84 lines
1.6 KiB
Cython
Raw Normal View History

2014-08-02 20:51:52 +00:00
from libc.stdint cimport uint32_t
from libc.stdint cimport uint64_t
# These typedefs are declared ahead of any project-level cimport to avoid a
# circular import problem.
# ClusterID is the integer type of Distribution.cluster and the return type of
# cluster_of().
ctypedef int ClusterID
2014-08-02 20:51:52 +00:00
# 32-bit hash value; the type of Lexeme.lex and of the hashed string views
# stored in Orthography.
ctypedef uint32_t StringHash
# Unsigned size_t integer.
# NOTE(review): presumably carries the address of a Lexeme struct — confirm in lexeme.pyx.
ctypedef size_t Lexeme_addr
# Storage type of Orthography.flags and Distribution.flags.
# NOTE(review): plain `char` has implementation-defined signedness — confirm the
# top bit is handled as intended, or consider an unsigned type.
ctypedef char Bits8
# 64-bit unsigned field; the type of Distribution.tagdict.
ctypedef uint64_t Bits64
2014-08-19 00:40:37 +00:00
# Orthographic flags, accepted by check_orth_flag().
# Members take the implicit values 0..7 in declaration order.
# NOTE(review): presumably each value is a bit position within
# Orthography.flags (Bits8) — confirm against lexeme.pyx.
cdef enum OrthFlag:
    IS_ALPHA
    IS_DIGIT
    IS_PUNCT
    IS_WHITE
    IS_LOWER
    IS_UPPER
    IS_TITLE
    IS_ASCII
# Distributional flags, accepted by check_dist_flag().
# Members take the implicit values 0..7 in declaration order.
# NOTE(review): presumably each value is a bit position within
# Distribution.flags (Bits8); DIST_FLAG3..DIST_FLAG8 look like unassigned
# placeholders — confirm against lexeme.pyx.
cdef enum DistFlag:
    OFT_UPPER
    OFT_TITLE
    DIST_FLAG3
    DIST_FLAG4
    DIST_FLAG5
    DIST_FLAG6
    DIST_FLAG7
    DIST_FLAG8
# Orthographic record that Lexeme.orth points to.
cdef struct Orthography:
    # StringHash-typed views of the word (cf. LAST3 / SHAPE / NORM in StringAttr).
    StringHash last3
    StringHash shape
    StringHash norm

    size_t length      # cf. length_of()
    Py_UNICODE first   # cf. first_of()
    Bits8 flags        # NOTE(review): presumably indexed by OrthFlag — confirm in lexeme.pyx
# Distributional record that Lexeme.dist points to.
cdef struct Distribution:
    double prob         # cf. prob_of()
    ClusterID cluster   # cf. cluster_of()
    Bits64 tagdict      # 64-bit field; encoding is not visible in this file
    Bits8 flags         # NOTE(review): presumably indexed by DistFlag — confirm in lexeme.pyx
# Top-level lexical entry: the word's hash plus pointers to its orthographic
# and distributional records.
cdef struct Lexeme:
    StringHash lex       # Hash of the word
    Orthography* orth    # Extra orthographic views
    Distribution* dist   # Distribution info
2014-08-19 00:40:37 +00:00
# Sentinel Lexeme: zero hash and no attached orthography/distribution records.
cdef Lexeme BLANK_WORD = Lexeme(lex=0, orth=NULL, dist=NULL)
2014-07-07 18:27:02 +00:00
# Selector passed to attr_of() to choose which attribute of a lexeme to read.
# Members take the implicit values 0..4 in declaration order.
cdef enum StringAttr:
    LEX
    NORM
    SHAPE
    LAST3
    LENGTH
2014-07-07 18:27:02 +00:00
# Accessor declarations; the implementations live in the matching .pyx.
# Each takes the lexeme as a size_t.
# NOTE(review): presumably that size_t is the address of a Lexeme struct
# (see Lexeme_addr) — confirm in lexeme.pyx.
#
# NOTE(review): `except 0` tells Cython that a return value of 0 ALWAYS signals
# a raised exception. Accessors for which 0 is a legitimate result (a cluster
# id of 0, a probability of 0.0, or the zero hash stored in BLANK_WORD) would
# be misreported as errors. Consider `except? 0` or `except *`, changed
# together with the .pyx signatures, which must match these declarations.

# Generic accessor: returns the attribute selected by `attr`.
cpdef StringHash attr_of(size_t lex_id, StringAttr attr) except 0

# StringHash-valued accessors.
cpdef StringHash lex_of(size_t lex_id) except 0
cpdef StringHash norm_of(size_t lex_id) except 0
cpdef StringHash shape_of(size_t lex_id) except 0
cpdef StringHash last3_of(size_t lex_id) except 0

# Non-hash accessors; `except *` makes the caller check for an exception after
# every call, so any return value is legal.
cpdef size_t length_of(size_t lex_id) except *
cpdef Py_UNICODE first_of(size_t lex_id) except *

# Distribution accessors.
cpdef double prob_of(size_t lex_id) except 0
cpdef ClusterID cluster_of(size_t lex_id) except 0

# Flag tests for the OrthFlag / DistFlag enums.
cpdef bint check_orth_flag(size_t lex, OrthFlag flag) except *
cpdef bint check_dist_flag(size_t lex, DistFlag flag) except *