2014-09-11 14:57:08 +00:00
|
|
|
from spacy.lexeme cimport LexemeC
|
2014-09-15 01:22:40 +00:00
|
|
|
from libcpp.vector cimport vector
|
2014-09-11 14:57:08 +00:00
|
|
|
|
|
|
|
|
2014-10-14 04:21:03 +00:00
|
|
|
# Plain C struct describing one entry stored in a `Tokens` container
# (the container declares a `vector[Token]`).
cdef struct Token:
|
|
|
|
# Integer position value for this entry.
# NOTE(review): presumably an index/offset into the source text -- confirm
# against the implementation file.
int i
|
|
|
|
# Integer tag value.
# NOTE(review): presumably a part-of-speech id, judging by the name -- confirm.
int pos
|
|
|
|
# Pointer to the LexemeC record (cimported from spacy.lexeme) for this entry.
# Ownership/lifetime of the pointee is not established by this declaration.
LexemeC* lex
|
|
|
|
|
|
|
|
|
2014-09-15 01:22:40 +00:00
|
|
|
# Extension type that stores a C++ vector of `Token` structs and declares
# accessor methods that each take a `size_t i` argument.
# NOTE(review): `i` is presumably the position of the token within `v` --
# confirm against the implementation file.
#
# Declaration conventions used below (standard Cython semantics):
#   * `cdef`  methods are callable from Cython/C code only.
#   * `cpdef` methods are callable from both Python and Cython/C code.
#   * `except <value>` reserves <value> as the error-return sentinel.
#   * `except *` makes the caller check for a pending exception after every
#     call; it is needed where every return value is a legitimate result.
#   * Methods returning `unicode` return Python objects, so exceptions
#     propagate without an explicit `except` clause.
cdef class Tokens:
|
2014-10-14 04:21:03 +00:00
|
|
|
# Underlying storage: a C++ std::vector of Token structs.
cdef vector[Token] v
|
|
|
|
|
|
|
|
# C-level mutators. Both return an int and use -1 as the error sentinel.
# `extend` receives an int `i`, a pointer to an array of LexemeC pointers and
# a count `n`.
# NOTE(review): presumably adds `n` entries starting from position `i` --
# confirm against the implementation file.
cdef int extend(self, int i, LexemeC** lexemes, int n) except -1
|
|
|
|
# `push_back` receives an int `i` and a single LexemeC pointer.
# NOTE(review): presumably appends one Token built from them -- confirm.
cdef int push_back(self, int i, LexemeC* lexeme) except -1
|
2014-09-10 16:11:13 +00:00
|
|
|
|
2014-10-10 08:17:22 +00:00
|
|
|
# Integer accessor; -1 is reserved as the error sentinel, so a valid result
# must never be -1.
cpdef int id(self, size_t i) except -1
|
2014-10-09 21:11:31 +00:00
|
|
|
# Float accessor; a return value of 1 is reserved as the error sentinel, so a
# valid result must never equal 1.
# NOTE(review): presumably the value is a log-probability (<= 0), which would
# make 1 unreachable -- confirm.
cpdef float prob(self, size_t i) except 1
|
|
|
|
# Integer accessor; `except *` because no int value is reserved for errors.
cpdef int cluster(self, size_t i) except *
|
|
|
|
# Boolean flag lookups taking an additional `flag_id` selector.
cpdef bint check_orth_flag(self, size_t i, size_t flag_id) except *
|
|
|
|
cpdef bint check_dist_flag(self, size_t i, size_t flag_id) except *
|
2014-09-10 16:11:13 +00:00
|
|
|
# String accessor selected by `view_id`; returns a Python unicode object.
cpdef unicode string_view(self, size_t i, size_t view_id)
|
2014-09-14 23:31:44 +00:00
|
|
|
|
2014-10-14 04:21:03 +00:00
|
|
|
# Named string accessors; each returns a Python unicode object.
# NOTE(review): the exact string form each one yields is defined in the
# implementation file, not here.
cpdef unicode orig(self, size_t i)
|
|
|
|
cpdef unicode norm(self, size_t i)
|
|
|
|
cpdef unicode shape(self, size_t i)
|
2014-10-09 21:11:31 +00:00
|
|
|
cpdef unicode unsparse(self, size_t i)
|
|
|
|
cpdef unicode asciied(self, size_t i)
|
2014-09-15 01:22:40 +00:00
|
|
|
# Boolean predicates named `is_*`.
# NOTE(review): presumably character-class tests on the token's string
# (alphabetic, ASCII, digit, case, punctuation, whitespace) -- confirm.
cpdef bint is_alpha(self, size_t i) except *
|
|
|
|
cpdef bint is_ascii(self, size_t i) except *
|
|
|
|
cpdef bint is_digit(self, size_t i) except *
|
|
|
|
cpdef bint is_lower(self, size_t i) except *
|
|
|
|
cpdef bint is_punct(self, size_t i) except *
|
|
|
|
cpdef bint is_space(self, size_t i) except *
|
|
|
|
cpdef bint is_title(self, size_t i) except *
|
|
|
|
cpdef bint is_upper(self, size_t i) except *
|
|
|
|
# Boolean predicates named `can_*`.
# NOTE(review): presumably "this word can take part-of-speech tag X" tests,
# judging by the tag-like suffixes -- confirm.
cpdef bint can_adj(self, size_t i) except *
|
|
|
|
cpdef bint can_adp(self, size_t i) except *
|
|
|
|
cpdef bint can_adv(self, size_t i) except *
|
|
|
|
cpdef bint can_conj(self, size_t i) except *
|
|
|
|
cpdef bint can_det(self, size_t i) except *
|
|
|
|
cpdef bint can_noun(self, size_t i) except *
|
|
|
|
cpdef bint can_num(self, size_t i) except *
|
|
|
|
cpdef bint can_pdt(self, size_t i) except *
|
|
|
|
cpdef bint can_pos(self, size_t i) except *
|
|
|
|
cpdef bint can_pron(self, size_t i) except *
|
|
|
|
cpdef bint can_prt(self, size_t i) except *
|
|
|
|
cpdef bint can_punct(self, size_t i) except *
|
|
|
|
cpdef bint can_verb(self, size_t i) except *
|
|
|
|
# Boolean predicates named `oft_*`.
# NOTE(review): presumably "the word often appears in this casing" tests --
# confirm.
cpdef bint oft_lower(self, size_t i) except *
|
|
|
|
cpdef bint oft_title(self, size_t i) except *
|
|
|
|
cpdef bint oft_upper(self, size_t i) except *
|