spaCy/spacy/tokens.pxd

from libc.stdint cimport uint32_t

from numpy cimport ndarray
cimport numpy as np

from cymem.cymem cimport Pool
from thinc.typedefs cimport atom_t

from .typedefs cimport flags_t, attr_id_t, attr_t
from .parts_of_speech cimport univ_pos_t
from .structs cimport Morphology, TokenC, LexemeC
from .vocab cimport Vocab
from .strings cimport StringStore


ctypedef const LexemeC* const_Lexeme_ptr
ctypedef TokenC* TokenC_ptr

ctypedef fused LexemeOrToken:
    const_Lexeme_ptr
    TokenC_ptr
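
# The LexemeOrToken fused type lets one cdef method accept either a
# const LexemeC* or a TokenC*; Cython compiles a specialisation for each
# member type, and the concrete type can be tested with `is`. A minimal
# sketch (not the actual implementation) of how Doc.push_back below might
# branch on it in the .pyx module:
#
#     cdef int push_back(self, int i, LexemeOrToken lex_or_tok) except -1:
#         if LexemeOrToken is TokenC_ptr:
#             self.data[i] = lex_or_tok[0]       # copy the whole TokenC struct
#         else:
#             self.data[i].lex = lex_or_tok      # point at the shared LexemeC
#         return 0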


cdef attr_t get_lex_attr(const LexemeC* lex, attr_id_t feat_name) nogil
cdef attr_t get_token_attr(const TokenC* lex, attr_id_t feat_name) nogil


# Boolean lexical features are packed into the lexeme's flags bitfield,
# so testing one is a single shift-and-mask.
cdef inline bint check_flag(const LexemeC* lexeme, attr_id_t flag_id) nogil:
    return lexeme.flags & (1 << flag_id)
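
# A hedged sketch of calling these helpers from another .pyx module in the
# package; IS_ALPHA and SHAPE stand in for whatever flag/attribute ids the
# attribute scheme defines (the constant names here are assumptions):
#
#     from .tokens cimport check_flag, get_lex_attr
#
#     cdef bint is_alpha = check_flag(lexeme, IS_ALPHA)   # tests one bit of lexeme.flags
#     cdef attr_t shape = get_lex_attr(lexeme, SHAPE)     # fetches a non-boolean attribute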


cdef class Doc:
    cdef Pool mem
    cdef Vocab vocab

    # Token data is stored as a contiguous C array of TokenC structs;
    # Python-level Token objects are created lazily and cached in _py_tokens.
    cdef TokenC* data

    cdef list _py_tokens
    cdef unicode _string
    cdef tuple _tag_strings

    cdef public bint is_tagged
    cdef public bint is_parsed

    # Number of tokens filled in so far, and the allocated capacity of data.
    cdef int length
    cdef int max_length

    cdef int push_back(self, int i, LexemeOrToken lex_or_tok) except -1

    cpdef np.ndarray to_array(self, object features)

    cdef int set_parse(self, const TokenC* parsed) except -1
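
# A hedged sketch of how the Doc declared above is typically used from Python,
# assuming the .pyx implements iteration over Token wrappers; `nlp`, IS_ALPHA,
# LEMMA and POS are stand-ins for names defined elsewhere, not in this file:
#
#     doc = nlp(u'Some text to analyse.')
#     for token in doc:                      # yields cached Token wrappers (see below)
#         print(token.i, token.check_flag(IS_ALPHA))
#     feats = doc.to_array([LEMMA, POS])     # ndarray of attr_t values, one row per token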


cdef class Token:
    cdef Vocab vocab
    cdef unicode _string

    # Borrowed pointer into the parent Doc's TokenC array, plus this token's
    # offset within it.
    cdef const TokenC* c
    cdef readonly int i
    cdef int array_len

    cdef bint _owns_c_data

    cdef Doc _seq

    @staticmethod
    cdef inline Token cinit(Vocab vocab, unicode string,
                            const TokenC* token, int offset, int array_len,
                            Doc parent_seq):
        if offset < 0 or offset >= array_len:
            msg = "Attempt to access token at %d, max length %d"
            raise IndexError(msg % (offset, array_len))
        # Reuse the cached wrapper if this token has already been wrapped.
        if parent_seq._py_tokens[offset] is not None:
            return parent_seq._py_tokens[offset]
        cdef Token self = Token.__new__(Token, vocab, string)
        self.c = token
        self.i = offset
        self.array_len = array_len
        # Cache the wrapper on the parent Doc so repeated access to the same
        # position returns the same Python object.
        self._seq = parent_seq
        self._seq._py_tokens[offset] = self
        return self

    # Copy the borrowed C data into memory this Token owns (sets _owns_c_data),
    # so the wrapper no longer depends on the parent Doc's array.
    cdef int take_ownership_of_c_data(self) except -1

    cpdef bint check_flag(self, attr_id_t flag_id) except -1
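
# take_ownership_of_c_data lets a Token switch from borrowing its TokenC out of
# the Doc's array to holding a private copy. A minimal sketch of what the .pyx
# implementation could look like, assuming a plain CPython allocation (the real
# code may allocate from a Pool instead; illustrative only):
#
#     from cpython.mem cimport PyMem_Malloc
#
#     cdef int take_ownership_of_c_data(self) except -1:
#         cdef TokenC* copied = <TokenC*>PyMem_Malloc(sizeof(TokenC))
#         if copied == NULL:
#             raise MemoryError()
#         copied[0] = self.c[0]          # struct copy of the borrowed data
#         self.c = copied
#         self._owns_c_data = True
#         return 0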