2016-10-14 01:24:13 +00:00
|
|
|
from numpy cimport ndarray
|
2015-07-13 17:20:48 +00:00
|
|
|
from ..vocab cimport Vocab
|
|
|
|
from ..structs cimport TokenC
|
2015-07-16 09:23:25 +00:00
|
|
|
from ..attrs cimport attr_id_t
|
2015-07-13 22:10:11 +00:00
|
|
|
from .doc cimport Doc
|
2015-07-13 17:20:48 +00:00
|
|
|
|
|
|
|
|
|
|
|
cdef class Token:
    # C-level declarations for the Token extension type.
    #
    # vocab : the Vocab object this token was constructed with (read-only
    #         from Python).
    # c     : pointer to a TokenC struct; not exposed to Python.
    #         NOTE(review): presumably points into the owning Doc's token
    #         array -- the assignment is not visible here, confirm in the
    #         implementation file.
    # i     : integer attribute, read-only from Python; presumably the
    #         token's offset within `doc` -- confirm in the implementation.
    # doc   : the Doc object this token was constructed with (read-only
    #         from Python).
    cdef readonly Vocab vocab
    cdef TokenC* c
    cdef readonly int i
    cdef readonly Doc doc

    # Return the Token object for position `offset` of `doc`, creating and
    # caching it in `doc._py_tokens` on first access.
    #
    # Parameters:
    #   vocab  -- forwarded to the Token constructor.
    #   token  -- not used by this function; kept so the signature stays
    #             compatible with existing callers.
    #   offset -- index into `doc`; must satisfy 0 <= offset < doc.length.
    #   doc    -- the document whose `_py_tokens` cache is consulted/updated.
    #
    # Returns the cached Token when one exists for `offset`, otherwise a
    # newly constructed Token (which is stored in the cache before return).
    #
    # Raises IndexError when `offset` is outside [0, doc.length).
    @staticmethod
    cdef inline Token cinit(Vocab vocab, const TokenC* token, int offset, Doc doc):
        if offset < 0 or offset >= doc.length:
            msg = "Attempt to access token at %d, max length %d"
            raise IndexError(msg % (offset, doc.length))
        # Read the cache slot once. Test it with an identity check: `!= None`
        # would go through the cached object's rich comparison instead of
        # simply asking whether the slot has been filled.
        cached = doc._py_tokens[offset]
        if cached is not None:
            return cached
        cdef Token self = Token.__new__(Token, vocab, doc, offset)
        doc._py_tokens[offset] = self
        return self

    # Boolean flag lookup for `flag_id`. Declared `except -1` so that an
    # exception raised inside the method propagates to C-level callers.
    cpdef bint check_flag(self, attr_id_t flag_id) except -1
|