2015-06-08 23:39:54 +00:00
|
|
|
from libc.string cimport memcpy, memset
|
|
|
|
|
|
|
|
from cymem.cymem cimport Pool
|
|
|
|
|
2015-06-09 21:23:28 +00:00
|
|
|
from ..structs cimport TokenC, Entity
|
2015-06-08 23:39:54 +00:00
|
|
|
|
2015-06-09 19:20:14 +00:00
|
|
|
from ..vocab cimport EMPTY_LEXEME
|
2016-02-01 00:16:14 +00:00
|
|
|
from ._state cimport StateC
|
|
|
|
|
2015-06-08 23:39:54 +00:00
|
|
|
|
|
|
|
|
|
|
|
cdef class StateClass:
    # Extension type holding the stack/buffer/entity bookkeeping that the
    # inline accessors of this class read.  Field order and types are
    # unchanged; only the grouping under a single `cdef:` block is new.
    cdef:
        # Not referenced by any inline method visible here.
        # NOTE(review): presumably owns the arrays below -- confirm.
        Pool mem
        # Stack storage; S(i) reads `_stack[_s_i - (i+1)]`.
        int* _stack
        # Buffer storage; init() fills it with 0..length-1 and B(i) reads
        # `_buffer[_b_i + i]`.
        int* _buffer
        # Not referenced by any inline method visible here.
        bint* shifted
        # C++ StateC instance created with `new` in init().
        StateC* c
        # Token array: init() copies the input tokens into it and writes
        # five more entries past `length`.
        TokenC* _sent
        # Entity array; entity_is_open() inspects `_ents[_e_i - 1].end`.
        Entity* _ents
        # Address of this token is what safe_get() returns for an
        # out-of-range index.
        TokenC _empty_token
        # Upper bound used by safe_get(), B() and buffer_length().
        int length
        # Value returned by stack_depth(); empty() tests `_s_i <= 0`.
        int _s_i
        # Offset into `_buffer` used by B() and buffer_length().
        int _b_i
        # Entity counter consulted by entity_is_open().
        int _e_i
        # -1 is the "no break" value tested by at_break() and
        # buffer_length().
        int _break
|
2015-06-08 23:39:54 +00:00
|
|
|
|
2015-06-10 02:20:23 +00:00
|
|
|
@staticmethod
cdef inline StateClass init(const TokenC* sent, int length):
    # Build a StateClass for `length` tokens read from `sent`.
    #
    # The tokens are copied into the new object's own `_sent` array and
    # the buffer is initialised to the identity mapping 0..length-1.
    cdef StateClass state = StateClass(length)
    cdef int pos
    for pos in range(length):
        state._sent[pos] = sent[pos]
        state._buffer[pos] = pos
    # Point the five entries after the last token at the shared empty
    # lexeme.
    # NOTE(review): assumes `_sent` has room for length + 5 entries; the
    # allocation is not visible here -- confirm.
    for pos in range(5):
        state._sent[length + pos].lex = &EMPTY_LEXEME
    # The StateC instance is built from the caller's array, not from the
    # copy made above.
    state.c = new StateC(sent, length)
    return state
|
|
|
|
|
2015-06-10 05:22:33 +00:00
|
|
|
cdef inline int S(self, int i) nogil:
    # Return the stack entry `i` positions below the top (i == 0 is the
    # top), or -1 when the stack holds `i` or fewer items.  A negative
    # `i` is not checked.
    #
    # The StateC call's result is discarded.
    # NOTE(review): presumably a transitional shadow call -- confirm
    # before removing it.
    self.c.S(i)
    if i < self._s_i:
        return self._stack[self._s_i - i - 1]
    return -1
|
|
|
|
|
|
|
|
cdef inline int B(self, int i) nogil:
    # Return the buffer entry `i` positions past the current buffer
    # offset, or -1 when that position is at or beyond `length`.
    #
    # The StateC call's result is discarded.
    self.c.B(i)
    cdef int pos = self._b_i + i
    if pos < self.length:
        return self._buffer[pos]
    return -1
|
2015-06-28 23:06:28 +00:00
|
|
|
|
|
|
|
cdef inline const TokenC* S_(self, int i) nogil:
    # Pointer form of S(i): the index from S() is passed through
    # safe_get(), so an out-of-range result gives the empty token.
    #
    # The StateC call's result is discarded.
    self.c.S_(i)
    cdef int token_index = self.S(i)
    return self.safe_get(token_index)
|
|
|
|
|
|
|
|
cdef inline const TokenC* B_(self, int i) nogil:
    # Pointer form of B(i): the index from B() is passed through
    # safe_get(), so an out-of-range result gives the empty token.
    #
    # The StateC call's result is discarded.
    self.c.B_(i)
    cdef int token_index = self.B(i)
    return self.safe_get(token_index)
|
|
|
|
|
|
|
|
cdef inline const TokenC* H_(self, int i) nogil:
    # Pointer form of H(i): the index from H() is passed through
    # safe_get(), so an out-of-range result gives the empty token.
    #
    # The StateC call's result is discarded.
    self.c.H_(i)
    cdef int token_index = self.H(i)
    return self.safe_get(token_index)
|
|
|
|
|
|
|
|
cdef inline const TokenC* E_(self, int i) nogil:
    # Pointer form of E(i): the index from E() is passed through
    # safe_get(), so an out-of-range result gives the empty token.
    #
    # The StateC call's result is discarded.
    self.c.E_(i)
    cdef int token_index = self.E(i)
    return self.safe_get(token_index)
|
|
|
|
|
|
|
|
cdef inline const TokenC* L_(self, int i, int idx) nogil:
    # Pointer form of L(i, idx): the index from L() is passed through
    # safe_get(), so an out-of-range result gives the empty token.
    #
    # The StateC call's result is discarded.
    self.c.L_(i, idx)
    cdef int token_index = self.L(i, idx)
    return self.safe_get(token_index)
|
|
|
|
|
|
|
|
cdef inline const TokenC* R_(self, int i, int idx) nogil:
    # Pointer form of R(i, idx): the index from R() is passed through
    # safe_get(), so an out-of-range result gives the empty token.
    #
    # The StateC call's result is discarded.
    self.c.R_(i, idx)
    cdef int token_index = self.R(i, idx)
    return self.safe_get(token_index)
|
|
|
|
|
|
|
|
cdef inline const TokenC* safe_get(self, int i) nogil:
    # Return a pointer to token `i`, or to the object's empty token when
    # `i` is negative or not below `length`.  Never returns NULL.
    #
    # The StateC call's result is discarded.
    self.c.safe_get(i)
    if 0 <= i < self.length:
        return &self._sent[i]
    return &self._empty_token
|
|
|
|
|
|
|
|
cdef inline int H(self, int i) nogil:
    # Return the value StateC reports for H(i).
    #
    # A bounds check and a `_sent[i].head + i` computation used to follow
    # this return.  They could never execute, so they have been dropped;
    # the value returned is the same as before.
    return self.c.H(i)
|
|
|
|
|
2015-06-09 21:23:28 +00:00
|
|
|
# Signature only: no body is given here, so the implementation lives
# elsewhere.  E_() passes the returned int to safe_get() as a token index.
# NOTE(review): presumably the start token of the i-th most recent
# entity -- confirm.
cdef int E(self, int i) nogil
|
2015-06-28 23:06:28 +00:00
|
|
|
|
|
|
|
# Signature only: no body is given here, so the implementation lives
# elsewhere.  R_() passes the returned int to safe_get() as a token index.
# NOTE(review): presumably the idx-th right child of token i -- confirm.
cdef int R(self, int i, int idx) nogil
|
2015-06-08 23:39:54 +00:00
|
|
|
|
|
|
|
# Signature only: no body is given here, so the implementation lives
# elsewhere.  L_() passes the returned int to safe_get() as a token index.
# NOTE(review): presumably the idx-th left child of token i -- confirm.
cdef int L(self, int i, int idx) nogil
|
|
|
|
|
2015-06-28 23:06:28 +00:00
|
|
|
cdef inline bint empty(self) nogil:
    # True when the stack counter `_s_i` is zero or negative.
    #
    # The StateC call's result is discarded.
    self.c.empty()
    return self._s_i < 1
|
2015-06-09 19:20:14 +00:00
|
|
|
|
2015-06-28 23:06:28 +00:00
|
|
|
cdef inline bint eol(self) nogil:
    # True when buffer_length() reports exactly zero remaining items.
    #
    # The StateC call's result is discarded.
    self.c.eol()
    cdef int remaining = self.buffer_length()
    return remaining == 0
|
2015-06-09 19:20:14 +00:00
|
|
|
|
2015-06-28 23:06:28 +00:00
|
|
|
cdef inline bint at_break(self) nogil:
    # True when `_break` holds anything other than -1, the "no break"
    # value.
    #
    # The StateC call's result is discarded.
    self.c.at_break()
    return not (self._break == -1)
|
2015-06-09 19:20:14 +00:00
|
|
|
|
2015-06-28 23:06:28 +00:00
|
|
|
cdef inline bint is_final(self) nogil:
    # True when the stack is empty (stack_depth() <= 0) and the buffer
    # offset has reached `length`.
    #
    # The StateC call's result is discarded.
    self.c.is_final()
    if self.stack_depth() > 0:
        return False
    return self._b_i >= self.length
|
2015-06-09 19:20:14 +00:00
|
|
|
|
2015-06-28 23:06:28 +00:00
|
|
|
cdef inline bint has_head(self, int i) nogil:
    # True when token `i` has a non-zero `head` field.  The token is
    # fetched with safe_get(), so an out-of-range `i` reads the empty
    # token.
    #
    # TODO: delegation to self.c.has_head(i) is currently disabled.
    cdef const TokenC* token = self.safe_get(i)
    return token.head != 0
|
2015-06-08 23:39:54 +00:00
|
|
|
|
2015-06-28 23:06:28 +00:00
|
|
|
cdef inline int n_L(self, int i) nogil:
    # Return the `l_kids` field of token `i`.  The token is fetched with
    # safe_get(), so an out-of-range `i` reads the empty token.
    #
    # The StateC call's result is discarded.
    self.c.n_L(i)
    cdef const TokenC* token = self.safe_get(i)
    return token.l_kids
|
2015-06-08 23:39:54 +00:00
|
|
|
|
2015-06-28 23:06:28 +00:00
|
|
|
cdef inline int n_R(self, int i) nogil:
    # Return the `r_kids` field of token `i`.  The token is fetched with
    # safe_get(), so an out-of-range `i` reads the empty token.
    #
    # The StateC call's result is discarded.
    self.c.n_R(i)
    cdef const TokenC* token = self.safe_get(i)
    return token.r_kids
|
2015-06-08 23:39:54 +00:00
|
|
|
|
2015-06-28 23:06:28 +00:00
|
|
|
cdef inline bint stack_is_connected(self) nogil:
    # Always reports False; no state is consulted.
    # NOTE(review): looks like a placeholder -- confirm whether callers
    # rely on the constant answer.
    return False
|
2015-06-09 19:20:14 +00:00
|
|
|
|
2015-06-28 23:06:28 +00:00
|
|
|
cdef inline bint entity_is_open(self) nogil:
    # True when at least one entity is recorded and the most recent one
    # has `end == -1`.
    #
    # The StateC call's result is discarded.
    self.c.entity_is_open()
    # Short-circuit keeps `_ents` untouched when `_e_i` is below 1.
    return self._e_i >= 1 and self._ents[self._e_i - 1].end == -1
|
2015-06-09 19:20:14 +00:00
|
|
|
|
2015-06-28 23:06:28 +00:00
|
|
|
cdef inline int stack_depth(self) nogil:
    # Return the stack counter `_s_i`.
    #
    # The StateC call's result is discarded.
    self.c.stack_depth()
    cdef int depth = self._s_i
    return depth
|
2015-06-08 23:39:54 +00:00
|
|
|
|
2015-06-28 23:06:28 +00:00
|
|
|
cdef inline int buffer_length(self) nogil:
    # Return the distance from the buffer offset `_b_i` to the end of the
    # buffer.  The end is `_break` when a break is set (not -1), and
    # `length` otherwise.
    #
    # The StateC call's result is discarded.
    self.c.buffer_length()
    cdef int end = self._break if self._break != -1 else self.length
    return end - self._b_i
|
2015-06-08 23:39:54 +00:00
|
|
|
|
|
|
|
# Signature only: no body is given here, so the implementation lives
# elsewhere.
# NOTE(review): presumably moves the front of the buffer onto the stack
# -- confirm against the implementation.
cdef void push(self) nogil
|
|
|
|
|
|
|
|
# Signature only: no body is given here, so the implementation lives
# elsewhere.
# NOTE(review): presumably removes the top of the stack -- confirm.
cdef void pop(self) nogil
|
2015-06-10 08:13:03 +00:00
|
|
|
|
|
|
|
# Signature only: no body is given here, so the implementation lives
# elsewhere.
# NOTE(review): presumably returns the top of the stack to the buffer
# -- confirm.
cdef void unshift(self) nogil
|
2015-06-08 23:39:54 +00:00
|
|
|
|
|
|
|
# Signature only: no body is given here, so the implementation lives
# elsewhere.
# NOTE(review): presumably attaches `child` to `head` with dependency
# label `label` -- confirm.
cdef void add_arc(self, int head, int child, int label) nogil
|
|
|
|
|
|
|
|
# Signature only: no body is given here, so the implementation lives
# elsewhere.
# NOTE(review): presumably removes the arc from `head` to `child`
# -- confirm.
cdef void del_arc(self, int head, int child) nogil
|
2015-08-05 22:35:40 +00:00
|
|
|
|
2015-06-09 21:23:28 +00:00
|
|
|
# Signature only: no body is given here, so the implementation lives
# elsewhere.
# NOTE(review): presumably starts a new entity with the given label.
# entity_is_open() treats `end == -1` as "open" -- confirm that this
# sets it.
cdef void open_ent(self, int label) nogil
|
|
|
|
|
|
|
|
# Signature only: no body is given here, so the implementation lives
# elsewhere.
# NOTE(review): presumably closes the most recently opened entity
# -- confirm.
cdef void close_ent(self) nogil
|
|
|
|
|
|
|
|
# Signature only: no body is given here, so the implementation lives
# elsewhere.
# NOTE(review): presumably writes the IOB code and entity type onto
# token `i` -- confirm.
cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil
|
2015-06-08 23:39:54 +00:00
|
|
|
|
2015-06-10 10:33:55 +00:00
|
|
|
# Signature only: no body is given here, so the implementation lives
# elsewhere.
# NOTE(review): presumably records a break at position `i` in `_break`,
# which at_break() and buffer_length() read -- confirm.
cdef void set_break(self, int i) nogil
|
2015-06-08 23:39:54 +00:00
|
|
|
|
|
|
|
# Signature only: no body is given here, so the implementation lives
# elsewhere.
# NOTE(review): presumably copies the state of `src` into this object
# -- confirm, including whether the StateC instance `c` is copied too.
cdef void clone(self, StateClass src) nogil
|
2015-06-10 12:08:30 +00:00
|
|
|
|
|
|
|
# Signature only: no body is given here, so the implementation lives
# elsewhere.  Its behaviour cannot be determined from this declaration.
cdef void fast_forward(self) nogil
|