From 2169bbb7ea0e05bf7bb3a29ef143a07f6902e880 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 1 Feb 2016 01:16:14 +0100 Subject: [PATCH] * Shadow StateClass with StateC, to start proxying --- spacy/syntax/stateclass.pxd | 25 +++++++++++++++++++++++++ spacy/syntax/stateclass.pyx | 14 ++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/spacy/syntax/stateclass.pxd b/spacy/syntax/stateclass.pxd index 8a10f5a39..9fd8277f5 100644 --- a/spacy/syntax/stateclass.pxd +++ b/spacy/syntax/stateclass.pxd @@ -5,6 +5,8 @@ from cymem.cymem cimport Pool from ..structs cimport TokenC, Entity from ..vocab cimport EMPTY_LEXEME +from ._state cimport StateC + cdef class StateClass: @@ -12,6 +14,7 @@ cdef class StateClass: cdef int* _stack cdef int* _buffer cdef bint* shifted + cdef StateC* c cdef TokenC* _sent cdef Entity* _ents cdef TokenC _empty_token @@ -30,43 +33,55 @@ cdef class StateClass: self._buffer[i] = i for i in range(length, length + 5): self._sent[i].lex = &EMPTY_LEXEME + + self.c = new StateC(sent, length) return self cdef inline int S(self, int i) nogil: + self.c.S(i) if i >= self._s_i: return -1 return self._stack[self._s_i - (i+1)] cdef inline int B(self, int i) nogil: + self.c.B(i) if (i + self._b_i) >= self.length: return -1 return self._buffer[self._b_i + i] cdef inline const TokenC* S_(self, int i) nogil: + self.c.S_(i) return self.safe_get(self.S(i)) cdef inline const TokenC* B_(self, int i) nogil: + self.c.B_(i) return self.safe_get(self.B(i)) cdef inline const TokenC* H_(self, int i) nogil: + self.c.H_(i) return self.safe_get(self.H(i)) cdef inline const TokenC* E_(self, int i) nogil: + self.c.E_(i) return self.safe_get(self.E(i)) cdef inline const TokenC* L_(self, int i, int idx) nogil: + self.c.L_(i, idx) return self.safe_get(self.L(i, idx)) cdef inline const TokenC* R_(self, int i, int idx) nogil: + self.c.R_(i, idx) return self.safe_get(self.R(i, idx)) cdef inline const TokenC* safe_get(self, int i) nogil: + self.c.safe_get(i) if i < 0 or i >= self.length: return &self._empty_token else: return &self._sent[i] cdef inline int H(self, int i) nogil: + self.c.H(i) if i < 0 or i >= self.length: return -1 return self._sent[i].head + i @@ -78,38 +93,48 @@ cdef class StateClass: cdef int L(self, int i, int idx) nogil cdef inline bint empty(self) nogil: + self.c.empty() return self._s_i <= 0 cdef inline bint eol(self) nogil: + self.c.eol() return self.buffer_length() == 0 cdef inline bint at_break(self) nogil: + self.c.at_break() return self._break != -1 cdef inline bint is_final(self) nogil: + self.c.is_final() return self.stack_depth() <= 0 and self._b_i >= self.length cdef inline bint has_head(self, int i) nogil: + self.c.has_head(i) return self.safe_get(i).head != 0 cdef inline int n_L(self, int i) nogil: + self.c.n_L(i) return self.safe_get(i).l_kids cdef inline int n_R(self, int i) nogil: + self.c.n_R(i) return self.safe_get(i).r_kids cdef inline bint stack_is_connected(self) nogil: return False cdef inline bint entity_is_open(self) nogil: + self.c.entity_is_open() if self._e_i < 1: return False return self._ents[self._e_i-1].end == -1 cdef inline int stack_depth(self) nogil: + self.c.stack_depth() return self._s_i cdef inline int buffer_length(self) nogil: + self.c.buffer_length() if self._break != -1: return self._break - self._b_i else: diff --git a/spacy/syntax/stateclass.pyx b/spacy/syntax/stateclass.pyx index 1ee103f61..ec7032127 100644 --- a/spacy/syntax/stateclass.pyx +++ b/spacy/syntax/stateclass.pyx @@ -47,6 +47,7 @@ cdef class StateClass: return {self.B(i) for i in range(self._b_i)} cdef int E(self, int i) nogil: + self.c.E(i) if self._e_i <= 0 or self._e_i >= self.length: return 0 if i < 0 or i >= self._e_i: @@ -54,6 +55,7 @@ cdef class StateClass: return self._ents[self._e_i - (i+1)].start cdef int L(self, int i, int idx) nogil: + self.c.L(i, idx) if idx < 1: return -1 if i < 0 or i >= self.length: @@ -80,6 +82,7 @@ cdef class StateClass: return -1 cdef int R(self, int i, int idx) nogil: + self.c.R(i, idx) if idx < 1: return -1 if i < 0 or i >= self.length: @@ -104,6 +107,7 @@ cdef class StateClass: return -1 cdef void push(self) nogil: + self.c.push() if self.B(0) != -1: self._stack[self._s_i] = self.B(0) self._s_i += 1 @@ -112,16 +116,19 @@ cdef class StateClass: self._break = -1 cdef void pop(self) nogil: + self.c.pop() if self._s_i >= 1: self._s_i -= 1 cdef void unshift(self) nogil: + self.c.unshift() self._b_i -= 1 self._buffer[self._b_i] = self.S(0) self._s_i -= 1 self.shifted[self.B(0)] = True cdef void fast_forward(self) nogil: + self.c.fast_forward() while self.buffer_length() == 0 \ or self.stack_depth() == 0 \ or Lexeme.c_check_flag(self.S_(0).lex, IS_SPACE): @@ -144,6 +151,7 @@ cdef class StateClass: break cdef void add_arc(self, int head, int child, int label) nogil: + self.c.add_arc(head, child, label) if self.has_head(child): self.del_arc(self.H(child), child) @@ -166,6 +174,7 @@ cdef class StateClass: self._sent[head].l_edge = self._sent[child].l_edge cdef void del_arc(self, int h_i, int c_i) nogil: + self.c.del_arc(h_i, c_i) cdef int dist = h_i - c_i cdef TokenC* h = &self._sent[h_i] if c_i > h_i: @@ -176,28 +185,33 @@ cdef class StateClass: h.l_kids -= 1 cdef void open_ent(self, int label) nogil: + self.c.open_ent(label) self._ents[self._e_i].start = self.B(0) self._ents[self._e_i].label = label self._ents[self._e_i].end = -1 self._e_i += 1 cdef void close_ent(self) nogil: + self.c.close_ent() # Note that we don't decrement _e_i here! We want to maintain all # entities, not over-write them... self._ents[self._e_i-1].end = self.B(0)+1 self._sent[self.B(0)].ent_iob = 1 cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil: + self.c.set_ent_tag(i, ent_iob, ent_type) if 0 <= i < self.length: self._sent[i].ent_iob = ent_iob self._sent[i].ent_type = ent_type cdef void set_break(self, int _) nogil: + self.c.set_break(_) if 0 <= self.B(0) < self.length: self._sent[self.B(0)].sent_start = True self._break = self._b_i cdef void clone(self, StateClass src) nogil: + self.c.clone(src.c) memcpy(self._sent, src._sent, self.length * sizeof(TokenC)) memcpy(self._stack, src._stack, self.length * sizeof(int)) memcpy(self._buffer, src._buffer, self.length * sizeof(int))