From 7bf6b7de3ecf9a4fa96ca52a8e97700a448343fb Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 10 Jun 2015 10:13:03 +0200 Subject: [PATCH] * Add unshift action to StateClass, and track which moves have been shifted --- spacy/syntax/stateclass.pxd | 5 ++++- spacy/syntax/stateclass.pyx | 19 +++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/spacy/syntax/stateclass.pxd b/spacy/syntax/stateclass.pxd index be0380ecb..e94e74f0c 100644 --- a/spacy/syntax/stateclass.pxd +++ b/spacy/syntax/stateclass.pxd @@ -11,10 +11,12 @@ cdef class StateClass: cdef Pool mem cdef int* _stack cdef int* _buffer + cdef bint* shifted cdef TokenC* _sent cdef Entity* _ents cdef TokenC _empty_token cdef int length + cdef bint at_sent_end cdef int _s_i cdef int _b_i cdef int _e_i @@ -64,7 +66,6 @@ cdef class StateClass: cdef bint is_final(self) nogil cdef bint has_head(self, int i) nogil - cdef int n_L(self, int i) nogil @@ -79,6 +80,8 @@ cdef class StateClass: cdef void push(self) nogil cdef void pop(self) nogil + + cdef void unshift(self) nogil cdef void add_arc(self, int head, int child, int label) nogil diff --git a/spacy/syntax/stateclass.pyx b/spacy/syntax/stateclass.pyx index 4d7cc0fea..23098b70d 100644 --- a/spacy/syntax/stateclass.pyx +++ b/spacy/syntax/stateclass.pyx @@ -9,6 +9,7 @@ cdef class StateClass: cdef Pool mem = Pool() self._buffer = mem.alloc(length, sizeof(int)) self._stack = mem.alloc(length, sizeof(int)) + self.shifted = mem.alloc(length, sizeof(bint)) self._sent = mem.alloc(length, sizeof(TokenC)) self._ents = mem.alloc(length, sizeof(Entity)) self.mem = mem @@ -103,10 +104,10 @@ cdef class StateClass: return self._s_i <= 0 cdef bint eol(self) nogil: - return self._b_i >= self.length + return self._b_i >= self.length or self.at_sent_end cdef bint is_final(self) nogil: - return self.eol() and self.stack_depth() <= 1 + return self.stack_depth() <= 1 and self.buffer_length() == 0 cdef bint has_head(self, int i) nogil: return self.safe_get(i).head != 0 @@ -133,12 +134,18 @@ cdef class StateClass: cdef void push(self) nogil: self._stack[self._s_i] = self.B(0) + self.shifted[self.B(0)] = True self._s_i += 1 self._b_i += 1 cdef void pop(self) nogil: self._s_i -= 1 + cdef void unshift(self) nogil: + self._b_i -= 1 + self._buffer[self._b_i] = self.S(0) + self._s_i -= 1 + cdef void add_arc(self, int head, int child, int label) nogil: if self.has_head(child): self.del_arc(self.H(child), child) @@ -190,12 +197,12 @@ cdef class StateClass: def print_state(self, words): words = list(words) + ['_'] - top = words[self.S(0)] + '_%d' % self.H(self.S(0)) - second = words[self.S(1)] + '_%d' % self.H(self.S(1)) - third = words[self.S(2)] + '_%d' % self.H(self.S(2)) + top = words[self.S(0)] + '_%d' % self.S_(0).head + second = words[self.S(1)] + '_%d' % self.S_(1).head + third = words[self.S(2)] + '_%d' % self.S_(2).head n0 = words[self.B(0)] n1 = words[self.B(1)] - return ' '.join((str(self.stack_depth()), third, second, top, '|', n0, n1)) + return ' '.join((str(self.buffer_length()), str(self.stack_depth()), third, second, top, '|', n0, n1)) # From https://en.wikipedia.org/wiki/Hamming_weight