From 0e24d099a15dae3f96210a6bb8b93ddb9b2ea51d Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 9 Sep 2015 03:39:46 +0200 Subject: [PATCH] * Fix L/R edge bug, by ensuring l_edge and r_edge are preset, and fixing the way the edges are updated in del_arc. Bugs keep arising here because the edges are absolute positions, whereas everything else is relative. I'm also not 100% convinced that del_arc is handled correctly. Do we need to update the parents? --- spacy/syntax/stateclass.pyx | 9 ++++----- spacy/tokens/doc.pyx | 6 ++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/spacy/syntax/stateclass.pyx b/spacy/syntax/stateclass.pyx index 6f7951987..81c31be97 100644 --- a/spacy/syntax/stateclass.pyx +++ b/spacy/syntax/stateclass.pyx @@ -16,12 +16,11 @@ cdef class StateClass: cdef int i for i in range(length + (PADDING * 2)): self._ents[i].end = -1 + self._sent[i].l_edge = i + self._sent[i].r_edge = i for i in range(length, length + (PADDING * 2)): self._sent[i].lex = &EMPTY_LEXEME self._sent += PADDING - for i in range(length): - self._sent[i].l_edge = i - self._sent[i].r_edge = i self._ents += PADDING self._buffer += PADDING self._stack += PADDING @@ -162,11 +161,11 @@ cdef class StateClass: cdef int dist = h_i - c_i cdef TokenC* h = &self._sent[h_i] if c_i > h_i: + h.r_edge = self.R_(h_i, 2).r_edge if h.r_kids >= 2 else h_i h.r_kids -= 1 - h.r_edge = self.R_(h_i, 2).r_edge if h.r_kids >= 1 else h_i else: + h.l_edge = self.L_(h_i, 2).l_edge if h.l_kids >= 2 else h_i h.l_kids -= 1 - h.l_edge = self.L_(h_i, 2).l_edge if h.l_kids >= 1 else h_i cdef void open_ent(self, int label) nogil: self._ents[self._e_i].start = self.B(0) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 41d24d8ac..ccde5d599 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -67,6 +67,8 @@ cdef class Doc: cdef int i for i in range(size + (PADDING*2)): data_start[i].lex = &EMPTY_LEXEME + data_start[i].l_edge = i + data_start[i].r_edge = i self.data = data_start 
+ PADDING self.max_length = size self.length = 0 @@ -219,6 +221,8 @@ cdef class Doc: t.idx = 0 else: t.idx = (t-1).idx + (t-1).lex.length + (t-1).spacy + t.l_edge = self.length + t.r_edge = self.length assert t.lex.orth != 0 t.spacy = has_space self.length += 1 @@ -310,6 +314,8 @@ cdef class Doc: self.is_parsed = True for i in range(self.length): self.data[i] = parsed[i] + assert self.data[i].l_edge <= i + assert self.data[i].r_edge >= i def from_array(self, attrs, array): cdef int i, col