diff --git a/spacy/structs.pxd b/spacy/structs.pxd index 4f46ff1a2..a26c87e2f 100644 --- a/spacy/structs.pxd +++ b/spacy/structs.pxd @@ -68,7 +68,7 @@ cdef struct TokenC: int sense int head int dep - bint sent_end + bint sent_start uint32_t l_kids uint32_t r_kids diff --git a/spacy/syntax/arc_eager.pyx b/spacy/syntax/arc_eager.pyx index 663ffd2cb..6808e8689 100644 --- a/spacy/syntax/arc_eager.pyx +++ b/spacy/syntax/arc_eager.pyx @@ -114,7 +114,7 @@ cdef bint _is_gold_root(const GoldParseC* gold, int word) nogil: cdef class Shift: @staticmethod cdef bint is_valid(StateClass st, int label) nogil: - return st.buffer_length() >= 2 and not st.shifted[st.B(0)] and not st.B_(0).sent_end + return st.buffer_length() >= 2 and not st.shifted[st.B(0)] and not st.B_(0).sent_start @staticmethod cdef int transition(StateClass st, int label) nogil: @@ -163,7 +163,7 @@ cdef class Reduce: cdef class LeftArc: @staticmethod cdef bint is_valid(StateClass st, int label) nogil: - return not st.B_(0).sent_end + return not st.B_(0).sent_start @staticmethod cdef int transition(StateClass st, int label) nogil: @@ -196,7 +196,7 @@ cdef class LeftArc: cdef class RightArc: @staticmethod cdef bint is_valid(StateClass st, int label) nogil: - return not st.B_(0).sent_end + return not st.B_(0).sent_start @staticmethod cdef int transition(StateClass st, int label) nogil: @@ -367,9 +367,9 @@ cdef class ArcEager(TransitionSystem): return t cdef int initialize_state(self, StateClass st) except -1: - # Ensure sent_end is set to 0 throughout + # Ensure sent_start is set to 0 throughout for i in range(st.length): - st._sent[i].sent_end = False + st._sent[i].sent_start = False st.fast_forward() cdef int finalize_state(self, StateClass st) except -1: diff --git a/spacy/syntax/stateclass.pyx b/spacy/syntax/stateclass.pyx index cbcebac11..da37ae7ae 100644 --- a/spacy/syntax/stateclass.pyx +++ b/spacy/syntax/stateclass.pyx @@ -219,7 +219,7 @@ cdef class StateClass: cdef void set_break(self, int _) nogil: if 0 <= self.B(0) < self.length: - self._sent[self.B(0)].sent_end = True + self._sent[self.B(0)].sent_start = True self._break = self._b_i cdef void clone(self, StateClass src) nogil: diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx index afda93b79..55389cdde 100644 --- a/spacy/tokens.pyx +++ b/spacy/tokens.pyx @@ -186,15 +186,12 @@ cdef class Tokens: """ cdef int i cdef Tokens sent = Tokens(self.vocab, self._string[self.data[0].idx:]) - start = None - for i in range(self.length): - if start is None: + start = 0 + for i in range(1, self.length): + if self.data[i].sent_start: + yield Span(self, start, i) start = i - if self.data[i].sent_end: - yield Span(self, start, i+1) - start = None - if start is not None: - yield Span(self, start, self.length) + yield Span(self, start, self.length) cdef int push_back(self, int idx, LexemeOrToken lex_or_tok) except -1: if self.length == self.max_length: