diff --git a/spacy/syntax/arc_eager.pyx b/spacy/syntax/arc_eager.pyx index 07595d4ab..561308928 100644 --- a/spacy/syntax/arc_eager.pyx +++ b/spacy/syntax/arc_eager.pyx @@ -380,10 +380,17 @@ cdef class ArcEager(TransitionSystem): st.fast_forward() cdef int finalize_state(self, StateClass st) nogil: + cdef int i for i in range(st.length): # Always attach spaces to the previous word if Lexeme.c_check_flag(st._sent[i].lex, IS_SPACE): st._sent[i].head = -1 if (i >= 1) else 1 + if st._sent[i].sent_start and st._sent[i].head == -1: + st._sent[i].sent_start = False + # If we had this space token as the start of a sentence, + # move that sentence start forward one + if (i + 1) < st.length and not st._sent[i+1].sent_start: + st._sent[i+1].sent_start = True elif st._sent[i].head == 0 and st._sent[i].dep == 0: st._sent[i].dep = self.root_label # If we're not using the Break transition, we segment via root-labelled