diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 53653de5a..1e72fb8c9 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -164,8 +164,28 @@ cdef class Span: cdef const TokenC* start = &self.doc.c[self.start] cdef const TokenC* end = &self.doc.c[self.end] head = start + cdef int nr_iter = 0 while start <= (head + head.head) < end and head.head != 0: head += head.head + # Guard against infinite loops + if nr_iter >= (self.doc.length+1): + # Retrieve the words without getting the Python tokens, to + # avoid potential problems + try: + words = [self.doc.vocab.strings[self.doc.c[i].lex.orth] for i + in range(self.doc.length)] + except: + words = '' + try: + heads = [self.doc.c[i].head for i in range(self.doc.length)] + except: + heads = '' + raise RuntimeError( + "Invalid dependency parse, leading to potentially infinite loop. " + + "Please report this error on the issue tracker.\n" + + ("Words: %s\n" % repr(words)) + + ("Heads: %s\n" % repr(heads))) + nr_iter += 1 return self.doc[head - self.doc.c] property lefts: