From ab7c45b12d982d596a2306d343dd3aa94595039a Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 15 Jan 2018 15:21:11 +0100 Subject: [PATCH] Fix error message and handling of doc.sents --- spacy/tokens/doc.pyx | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 4900a363d..df6a17521 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -508,13 +508,18 @@ cdef class Doc: yield from self.user_hooks['sents'](self) return - if not self.is_parsed: - raise ValueError( - "Sentence boundary detection requires the dependency " - "parse, which requires a statistical model to be " - "installed and loaded. For more info, see the " - "documentation: \n%s\n" % about.__docs_models__) cdef int i + if not self.is_parsed: + for i in range(1, self.length): + if self.c[i].sent_start != 0: + break + else: + raise ValueError( + "Sentence boundaries unset. You can add the 'sentencizer' " + "component to the pipeline with: " + "nlp.add_pipe(nlp.create_pipe('sentencizer')) " + "Alternatively, add the dependency parser, or set " + "sentence boundaries by setting doc[i].sent_start") start = 0 for i in range(1, self.length): if self.c[i].sent_start == 1: