From de772088e675eca7f06da720a7f9ac2ae2ed2e01 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 2 Feb 2015 12:17:32 +1100 Subject: [PATCH] * Use parse tree for sentence boundary detection (sbd) in Tokens.sents --- spacy/tokens.pyx | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx index 1463be17b..29e75941f 100644 --- a/spacy/tokens.pyx +++ b/spacy/tokens.pyx @@ -212,20 +212,22 @@ cdef class Tokens: def sents(self): """This is really only a place-holder for a proper solution.""" cdef int i - sentences = [] cdef Tokens sent = Tokens(self.vocab, self._string[self.data[0].idx:]) - cdef attr_t period = self.vocab.strings['.'] - cdef attr_t question = self.vocab.strings['?'] - cdef attr_t exclamation = self.vocab.strings['!'] + #cdef attr_t period = self.vocab.strings['.'] + #cdef attr_t question = self.vocab.strings['?'] + #cdef attr_t exclamation = self.vocab.strings['!'] spans = [] start = None for i in range(self.length): if start is None: start = i - if self.data[i].lex.orth == period or self.data[i].lex.orth == exclamation or \ - self.data[i].lex.orth == question: + if self.data[i].sent_end: spans.append((start, i+1)) start = None + #if self.data[i].lex.orth == period or self.data[i].lex.orth == exclamation or \ + # self.data[i].lex.orth == question: + # spans.append((start, i+1)) + # start = None if start is not None: spans.append((start, self.length)) return spans