Re-refactor Sentencizer with Pipe API (#7176)

Reapply the refactoring (#4721) so that `Sentencizer` uses the faster `predict` and `set_annotations` for both `__call__` and `pipe`.
2021-02-26 09:48:14 +01:00 · 2021-02-26 09:48:14 +01:00 · 10c930cc96
parent 592678fb7d
commit 10c930cc96
1 changed file with 2 additions and 16 deletions
--- a/spacy/pipeline/sentencizer.pyx
+++ b/spacy/pipeline/sentencizer.pyx
@@ -66,26 +66,12 @@ class Sentencizer(Pipe):
        """
        error_handler = self.get_error_handler()
        try:
-            self._call(doc)
+            tags = self.predict([doc])
            self.set_annotations([doc], tags)
            return doc
        except Exception as e:
            error_handler(self.name, self, [doc], e)
    def _call(self, doc):
        start = 0
        seen_period = False
        for i, token in enumerate(doc):
            is_in_punct_chars = token.text in self.punct_chars
            token.is_sent_start = i == 0
            if seen_period and not token.is_punct and not is_in_punct_chars:
                doc[start].is_sent_start = True
                start = token.i
                seen_period = False
            elif is_in_punct_chars:
                seen_period = True
        if start < len(doc):
            doc[start].is_sent_start = True
    def predict(self, docs):
        """Apply the pipe to a batch of docs, without modifying them.