From ae4af52ce7dd9dda0eb0f1b8eeb0cba7d20facdf Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Wed, 8 Apr 2020 12:58:39 +0200 Subject: [PATCH] Add ideographic stops to sentencizer (#5263) Add ideographic half- and fullwidth full stops to default sentencizer punctuation. --- spacy/pipeline/pipes.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx index a20c9b6df..f2a86d56e 100644 --- a/spacy/pipeline/pipes.pyx +++ b/spacy/pipeline/pipes.pyx @@ -1444,7 +1444,8 @@ class Sentencizer(object): '𑃁', '𑅁', '𑅂', '𑅃', '𑇅', '𑇆', '𑇍', '𑇞', '𑇟', '𑈸', '𑈹', '𑈻', '𑈼', '𑊩', '𑑋', '𑑌', '𑗂', '𑗃', '𑗉', '𑗊', '𑗋', '𑗌', '𑗍', '𑗎', '𑗏', '𑗐', '𑗑', '𑗒', '𑗓', '𑗔', '𑗕', '𑗖', '𑗗', '𑙁', '𑙂', '𑜼', '𑜽', '𑜾', '𑩂', - '𑩃', '𑪛', '𑪜', '𑱁', '𑱂', '𖩮', '𖩯', '𖫵', '𖬷', '𖬸', '𖭄', '𛲟', '𝪈'] + '𑩃', '𑪛', '𑪜', '𑱁', '𑱂', '𖩮', '𖩯', '𖫵', '𖬷', '𖬸', '𖭄', '𛲟', '𝪈', + '｡', '。'] def __init__(self, punct_chars=None, **kwargs): """Initialize the sentencizer.