From d1f0b397b5a8cba5e59dd5448a831932055c7f45 Mon Sep 17 00:00:00 2001
From: questoph
Date: Thu, 13 Feb 2020 22:18:51 +0100
Subject: [PATCH] Update punctuation.py

---
 spacy/lang/lb/punctuation.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/spacy/lang/lb/punctuation.py b/spacy/lang/lb/punctuation.py
index 1571e13d7..2a4587856 100644
--- a/spacy/lang/lb/punctuation.py
+++ b/spacy/lang/lb/punctuation.py
@@ -5,11 +5,13 @@ from ..char_classes import LIST_ELLIPSES, LIST_ICONS, ALPHA, ALPHA_LOWER, ALPHA_
 
 ELISION = " ' ’ ".strip().replace(" ", "")
 
+abbrev = ("d", "D")
+
 _infixes = (
     LIST_ELLIPSES
     + LIST_ICONS
     + [
-        r"(?<=[{a}][{el}])(?=[{a}])".format(a=ALPHA, el=ELISION),
+        r"(?<=^[{ab}][{el}])(?=[{a}])".format(ab=abbrev, a=ALPHA, el=ELISION),
         r"(?<=[{al}])\.(?=[{au}])".format(al=ALPHA_LOWER, au=ALPHA_UPPER),
         r"(?<=[{a}])[,!?](?=[{a}])".format(a=ALPHA),
         r"(?<=[{a}])[:<>=](?=[{a}])".format(a=ALPHA),