From 45bc78461c9512c2ab68b4b0785d157686a1a763 Mon Sep 17 00:00:00 2001 From: shuvanon Date: Wed, 8 Mar 2017 17:27:12 +0600 Subject: [PATCH] update tokenizer --- spacy/bn/tokenizer_exceptions.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/spacy/bn/tokenizer_exceptions.py b/spacy/bn/tokenizer_exceptions.py index 7722c9dcc..169608d0b 100644 --- a/spacy/bn/tokenizer_exceptions.py +++ b/spacy/bn/tokenizer_exceptions.py @@ -31,6 +31,10 @@ ABBREVIATIONS = { {ORTH: "কি.মি", LEMMA: "কিলোমিটার"}, {ORTH: "কি.মি.", LEMMA: "কিলোমিটার"}, ], + "সে.মি": [ + {ORTH: "সে.মি", LEMMA: "সেন্টিমিটার"}, + {ORTH: "সে.মি.", LEMMA: "সেন্টিমিটার"}, + ], } TOKENIZER_EXCEPTIONS.update(ABBREVIATIONS)