update tokenizer

This commit is contained in:
shuvanon 2017-03-08 17:27:12 +06:00
parent 34801a0725
commit 45bc78461c
1 changed file with 4 additions and 0 deletions

View File

@@ -31,6 +31,10 @@ ABBREVIATIONS = {
{ORTH: "কি.মি", LEMMA: "কিলোমিটার"}, {ORTH: "কি.মি", LEMMA: "কিলোমিটার"},
{ORTH: "কি.মি.", LEMMA: "কিলোমিটার"}, {ORTH: "কি.মি.", LEMMA: "কিলোমিটার"},
], ],
"সে.মি": [
{ORTH: "সে.মি", LEMMA: "সেন্টিমিটার"},
{ORTH: "সে.মি.", LEMMA: "সেন্টিমিটার"},
],
} }
TOKENIZER_EXCEPTIONS.update(ABBREVIATIONS) TOKENIZER_EXCEPTIONS.update(ABBREVIATIONS)