update tokenizer

This commit is contained in:
shuvanon 2017-03-08 17:27:12 +06:00
parent 34801a0725
commit 45bc78461c
1 changed files with 4 additions and 0 deletions

View File

@@ -31,6 +31,10 @@ ABBREVIATIONS = {
{ORTH: "কি.মি", LEMMA: "কিলোমিটার"},
{ORTH: "কি.মি.", LEMMA: "কিলোমিটার"},
],
"সে.মি": [
{ORTH: "সে.মি", LEMMA: "সেন্টিমিটার"},
{ORTH: "সে.মি.", LEMMA: "সেন্টিমিটার"},
],
}
TOKENIZER_EXCEPTIONS.update(ABBREVIATIONS)