mirror of https://github.com/explosion/spaCy.git
update tokenizertokenizer
This commit is contained in:
parent
34801a0725
commit
45bc78461c
|
@ -31,6 +31,10 @@ ABBREVIATIONS = {
|
||||||
{ORTH: "কি.মি", LEMMA: "কিলোমিটার"},
|
{ORTH: "কি.মি", LEMMA: "কিলোমিটার"},
|
||||||
{ORTH: "কি.মি.", LEMMA: "কিলোমিটার"},
|
{ORTH: "কি.মি.", LEMMA: "কিলোমিটার"},
|
||||||
],
|
],
|
||||||
|
"সে.মি": [
|
||||||
|
{ORTH: "সে.মি", LEMMA: "সেন্টিমিটার"},
|
||||||
|
{ORTH: "সে.মি.", LEMMA: "সেন্টিমিটার"},
|
||||||
|
],
|
||||||
}
|
}
|
||||||
|
|
||||||
TOKENIZER_EXCEPTIONS.update(ABBREVIATIONS)
|
TOKENIZER_EXCEPTIONS.update(ABBREVIATIONS)
|
||||||
|
|
Loading…
Reference in New Issue