enable tokenizer exceptions

This commit is contained in:
Jim Geovedi 2017-07-24 14:11:10 +07:00
parent ad56c9179a
commit 7aad6718bc
1 changed file with 8 additions and 1 deletion

View File

@@ -1,4 +1,11 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
TOKENIZER_EXCEPTIONS = {} from ._tokenizer_exceptions_list import FR_BASE_EXCEPTIONS
# Tokenizer exception table: each exception string maps to a one-element
# list holding a single {ORTH: string} attribute dict.
# NOTE(review): presumably this makes the tokenizer keep each string as one
# token -- confirm against the tokenizer's handling of exceptions.
# NOTE(review): ORTH is not imported in the visible part of this file --
# confirm it is in scope.
_exc = {}
# "etc." is appended to the imported base list; the `+` concatenation
# requires FR_BASE_EXCEPTIONS to be a list.
for orth in FR_BASE_EXCEPTIONS + ["etc."]:
    _exc[orth] = [{ORTH: orth}]
# Export a shallow copy so later changes to _exc do not affect the export.
TOKENIZER_EXCEPTIONS = dict(_exc)