From 7aad6718bc461c963dc47aabd9e9b80181e775b5 Mon Sep 17 00:00:00 2001
From: Jim Geovedi
Date: Mon, 24 Jul 2017 14:11:10 +0700
Subject: [PATCH] enable tokenizer exceptions

---
Note: this revision also imports ORTH, which the added code uses as a
dict key; without that import the patched module raises NameError when
it is imported. The blob ids on the "index" line below are those of the
original patch and were not recomputed.

 spacy/lang/id/tokenizer_exceptions.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/spacy/lang/id/tokenizer_exceptions.py b/spacy/lang/id/tokenizer_exceptions.py
index 339f5a54c..915197cd8 100644
--- a/spacy/lang/id/tokenizer_exceptions.py
+++ b/spacy/lang/id/tokenizer_exceptions.py
@@ -1,4 +1,13 @@
 # coding: utf8
 from __future__ import unicode_literals
 
-TOKENIZER_EXCEPTIONS = {}
\ No newline at end of file
+from ...symbols import ORTH
+from ._tokenizer_exceptions_list import FR_BASE_EXCEPTIONS
+
+_exc = {}
+
+# Map every exception string to a one-element list holding {ORTH: string}.
+for orth in FR_BASE_EXCEPTIONS + ["etc."]:
+    _exc[orth] = [{ORTH: orth}]
+
+TOKENIZER_EXCEPTIONS = dict(_exc)
\ No newline at end of file