added prefix & suffix rules

This commit is contained in:
Jim Geovedi 2017-07-23 23:46:40 +07:00
parent ba922e30e8
commit 0e590c711f
1 changed file with 3 additions and 0 deletions

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals
from .stop_words import STOP_WORDS
from .punctuation import TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES
from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS
from .norm_exceptions import NORM_EXCEPTIONS
from .lex_attrs import LEX_ATTRS
@ -20,6 +21,8 @@ class IndonesianDefaults(Language.Defaults):
# Class-level defaults for the Indonesian language (body of IndonesianDefaults).
# Tokenizer exceptions: the result of update_exc() applied to BASE_EXCEPTIONS
# and this package's TOKENIZER_EXCEPTIONS (presumably a merge of the two, with
# the local entries layered on top; confirm against update_exc).
tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS)
# Stop words are held as a fresh set built from the imported STOP_WORDS.
stop_words = set(STOP_WORDS)
# Prefix and suffix rules come from this package's .punctuation module and are
# stored as tuples.
prefixes = tuple(TOKENIZER_PREFIXES)
suffixes = tuple(TOKENIZER_SUFFIXES)
class Indonesian(Language):