From 0e590c711f0b2e610964a72a856574598ce4a810 Mon Sep 17 00:00:00 2001 From: Jim Geovedi Date: Sun, 23 Jul 2017 23:46:40 +0700 Subject: [PATCH] added prefix & suffix rules --- spacy/lang/id/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/spacy/lang/id/__init__.py b/spacy/lang/id/__init__.py index 685972006..71f42d712 100644 --- a/spacy/lang/id/__init__.py +++ b/spacy/lang/id/__init__.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals from .stop_words import STOP_WORDS +from .punctuation import TOKENIZER_SUFFIXES, TOKENIZER_PREFIXES from .tokenizer_exceptions import TOKENIZER_EXCEPTIONS from .norm_exceptions import NORM_EXCEPTIONS from .lex_attrs import LEX_ATTRS @@ -20,6 +21,8 @@ class IndonesianDefaults(Language.Defaults): tokenizer_exceptions = update_exc(BASE_EXCEPTIONS, TOKENIZER_EXCEPTIONS) stop_words = set(STOP_WORDS) + prefixes = tuple(TOKENIZER_PREFIXES) + suffixes = tuple(TOKENIZER_SUFFIXES) class Indonesian(Language):