diff --git a/spacy/de/__init__.py b/spacy/de/__init__.py
index 5b1cca578..24143eaea 100644
--- a/spacy/de/__init__.py
+++ b/spacy/de/__init__.py
@@ -8,9 +8,9 @@ from ..attrs import LANG
 from . import language_data
 
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc
 
 
 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
diff --git a/spacy/de/language_data.py b/spacy/de/language_data.py
index b7e25ca4a..8cadecd8f 100644
--- a/spacy/de/language_data.py
+++ b/spacy/de/language_data.py
@@ -2,18 +2,12 @@
 from __future__ import unicode_literals
 
 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES
 
 
-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {
     "$(": {TAG: PUNCT, "PunctType": "brck"},
     "$,": {TAG: PUNCT, "PunctType": "comm"},
diff --git a/spacy/en/__init__.py b/spacy/en/__init__.py
index 9268b6746..2a7fbaf8c 100644
--- a/spacy/en/__init__.py
+++ b/spacy/en/__init__.py
@@ -12,9 +12,9 @@ from ..tokenizer import Tokenizer
 from ..attrs import LANG
 
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc
 from .language_data import get_time_exc
 
diff --git a/spacy/en/language_data.py b/spacy/en/language_data.py
index aec221de8..edf44468b 100644
--- a/spacy/en/language_data.py
+++ b/spacy/en/language_data.py
@@ -2,15 +2,12 @@
 from __future__ import unicode_literals
 
 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES
 
 
-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
 def get_time_exc(hours):
     exc = {}
     for hour in hours:
@@ -36,9 +33,6 @@ def get_time_exc(hours):
     return exc
 
 
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {
     ".": {POS: PUNCT, "PunctType": "peri"},
     ",": {POS: PUNCT, "PunctType": "comm"},
diff --git a/spacy/es/__init__.py b/spacy/es/__init__.py
index 55a952c0b..0b54e1746 100644
--- a/spacy/es/__init__.py
+++ b/spacy/es/__init__.py
@@ -8,9 +8,9 @@ from . import language_data
 from ..attrs import LANG
 
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc
 
 
 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
diff --git a/spacy/es/language_data.py b/spacy/es/language_data.py
index f8e144341..344adf59b 100644
--- a/spacy/es/language_data.py
+++ b/spacy/es/language_data.py
@@ -2,18 +2,12 @@
 from __future__ import unicode_literals
 
 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES
 
 
-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {
 
 }
diff --git a/spacy/fr/__init__.py b/spacy/fr/__init__.py
index edc9b2f9c..536c925d0 100644
--- a/spacy/fr/__init__.py
+++ b/spacy/fr/__init__.py
@@ -8,9 +8,9 @@ from . import language_data
 from ..attrs import LANG
 
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc
 
 
 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
diff --git a/spacy/fr/language_data.py b/spacy/fr/language_data.py
index 1b8d2bc02..80419402d 100644
--- a/spacy/fr/language_data.py
+++ b/spacy/fr/language_data.py
@@ -2,18 +2,12 @@
 from __future__ import unicode_literals
 
 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES
 
 
-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {
 
 }
diff --git a/spacy/it/__init__.py b/spacy/it/__init__.py
index 44d97ebff..49f8d6780 100644
--- a/spacy/it/__init__.py
+++ b/spacy/it/__init__.py
@@ -8,9 +8,9 @@ from . import language_data
 from ..attrs import LANG
 
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc
 
 
 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
diff --git a/spacy/it/language_data.py b/spacy/it/language_data.py
index 85326b8cf..162d26022 100644
--- a/spacy/it/language_data.py
+++ b/spacy/it/language_data.py
@@ -2,18 +2,12 @@
 from __future__ import unicode_literals
 
 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES
 
 
-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {
 
 }
diff --git a/spacy/pt/__init__.py b/spacy/pt/__init__.py
index 92aba26aa..26b19831f 100644
--- a/spacy/pt/__init__.py
+++ b/spacy/pt/__init__.py
@@ -8,9 +8,9 @@ from . import language_data
 from ..attrs import LANG
 
 from ..language_data import update_exc
+from ..language_data import strings_to_exc
 from ..language_data import EMOTICONS
 from .language_data import ORTH_ONLY
-from .language_data import strings_to_exc
 
 
 TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
diff --git a/spacy/pt/language_data.py b/spacy/pt/language_data.py
index add07c3cc..9ebd5e678 100644
--- a/spacy/pt/language_data.py
+++ b/spacy/pt/language_data.py
@@ -2,18 +2,12 @@
 from __future__ import unicode_literals
 
 from ..symbols import *
+from ..language_data import PRON_LEMMA
 from ..language_data import TOKENIZER_PREFIXES
 from ..language_data import TOKENIZER_SUFFIXES
 from ..language_data import TOKENIZER_INFIXES
 
 
-def strings_to_exc(orths):
-    return {orth: [{ORTH: orth}] for orth in orths}
-
-
-PRON_LEMMA = "-PRON-"
-
-
 TAG_MAP = {
 
 }
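
Note: the diff shows only the per-language removals and the switched imports; it implies that the shared `spacy/language_data` package now exports `strings_to_exc` and `PRON_LEMMA`, but that addition is not part of this diff. Below is a minimal sketch of the shared definitions, copied verbatim from the code removed above; the exact file inside `spacy/language_data/` (here `util.py`) and the explicit `ORTH` import are assumptions, since the per-language files relied on `from ..symbols import *`.

```python
# Hypothetical spacy/language_data/util.py — shared definitions implied by
# the imports above; contents are taken from the removed per-language code.
from ..symbols import ORTH

# Shared lemma placeholder for pronouns, previously duplicated in every
# per-language language_data.py.
PRON_LEMMA = "-PRON-"


def strings_to_exc(orths):
    # Turn each orthographic string into a tokenizer exception mapping the
    # string to a single token whose ORTH is that exact string.
    return {orth: [{ORTH: orth}] for orth in orths}
```

These names would then be re-exported from `spacy/language_data/__init__.py` (e.g. `from .util import PRON_LEMMA, strings_to_exc`) so the `from ..language_data import ...` lines added above resolve. The call sites are untouched by this diff: each language's `__init__.py` presumably keeps building its exceptions the same way, e.g. `update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(ORTH_ONLY))`.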