mirror of https://github.com/explosion/spaCy.git
Move shared functions and constants to global language data
This commit is contained in:
parent
6a60a61086
commit
08162dce67
|
@ -8,9 +8,9 @@ from ..attrs import LANG
|
|||
from . import language_data
|
||||
|
||||
from ..language_data import update_exc
|
||||
from ..language_data import strings_to_exc
|
||||
from ..language_data import EMOTICONS
|
||||
from .language_data import ORTH_ONLY
|
||||
from .language_data import strings_to_exc
|
||||
|
||||
|
||||
TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
|
||||
|
|
|
@ -2,18 +2,12 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from ..symbols import *
|
||||
from ..language_data import PRON_LEMMA
|
||||
from ..language_data import TOKENIZER_PREFIXES
|
||||
from ..language_data import TOKENIZER_SUFFIXES
|
||||
from ..language_data import TOKENIZER_INFIXES
|
||||
|
||||
|
||||
def strings_to_exc(orths):
|
||||
return {orth: [{ORTH: orth}] for orth in orths}
|
||||
|
||||
|
||||
PRON_LEMMA = "-PRON-"
|
||||
|
||||
|
||||
TAG_MAP = {
|
||||
"$(": {TAG: PUNCT, "PunctType": "brck"},
|
||||
"$,": {TAG: PUNCT, "PunctType": "comm"},
|
||||
|
|
|
@ -12,9 +12,9 @@ from ..tokenizer import Tokenizer
|
|||
from ..attrs import LANG
|
||||
|
||||
from ..language_data import update_exc
|
||||
from ..language_data import strings_to_exc
|
||||
from ..language_data import EMOTICONS
|
||||
from .language_data import ORTH_ONLY
|
||||
from .language_data import strings_to_exc
|
||||
from .language_data import get_time_exc
|
||||
|
||||
|
||||
|
|
|
@ -2,15 +2,12 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from ..symbols import *
|
||||
from ..language_data import PRON_LEMMA
|
||||
from ..language_data import TOKENIZER_PREFIXES
|
||||
from ..language_data import TOKENIZER_SUFFIXES
|
||||
from ..language_data import TOKENIZER_INFIXES
|
||||
|
||||
|
||||
def strings_to_exc(orths):
|
||||
return {orth: [{ORTH: orth}] for orth in orths}
|
||||
|
||||
|
||||
def get_time_exc(hours):
|
||||
exc = {}
|
||||
for hour in hours:
|
||||
|
@ -36,9 +33,6 @@ def get_time_exc(hours):
|
|||
return exc
|
||||
|
||||
|
||||
PRON_LEMMA = "-PRON-"
|
||||
|
||||
|
||||
TAG_MAP = {
|
||||
".": {POS: PUNCT, "PunctType": "peri"},
|
||||
",": {POS: PUNCT, "PunctType": "comm"},
|
||||
|
|
|
@ -8,9 +8,9 @@ from . import language_data
|
|||
from ..attrs import LANG
|
||||
|
||||
from ..language_data import update_exc
|
||||
from ..language_data import strings_to_exc
|
||||
from ..language_data import EMOTICONS
|
||||
from .language_data import ORTH_ONLY
|
||||
from .language_data import strings_to_exc
|
||||
|
||||
|
||||
TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
|
||||
|
|
|
@ -2,18 +2,12 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from ..symbols import *
|
||||
from ..language_data import PRON_LEMMA
|
||||
from ..language_data import TOKENIZER_PREFIXES
|
||||
from ..language_data import TOKENIZER_SUFFIXES
|
||||
from ..language_data import TOKENIZER_INFIXES
|
||||
|
||||
|
||||
def strings_to_exc(orths):
|
||||
return {orth: [{ORTH: orth}] for orth in orths}
|
||||
|
||||
|
||||
PRON_LEMMA = "-PRON-"
|
||||
|
||||
|
||||
TAG_MAP = {
|
||||
|
||||
}
|
||||
|
|
|
@ -8,9 +8,9 @@ from . import language_data
|
|||
from ..attrs import LANG
|
||||
|
||||
from ..language_data import update_exc
|
||||
from ..language_data import strings_to_exc
|
||||
from ..language_data import EMOTICONS
|
||||
from .language_data import ORTH_ONLY
|
||||
from .language_data import strings_to_exc
|
||||
|
||||
|
||||
TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
|
||||
|
|
|
@ -2,18 +2,12 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from ..symbols import *
|
||||
from ..language_data import PRON_LEMMA
|
||||
from ..language_data import TOKENIZER_PREFIXES
|
||||
from ..language_data import TOKENIZER_SUFFIXES
|
||||
from ..language_data import TOKENIZER_INFIXES
|
||||
|
||||
|
||||
def strings_to_exc(orths):
|
||||
return {orth: [{ORTH: orth}] for orth in orths}
|
||||
|
||||
|
||||
PRON_LEMMA = "-PRON-"
|
||||
|
||||
|
||||
TAG_MAP = {
|
||||
|
||||
}
|
||||
|
|
|
@ -8,9 +8,9 @@ from . import language_data
|
|||
from ..attrs import LANG
|
||||
|
||||
from ..language_data import update_exc
|
||||
from ..language_data import strings_to_exc
|
||||
from ..language_data import EMOTICONS
|
||||
from .language_data import ORTH_ONLY
|
||||
from .language_data import strings_to_exc
|
||||
|
||||
|
||||
TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
|
||||
|
|
|
@ -2,18 +2,12 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from ..symbols import *
|
||||
from ..language_data import PRON_LEMMA
|
||||
from ..language_data import TOKENIZER_PREFIXES
|
||||
from ..language_data import TOKENIZER_SUFFIXES
|
||||
from ..language_data import TOKENIZER_INFIXES
|
||||
|
||||
|
||||
def strings_to_exc(orths):
|
||||
return {orth: [{ORTH: orth}] for orth in orths}
|
||||
|
||||
|
||||
PRON_LEMMA = "-PRON-"
|
||||
|
||||
|
||||
TAG_MAP = {
|
||||
|
||||
}
|
||||
|
|
|
@ -8,9 +8,9 @@ from . import language_data
|
|||
from ..attrs import LANG
|
||||
|
||||
from ..language_data import update_exc
|
||||
from ..language_data import strings_to_exc
|
||||
from ..language_data import EMOTICONS
|
||||
from .language_data import ORTH_ONLY
|
||||
from .language_data import strings_to_exc
|
||||
|
||||
|
||||
TOKENIZER_EXCEPTIONS = dict(language_data.TOKENIZER_EXCEPTIONS)
|
||||
|
|
|
@ -2,18 +2,12 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
from ..symbols import *
|
||||
from ..language_data import PRON_LEMMA
|
||||
from ..language_data import TOKENIZER_PREFIXES
|
||||
from ..language_data import TOKENIZER_SUFFIXES
|
||||
from ..language_data import TOKENIZER_INFIXES
|
||||
|
||||
|
||||
def strings_to_exc(orths):
|
||||
return {orth: [{ORTH: orth}] for orth in orths}
|
||||
|
||||
|
||||
PRON_LEMMA = "-PRON-"
|
||||
|
||||
|
||||
TAG_MAP = {
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue