mirror of https://github.com/explosion/spaCy.git
Convert exceptions to Python list
This commit is contained in:
parent
51eb190ef4
commit
0e2e331b58
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -8,6 +8,7 @@ from ..language_data.tokenizer_exceptions import _URL_PATTERN
|
|||
from ..language_data.punctuation import ALPHA_LOWER
|
||||
|
||||
from .punctuation import ELISION, HYPHENS
|
||||
from ._tokenizer_exceptions_list import BASE_EXCEPTIONS
|
||||
|
||||
from ..symbols import *
|
||||
|
||||
|
@ -16,11 +17,13 @@ import io
|
|||
import re
|
||||
|
||||
|
||||
def iter_exceptions():
|
||||
with io.open(os.path.join(os.path.dirname(__file__), 'resources/tokenizer_exceptions'),
|
||||
'rt', encoding='utf8') as f:
|
||||
for line in f:
|
||||
yield line.strip('\n')
|
||||
def get_exceptions():
|
||||
return BASE_EXCEPTIONS
|
||||
|
||||
# with io.open(os.path.join(os.path.dirname(__file__), 'resources/tokenizer_exceptions'),
|
||||
# 'rt', encoding='utf8') as f:
|
||||
# for line in f:
|
||||
# yield line.strip('\n')
|
||||
|
||||
|
||||
def upper_first_letter(text):
|
||||
|
@ -142,7 +145,7 @@ def get_tokenizer_exceptions():
|
|||
|
||||
HYPHEN = ['-', '‐']
|
||||
|
||||
base_exceptions = list(iter_exceptions())
|
||||
base_exceptions = get_exceptions()
|
||||
infixes_exceptions = []
|
||||
|
||||
for elision_char in ELISION:
|
||||
|
|
Loading…
Reference in New Issue