mirror of https://github.com/explosion/spaCy.git
Expand tokenizer exceptions with unicode apostrophe (fixes #685)
This commit is contained in:
parent
e0a7b5c612
commit
5445074cbd
|
@@ -13,6 +13,7 @@ from ..attrs import LANG
|
||||||
|
|
||||||
from ..language_data import update_exc
|
from ..language_data import update_exc
|
||||||
from ..language_data import strings_to_exc
|
from ..language_data import strings_to_exc
|
||||||
|
from ..language_data import expand_exc
|
||||||
from ..language_data import EMOTICONS
|
from ..language_data import EMOTICONS
|
||||||
|
|
||||||
from .language_data import ORTH_ONLY
|
from .language_data import ORTH_ONLY
|
||||||
|
@@ -30,6 +31,7 @@ STOP_WORDS = set(language_data.STOP_WORDS)
|
||||||
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(EMOTICONS))
|
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(EMOTICONS))
|
||||||
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(ORTH_ONLY))
|
update_exc(TOKENIZER_EXCEPTIONS, strings_to_exc(ORTH_ONLY))
|
||||||
update_exc(TOKENIZER_EXCEPTIONS, get_time_exc(range(1, 12 + 1)))
|
update_exc(TOKENIZER_EXCEPTIONS, get_time_exc(range(1, 12 + 1)))
|
||||||
|
update_exc(TOKENIZER_EXCEPTIONS, expand_exc(TOKENIZER_EXCEPTIONS, "'", "’"))
|
||||||
|
|
||||||
|
|
||||||
class English(Language):
|
class English(Language):
|
||||||
|
|
Loading…
Reference in New Issue