diff --git a/spacy/language_data/tokenizer_exceptions.py b/spacy/language_data/tokenizer_exceptions.py
index 7d623cbb3..f01c2fdf5 100644
--- a/spacy/language_data/tokenizer_exceptions.py
+++ b/spacy/language_data/tokenizer_exceptions.py
@@ -45,6 +45,6 @@ _URL_PATTERN = (
     r"$"
 ).strip()
 
-TOKEN_MATCH = re.compile(_URL_PATTERN).match
+TOKEN_MATCH = re.compile(_URL_PATTERN, re.UNICODE).match
 
 __all__ = ['TOKEN_MATCH']