From ae09b6a6cf4fc47b9ea6385a9b62bc73fad1e46a Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Sat, 9 Mar 2019 02:37:50 +0100
Subject: [PATCH] Try fixing unicode inconsistencies on Python 2

---
 spacy/lang/fr/tokenizer_exceptions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/lang/fr/tokenizer_exceptions.py b/spacy/lang/fr/tokenizer_exceptions.py
index 997a81534..b0f188278 100644
--- a/spacy/lang/fr/tokenizer_exceptions.py
+++ b/spacy/lang/fr/tokenizer_exceptions.py
@@ -423,5 +423,5 @@ _regular_exp.append(URL_PATTERN)
 
 TOKENIZER_EXCEPTIONS = _exc
 TOKEN_MATCH = re.compile(
-    "|".join("(?:{})".format(m) for m in _regular_exp), re.IGNORECASE
+    "|".join("(?:{})".format(m) for m in _regular_exp), re.IGNORECASE | re.UNICODE
 ).match
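
Note (not part of the patch): a minimal sketch of why the added re.UNICODE flag matters on Python 2. There, `\w` and case folding in a pattern only cover ASCII unless re.UNICODE is set, so accented characters in the French exception patterns would not match; on Python 3 this flag is the default for str patterns. The flags must be combined with `|`, since re.compile() accepts only a single flags argument. The example string below is illustrative, not taken from the spaCy exception list.

# -*- coding: utf-8 -*-
# Illustrative sketch only: compare ASCII-only vs. unicode-aware matching.
import re

word = u"dépêche"

ascii_only = re.compile(r"\w+", re.IGNORECASE)
unicode_aware = re.compile(r"\w+", re.IGNORECASE | re.UNICODE)

# On Python 2, the ASCII-only pattern splits on the accented letters,
# e.g. [u'd', u'p', u'che']; the unicode-aware pattern matches the
# whole word. On Python 3 both print [u'dépêche'].
print(ascii_only.findall(word))
print(unicode_aware.findall(word))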