From de5aa92bc2bedf415c468b49c4bb3c15cf00a970 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Sun, 8 Jan 2017 20:33:28 +0100
Subject: [PATCH] Handle deprecated tokenizer prefix data

---
 spacy/util.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/spacy/util.py b/spacy/util.py
index afed4142e..457534302 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -94,8 +94,13 @@ def read_regex(path):
 
 
 def compile_prefix_regex(entries):
-    expression = '|'.join(['^' + re.escape(piece) for piece in entries if piece.strip()])
-    return re.compile(expression)
+    if '(' in entries:
+        # Handle deprecated data
+        expression = '|'.join(['^' + re.escape(piece) for piece in entries if piece.strip()])
+        return re.compile(expression)
+    else:
+        expression = '|'.join(['^' + piece for piece in entries if piece.strip()])
+        return re.compile(expression)
 
 
 def compile_suffix_regex(entries):