mirror of https://github.com/explosion/spaCy.git
Handle deprecated tokenizer prefix data
This commit is contained in:
parent
abb09782f9
commit
de5aa92bc2
|
@ -94,8 +94,13 @@ def read_regex(path):
|
|||
|
||||
|
||||
def compile_prefix_regex(entries):
    """Compile tokenizer prefix entries into one start-anchored regex.

    Every non-blank entry becomes a ``^``-anchored alternative, and the
    alternatives are joined with ``|``.

    Two data formats are supported:

    * Deprecated data, recognised by a bare ``'('`` entry: the entries are
      literal strings, so each one is passed through ``re.escape``.
    * Current data: the entries are used verbatim as regular-expression
      fragments.

    entries: An iterable of prefix strings; blank entries are ignored.
    Returns: The compiled regular expression object.
    """
    # Materialize once: the membership test below would otherwise consume
    # part of a one-shot iterator before the pieces are joined.
    entries = list(entries)
    if '(' in entries:
        # Handle deprecated data: an unescaped '(' can only be a literal.
        pieces = [re.escape(piece) for piece in entries if piece.strip()]
    else:
        pieces = [piece for piece in entries if piece.strip()]
    expression = '|'.join('^' + piece for piece in pieces)
    return re.compile(expression)
|
||||
|
||||
|
||||
def compile_suffix_regex(entries):
|
||||
|
|
Loading…
Reference in New Issue