mirror of https://github.com/explosion/spaCy.git
Fix tokenizer serialization
parent 4a398c15b7
commit 0561df2a9d
spacy/tokenizer.pyx
@@ -357,7 +357,7 @@ cdef class Tokenizer:
         """
         serializers = {
             'vocab': lambda: self.vocab.to_bytes(),
-            'prefix': lambda: self.prefix_search.__self__.pattern,
+            'prefix_search': lambda: self.prefix_search.__self__.pattern,
             'suffix_search': lambda: self.suffix_search.__self__.pattern,
             'infix_finditer': lambda: self.infix_finditer.__self__.pattern,
             'token_match': lambda: self.token_match.__self__.pattern,
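On the to_bytes side, these entries work because prefix_search, suffix_search,
infix_finditer and token_match are stored as bound methods of compiled
regexes, so .__self__ recovers the compiled pattern object and .pattern its
source string. A minimal sketch of that trick in plain Python (the toy
pattern below is illustrative, not spaCy's real prefix rule):

    import re

    pattern = r"^[\[\(<]"                       # a toy prefix pattern
    prefix_search = re.compile(pattern).search  # stored as a bound method

    # .__self__ is the compiled regex the bound method belongs to;
    # .pattern recovers the original pattern string for serialization.
    assert prefix_search.__self__.pattern == pattern
    assert prefix_search("(hello").start() == 0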
@@ -375,19 +375,19 @@ cdef class Tokenizer:
         data = {}
         deserializers = {
             'vocab': lambda b: self.vocab.from_bytes(b),
-            'prefix': lambda b: data.setdefault('prefix', b),
+            'prefix_search': lambda b: data.setdefault('prefix', b),
             'suffix_search': lambda b: data.setdefault('suffix_search', b),
             'infix_finditer': lambda b: data.setdefault('infix_finditer', b),
             'token_match': lambda b: data.setdefault('token_match', b),
             'exceptions': lambda b: data.setdefault('rules', b)
         }
         msg = util.from_bytes(bytes_data, deserializers, exclude)
-        if 'prefix' in data:
-            self.prefix_search = re.compile(data['prefix']).search
-        if 'suffix' in data:
-            self.suffix_search = re.compile(data['suffix']).search
-        if 'infix' in data:
-            self.infix_finditer = re.compile(data['infix']).finditer
+        if 'prefix_search' in data:
+            self.prefix_search = re.compile(data['prefix_search']).search
+        if 'suffix_search' in data:
+            self.suffix_search = re.compile(data['suffix_search']).search
+        if 'infix_finditer' in data:
+            self.infix_finditer = re.compile(data['infix_finditer']).finditer
         if 'token_match' in data:
             self.token_match = re.compile(data['token_match']).search
         for string, substrings in data.get('rules', {}).items():
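Before this commit, from_bytes looked up 'suffix' and 'infix' keys that
to_bytes never wrote (it wrote 'suffix_search' and 'infix_finditer'), so
those patterns were silently dropped on deserialization; the renamed keys
make the two sides agree. A hedged round-trip sketch against the v2-era API
shown above (the English import path is an assumption based on spaCy 2.x):

    # Round-trip sketch; import path and behavior assumed from spaCy 2.x.
    from spacy.lang.en import English

    nlp = English()
    data = nlp.tokenizer.to_bytes()   # keys: vocab, prefix_search, suffix_search, ...

    nlp2 = English()
    nlp2.tokenizer.from_bytes(data)   # recompiles the serialized patterns

    doc = nlp2.tokenizer("They'll re-load (correctly).")
    print([t.text for t in doc])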