mirror of https://github.com/explosion/spaCy.git
Check that patterns aren't null before compiling regex for tokenizer
This commit is contained in:
parent
5ac735df33
commit
22647c2423
|
@@ -62,9 +62,18 @@ class BaseDefaults(object):
|
|||
@classmethod
|
||||
def create_tokenizer(cls, nlp=None):
|
||||
rules = cls.tokenizer_exceptions
|
||||
if cls.prefixes:
|
||||
prefix_search = util.compile_prefix_regex(cls.prefixes).search
|
||||
else:
|
||||
prefix_search = None
|
||||
if cls.suffixes:
|
||||
suffix_search = util.compile_suffix_regex(cls.suffixes).search
|
||||
else:
|
||||
suffix_search = None
|
||||
if cls.infixes:
|
||||
infix_finditer = util.compile_infix_regex(cls.infixes).finditer
|
||||
else:
|
||||
infix_finditer = None
|
||||
vocab = nlp.vocab if nlp is not None else cls.create_vocab(nlp)
|
||||
return Tokenizer(nlp.vocab, rules=rules,
|
||||
prefix_search=prefix_search, suffix_search=suffix_search,
|
||||
|
|
Loading…
Reference in New Issue