mirror of https://github.com/explosion/spaCy.git
* Add string length cap in Tokenizer.__call__
parent 17fffb4c57
commit 3ba66f2dc7
@@ -72,6 +72,10 @@ cdef class Tokenizer:
         Returns:
             tokens (Doc): A Doc object, giving access to a sequence of LexemeCs.
         """
+        if len(string) >= (2 ** 30):
+            raise ValueError(
+                "String is too long: %d characters. Max is 2**30." % len(string)
+            )
         cdef int length = len(string)
         cdef Doc tokens = Doc(self.vocab)
         if length == 0:
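
For reference, a minimal sketch of how the new guard looks from the caller's side: any text of 2**30 characters or more passed to the tokenizer now raises a ValueError up front instead of being handed to the Cython internals. The model name "en" and the oversized test string below are illustrative assumptions, not part of the commit.

    import spacy

    MAX_LEN = 2 ** 30  # cap enforced in Tokenizer.__call__ by this commit

    nlp = spacy.load("en")  # any installed model would do; "en" is assumed here

    # Building a string at the cap is memory-heavy (roughly a gibibyte of text);
    # it is shown only to trigger the error path added by this commit.
    too_long = "a" * MAX_LEN
    try:
        doc = nlp.tokenizer(too_long)
    except ValueError as err:
        print(err)  # String is too long: 1073741824 characters. Max is 2**30.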