mirror of https://github.com/explosion/spaCy.git
Fix spacing after token_match
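The boolean flag indicating whether a space follows the token was being set incorrectly (to `not suffixes.size()`) when the token_match regex matched. It is now always set to False in that branch, and the caller fixes up the outermost token's spacing with reference to the original string. Fixes #859.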
This commit is contained in:
parent
cd33b39a04
commit
0a6d7ca200
|
@ -275,7 +275,10 @@ cdef class Tokenizer:
|
||||||
if cache_hit:
|
if cache_hit:
|
||||||
pass
|
pass
|
||||||
elif self.token_match and self.token_match(string):
|
elif self.token_match and self.token_match(string):
|
||||||
tokens.push_back(self.vocab.get(tokens.mem, string), not suffixes.size())
|
# We're always saying 'no' to spaces here -- the caller will
|
||||||
|
# fix up the outermost one, with reference to the original.
|
||||||
|
# See Issue #859
|
||||||
|
tokens.push_back(self.vocab.get(tokens.mem, string), False)
|
||||||
else:
|
else:
|
||||||
matches = self.find_infix(string)
|
matches = self.find_infix(string)
|
||||||
if not matches:
|
if not matches:
|
||||||
|
|
Loading…
Reference in New Issue