mirror of https://github.com/explosion/spaCy.git
Use special matcher for exceptions with spaces (#6668)
Register tokenizer exceptions (special cases) that contain space characters with the special-cases phrase matcher, so that exceptions spanning spaces are supported.
This commit is contained in:
parent
afc5714d32
commit
0041dfbc7f
|
@ -180,3 +180,9 @@ def test_tokenizer_special_cases_idx(tokenizer):
|
|||
doc = tokenizer(text)
|
||||
assert doc[1].idx == 4
|
||||
assert doc[2].idx == 7
|
||||
|
||||
|
||||
def test_tokenizer_special_cases_spaces(tokenizer):
    """A special case whose text contains spaces comes back as one token."""
    text = "a b c"
    # Before the special case is registered, the text yields three tokens.
    before = [token.text for token in tokenizer(text)]
    assert before == ["a", "b", "c"]
    # Register the whole string, spaces included, as a single exception.
    tokenizer.add_special_case(text, [{"ORTH": text}])
    after = [token.text for token in tokenizer(text)]
    assert after == [text]
|
||||
|
|
|
@ -611,7 +611,7 @@ cdef class Tokenizer:
|
|||
self.mem.free(stale_special)
|
||||
self._rules[string] = substrings
|
||||
self._flush_cache()
|
||||
if self.find_prefix(string) or self.find_infix(string) or self.find_suffix(string):
|
||||
if self.find_prefix(string) or self.find_infix(string) or self.find_suffix(string) or " " in string:
|
||||
self._special_matcher.add(string, None, self._tokenize_affixes(string, False))
|
||||
|
||||
def _reload_special_cases(self):
|
||||
|
|
Loading…
Reference in New Issue