mirror of https://github.com/explosion/spaCy.git
Fix ValueError exception on empty Korean text. (#4245)
This commit is contained in:
parent
232a029de6
commit
a55f5a744f
|
@ -58,7 +58,8 @@ def check_spaces(text, tokens):
|
|||
yield prev_end != idx
|
||||
prev_end = idx + len(token)
|
||||
start = prev_end
|
||||
yield False
|
||||
if start > 0:
|
||||
yield False
|
||||
|
||||
|
||||
class KoreanTokenizer(DummyTokenizer):
|
||||
|
|
|
@ -45,3 +45,8 @@ def test_ko_tokenizer_full_tags(ko_tokenizer, text, expected_tags):
|
|||
def test_ko_tokenizer_pos(ko_tokenizer, text, expected_pos):
    """Compare each token's ``pos_`` against the expected labels.

    ``expected_pos`` is a single whitespace-separated string; it is split
    into a list and compared, in order, with the ``pos_`` value of every
    token produced by calling ``ko_tokenizer`` on ``text``.
    """
    doc = ko_tokenizer(text)
    actual = [tok.pos_ for tok in doc]
    wanted = expected_pos.split()
    assert actual == wanted
|
||||
|
||||
|
||||
def test_ko_empty_doc(ko_tokenizer):
    """Tokenizing the empty string must produce a result with zero tokens.

    Regression test accompanying the "ValueError exception on empty Korean
    text" fix (#4245).
    """
    empty_doc = ko_tokenizer("")
    n_tokens = len(empty_doc)
    assert n_tokens == 0
|
||||
|
|
Loading…
Reference in New Issue