mirror of https://github.com/explosion/spaCy.git
Fix ValueError exception on empty Korean text. (#4245)
This commit is contained in:
parent 232a029de6
commit a55f5a744f
|
@@ -58,6 +58,7 @@ def check_spaces(text, tokens):
|
||||||
yield prev_end != idx
|
yield prev_end != idx
|
||||||
prev_end = idx + len(token)
|
prev_end = idx + len(token)
|
||||||
start = prev_end
|
start = prev_end
|
||||||
|
if start > 0:
|
||||||
yield False
|
yield False
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -45,3 +45,8 @@ def test_ko_tokenizer_full_tags(ko_tokenizer, text, expected_tags):
|
||||||
def test_ko_tokenizer_pos(ko_tokenizer, text, expected_pos):
|
def test_ko_tokenizer_pos(ko_tokenizer, text, expected_pos):
|
||||||
pos = [token.pos_ for token in ko_tokenizer(text)]
|
pos = [token.pos_ for token in ko_tokenizer(text)]
|
||||||
assert pos == expected_pos.split()
|
assert pos == expected_pos.split()
|
||||||
|
|
||||||
|
|
||||||
|
def test_ko_empty_doc(ko_tokenizer):
|
||||||
|
tokens = ko_tokenizer("")
|
||||||
|
assert len(tokens) == 0
|
||||||
|
|
Loading…
Reference in New Issue