Fix ValueError exception on empty Korean text. (#4245)

This commit is contained in:
Bae Yong-Ju 2019-09-06 17:29:40 +09:00 committed by Ines Montani
parent 232a029de6
commit a55f5a744f
2 changed files with 7 additions and 1 deletion

View File

@@ -58,7 +58,8 @@ def check_spaces(text, tokens):
             yield prev_end != idx
         prev_end = idx + len(token)
         start = prev_end
-    yield False
+    if start > 0:
+        yield False
 
 
 class KoreanTokenizer(DummyTokenizer):

View File

@@ -45,3 +45,8 @@ def test_ko_tokenizer_full_tags(ko_tokenizer, text, expected_tags):
def test_ko_tokenizer_pos(ko_tokenizer, text, expected_pos):
    pos = [token.pos_ for token in ko_tokenizer(text)]
    assert pos == expected_pos.split()
def test_ko_empty_doc(ko_tokenizer):
    """Regression test for #4245: empty Korean text yields zero tokens.

    Tokenizing an empty string used to raise ``ValueError``; it must now
    return a result whose length is 0.
    """
    tokens = ko_tokenizer("")
    assert len(tokens) == 0