mirror of https://github.com/explosion/spaCy.git
Disable sentence segmentation in ja tokenizer (#5566)
This commit is contained in:
parent 86112d2168
commit b7e6e1b9a7
@@ -209,7 +209,6 @@ class JapaneseTokenizer(DummyTokenizer):
             token.lemma_ = lemma
         doc.user_data["unidic_tags"] = unidic_tags

-        separate_sentences(doc)
         return doc

     def _get_config(self):
@@ -58,6 +58,7 @@ def test_ja_tokenizer_pos(ja_tokenizer, text, expected_pos):
     assert pos == expected_pos


+@pytest.mark.skip(reason="sentence segmentation in tokenizer is buggy")
@pytest.mark.parametrize("text,expected_sents", SENTENCE_TESTS)
 def test_ja_tokenizer_pos(ja_tokenizer, text, expected_sents):
     sents = [str(sent) for sent in ja_tokenizer(text).sents]
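For context: after this change the Japanese tokenizer no longer assigns sentence boundaries itself, so doc.sents is only populated once a downstream component sets them. Below is a minimal sketch (not part of this commit) of one way to get sentence boundaries back, assuming SudachiPy (the ja tokenizer backend) is installed; the add_pipe("sentencizer") form is the spaCy v3 API, while v2.x would use nlp.add_pipe(nlp.create_pipe("sentencizer")).

# Sketch only: restore sentence segmentation with the rule-based Sentencizer
# now that the tokenizer no longer calls separate_sentences().
import spacy

nlp = spacy.blank("ja")      # Japanese tokenizer only, no sentence boundaries
nlp.add_pipe("sentencizer")  # rule-based splitting on punctuation such as 。！？

doc = nlp("これは文です。これも文です。")
for sent in doc.sents:
    print(sent.text)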