mirror of https://github.com/explosion/spaCy.git
Adding unit test for tokenization in French (with title)
This commit is contained in:
parent
ad8129cb45
commit
44cb486849
|
@ -38,3 +38,26 @@ def test_tokenizer_handles_exc_in_text_2(fr_tokenizer):
|
||||||
assert len(tokens) == 11
|
assert len(tokens) == 11
|
||||||
assert tokens[1].text == "après-midi"
|
assert tokens[1].text == "après-midi"
|
||||||
assert tokens[9].text == "italo-mexicain"
|
assert tokens[9].text == "italo-mexicain"
|
||||||
|
|
||||||
|
def test_tokenizer_handles_title(fr_tokenizer):
    """The elided clitic "N'" and the inverted "-ce" each form their own
    token and carry the full lemma ("ne" / "ce")."""
    tokens = fr_tokenizer("N'est-ce pas génial?")
    assert len(tokens) == 6
    # index -> (surface form, lemma)
    expected = {0: ("N'", "ne"), 2: ("-ce", "ce")}
    for idx, (form, lemma) in expected.items():
        assert tokens[idx].text == form
        assert tokens[idx].lemma_ == lemma
||||||
|
def test_tokenizer_handles_title_2(fr_tokenizer):
    """A sentence-initial inverted verb ("Est-ce") splits off the verb,
    which is lemmatized to its infinitive ("être")."""
    tokens = fr_tokenizer("Est-ce pas génial?")
    assert len(tokens) == 6
    first = tokens[0]
    assert first.text == "Est"
    assert first.lemma_ == "être"
|
def test_tokenizer_handles_title_3(fr_tokenizer):
    """The elided "Qu'" in "Qu'est-ce que ..." is split off and lemmatized
    to "que".

    NOTE: this function was originally also named
    ``test_tokenizer_handles_title_2`` — a duplicate of the definition
    above. Python silently rebinds the name, so pytest never collected the
    first test. Renamed to ``_title_3`` so both tests actually run.
    """
    tokens = fr_tokenizer("Qu'est-ce que tu fais?")
    assert len(tokens) == 7
    assert tokens[0].text == "Qu'"
    assert tokens[0].lemma_ == "que"
Loading…
Reference in New Issue