mirror of https://github.com/explosion/spaCy.git
Adding unit tests for tokenization in French (with title)
This commit is contained in:
parent
ad8129cb45
commit
44cb486849
|
@ -37,4 +37,27 @@ def test_tokenizer_handles_exc_in_text_2(fr_tokenizer):
|
|||
tokens = fr_tokenizer(text)
|
||||
assert len(tokens) == 11
|
||||
assert tokens[1].text == "après-midi"
|
||||
assert tokens[9].text == "italo-mexicain"
|
||||
assert tokens[9].text == "italo-mexicain"
|
||||
|
||||
def test_tokenizer_handles_title(fr_tokenizer):
    """Elided and inverted clitics in a title-case question are split correctly.

    "N'est-ce pas génial?" must tokenize into 6 tokens, with the elided
    negation "N'" lemmatized to "ne" and the inverted clitic "-ce"
    lemmatized to "ce".
    """
    tokens = fr_tokenizer("N'est-ce pas génial?")
    assert len(tokens) == 6
    # (index, surface form, lemma) for the tokens we care about
    expected = [(0, "N'", "ne"), (2, "-ce", "ce")]
    for idx, form, lemma in expected:
        assert tokens[idx].text == form
        assert tokens[idx].lemma_ == lemma
|
||||
def test_tokenizer_handles_title_2(fr_tokenizer):
    """A title-case inverted verb keeps its surface form but lemmatizes to the infinitive.

    "Est-ce pas génial?" must tokenize into 6 tokens; the leading "Est"
    stays as written while its lemma is the infinitive "être".
    """
    tokens = fr_tokenizer("Est-ce pas génial?")
    assert len(tokens) == 6
    first = tokens[0]
    assert first.text == "Est"
    assert first.lemma_ == "être"
|
||||
def test_tokenizer_handles_title_3(fr_tokenizer):
    """Elided "Qu'" at sentence start is split off and lemmatized to "que".

    "Qu'est-ce que tu fais?" must tokenize into 7 tokens.

    Bug fix: this function was a second definition of
    ``test_tokenizer_handles_title_2``. Python rebinds the module-level
    name, so pytest would collect only this later definition and the
    earlier test of the same name would silently never run. Renamed to
    ``test_tokenizer_handles_title_3`` so both tests are collected.
    """
    text = "Qu'est-ce que tu fais?"
    tokens = fr_tokenizer(text)
    assert len(tokens) == 7
    assert tokens[0].text == "Qu'"
    assert tokens[0].lemma_ == "que"
|
Loading…
Reference in New Issue