mirror of https://github.com/explosion/spaCy.git
Additional abbreviation tests.
This commit is contained in:
parent
90d22db023
commit
0289b8ceaa
|
@@ -13,9 +13,15 @@ def hu_tokenizer(HU):
|
||||||
return HU.tokenizer
|
return HU.tokenizer
|
||||||
|
|
||||||
|
|
||||||
def test_abbreviations(hu_tokenizer):
|
@pytest.mark.parametrize(("input_str", "expected_length"), [
|
||||||
tokens = hu_tokenizer("A vs. egy")
|
("A vs. egy", 3),
|
||||||
assert len(tokens) == 3
|
("A dr. egy", 3),
|
||||||
|
("A .hu egy tld.", 5),
|
||||||
tokens = hu_tokenizer("A dr. egy")
|
("A .hu.", 3),
|
||||||
assert len(tokens) == 3
|
("Az egy.ketto pelda.", 4),
|
||||||
|
("A pl. rovidites.", 4),
|
||||||
|
("A S.M.A.R.T. szo.", 4)
|
||||||
|
])
|
||||||
|
def test_abbreviations(hu_tokenizer, input_str, expected_length):
|
||||||
|
tokens = hu_tokenizer(input_str)
|
||||||
|
assert len(tokens) == expected_length
|
||||||
|
|
Loading…
Reference in New Issue