mirror of https://github.com/explosion/spaCy.git
Additional abbreviation tests.
This commit is contained in:
parent
90d22db023
commit
0289b8ceaa
|
@ -13,9 +13,15 @@ def hu_tokenizer(HU):
|
|||
return HU.tokenizer
|
||||
|
||||
|
||||
def test_abbreviations(hu_tokenizer):
    """Check that short sentences containing an abbreviation give 3 tokens.

    Each sample sentence is passed to the ``hu_tokenizer`` callable and the
    length of the result must be exactly three.
    """
    # Same two sentences, in the same order, each expected to yield 3 tokens.
    for sentence in ("A vs. egy", "A dr. egy"):
        assert len(hu_tokenizer(sentence)) == 3
|
||||
@pytest.mark.parametrize(
    ("input_str", "expected_length"),
    [
        # Each case is (text handed to the tokenizer, expected token count).
        ("A vs. egy", 3),
        ("A dr. egy", 3),
        ("A .hu egy tld.", 5),
        ("A .hu.", 3),
        ("Az egy.ketto pelda.", 4),
        ("A pl. rovidites.", 4),
        ("A S.M.A.R.T. szo.", 4)
    ]
)
def test_abbreviations(hu_tokenizer, input_str, expected_length):
    """Tokenize ``input_str`` and compare the token count to the expectation.

    Args:
        hu_tokenizer: Callable that tokenizes a string and returns a sized
            result.
        input_str: Text to tokenize.
        expected_length: Number of tokens the tokenizer must produce.
    """
    assert len(hu_tokenizer(input_str)) == expected_length
|
||||
|
|
Loading…
Reference in New Issue