From 0289b8ceaaa514116ca81b12cbf33729807a3ddd Mon Sep 17 00:00:00 2001
From: Gyorgy Orosz
Date: Thu, 8 Dec 2016 12:17:44 +0100
Subject: [PATCH] Additional abbreviation tests.

---
Note (not part of the commit message): test_abbreviations is converted
from two hard-coded checks into a pytest.mark.parametrize table of
(input_str, expected_length) pairs. The original "A vs. egy" and
"A dr. egy" cases are kept, and five new inputs are added.

 spacy/tests/hu/test_tokenizer.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/spacy/tests/hu/test_tokenizer.py b/spacy/tests/hu/test_tokenizer.py
index f1d0124c3..4cbf1757d 100644
--- a/spacy/tests/hu/test_tokenizer.py
+++ b/spacy/tests/hu/test_tokenizer.py
@@ -13,9 +13,15 @@ def hu_tokenizer(HU):
     return HU.tokenizer
 
 
-def test_abbreviations(hu_tokenizer):
-    tokens = hu_tokenizer("A vs. egy")
-    assert len(tokens) == 3
-
-    tokens = hu_tokenizer("A dr. egy")
-    assert len(tokens) == 3
+@pytest.mark.parametrize(("input_str", "expected_length"), [
+    ("A vs. egy", 3),
+    ("A dr. egy", 3),
+    ("A .hu egy tld.", 5),
+    ("A .hu.", 3),
+    ("Az egy.ketto pelda.", 4),
+    ("A pl. rovidites.", 4),
+    ("A S.M.A.R.T. szo.", 4)
+])
+def test_abbreviations(hu_tokenizer, input_str, expected_length):
+    tokens = hu_tokenizer(input_str)
+    assert len(tokens) == expected_length