Mark most Hungarian tokenizer test cases as slow (#4883)

* Mark most Hungarian tokenizer test cases as slow

Mark most Hungarian tokenizer test cases as slow to reduce the runtime
of the test suite in ordinary usage:

* for normal tests: run default tests plus 10% of the detailed tests
* for slow tests: run all tests

* Rework to mark individual tests as slow
This commit is contained in:
adrianeboyd 2020-01-08 12:34:06 +01:00 committed by Matthew Honnibal
parent 7b96a5e10f
commit aef83e8070
1 changed file with 11 additions and 4 deletions

View File

@@ -296,9 +296,8 @@ WIKI_TESTS = [
("cérium(IV)-oxid", ["cérium", "(", "IV", ")", "-oxid"]),
]
TESTCASES = (
DEFAULT_TESTS
+ DOT_TESTS
EXTRA_TESTS = (
DOT_TESTS
+ QUOTE_TESTS
+ NUMBER_TESTS
+ HYPHEN_TESTS
@@ -306,8 +305,16 @@ TESTCASES = (
+ TYPO_TESTS
)
# normal: default tests + 10% of extra tests
TESTS = DEFAULT_TESTS
TESTS.extend([x for i, x in enumerate(EXTRA_TESTS) if i % 10 == 0])
@pytest.mark.parametrize("text,expected_tokens", TESTCASES)
# slow: remaining 90% of extra tests
SLOW_TESTS = [x for i, x in enumerate(EXTRA_TESTS) if i % 10 != 0]
TESTS.extend([pytest.param(x[0], x[1], marks=pytest.mark.slow()) if not isinstance(x[0], tuple) else x for x in SLOW_TESTS])
@pytest.mark.parametrize("text,expected_tokens", TESTS)
def test_hu_tokenizer_handles_testcases(hu_tokenizer, text, expected_tokens):
tokens = hu_tokenizer(text)
token_list = [token.text for token in tokens if not token.is_space]