spaCy/spacy/tests/lang/ga/test_tokenizer.py

# coding: utf8
from __future__ import unicode_literals

import pytest


# Each case pairs an input text with the tokens expected from it. The
# expected lists keep the abbreviations 'lch.' and 'm.sh.' as single tokens
# (trailing period included) and split other punctuation into its own token.
GA_TOKEN_EXCEPTION_TESTS = [
    (
        'Niall Ó Domhnaill, Rialtas na hÉireann 1977 (lch. 600).',
        ['Niall', 'Ó', 'Domhnaill', ',', 'Rialtas', 'na', 'hÉireann', '1977',
         '(', 'lch.', '600', ')', '.'],
    ),
    (
        'Daoine a bhfuil Gaeilge acu, m.sh. tusa agus mise',
        ['Daoine', 'a', 'bhfuil', 'Gaeilge', 'acu', ',', 'm.sh.', 'tusa',
         'agus', 'mise'],
    ),
]


@pytest.mark.parametrize('text,expected_tokens', GA_TOKEN_EXCEPTION_TESTS)
def test_tokenizer_handles_exception_cases(ga_tokenizer, text, expected_tokens):
    """Tokenize ``text`` and compare the token texts with ``expected_tokens``.

    Tokens flagged as whitespace (``is_space``) are left out of the
    comparison, so only non-space token texts must match the expected list.
    """
    doc = ga_tokenizer(text)
    observed = []
    for tok in doc:
        if tok.is_space:
            # Whitespace tokens are not part of the expected output.
            continue
        observed.append(tok.text)
    assert expected_tokens == observed
first stab at test 2017-09-11 07:57:48 +00:00			`# coding: utf8`
			`from __future__ import unicode_literals`

			`import pytest`


copy/paste error 2017-09-11 08:33:17 +00:00			`GA_TOKEN_EXCEPTION_TESTS = [`
Ó, not O 2017-10-31 22:54:42 +00:00			`('Niall Ó Domhnaill, Rialtas na hÉireann 1977 (lch. 600).', ['Niall', 'Ó', 'Domhnaill', ',', 'Rialtas', 'na', 'hÉireann', '1977', '(', 'lch.', '600', ')', '.']),`
first stab at test 2017-09-11 07:57:48 +00:00			`('Daoine a bhfuil Gaeilge acu, m.sh. tusa agus mise', ['Daoine', 'a', 'bhfuil', 'Gaeilge', 'acu', ',', 'm.sh.', 'tusa', 'agus', 'mise'])`
			`]`


			`@pytest.mark.parametrize('text,expected_tokens', GA_TOKEN_EXCEPTION_TESTS)`
			`def test_tokenizer_handles_exception_cases(ga_tokenizer, text, expected_tokens):`
			`tokens = ga_tokenizer(text)`
			`token_list = [token.text for token in tokens if not token.is_space]`
			`assert expected_tokens == token_list`