diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index 28b5f4ab9..5f3fea342 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -11,8 +11,12 @@ from ..strings import StringStore
 from .. import util
 
 
+# These languages are used for generic tokenizer tests – only add a language
+# here if it's using spaCy's tokenizer (not a different library)
+# TODO: re-implement generic tokenizer tests
 _languages = ['bn', 'da', 'de', 'en', 'es', 'fi', 'fr', 'he', 'hu', 'id',
-              'it', 'nb', 'nl', 'pl', 'pt', 'sv', 'th','xx']
+              'it', 'nb', 'nl', 'pl', 'pt', 'sv', 'xx']
+
 _models = {'en': ['en_core_web_sm'],
            'de': ['de_core_news_md'],
            'fr': ['fr_depvec_web_lg'],
@@ -42,6 +46,7 @@ def FR(request):
     #lang = util.get_lang_class(request.param)
     #return lang.Defaults.create_tokenizer()
 
+
 @pytest.fixture
 def tokenizer():
     return util.get_lang_class('xx').Defaults.create_tokenizer()
@@ -87,10 +92,12 @@ def hu_tokenizer():
 def fi_tokenizer():
     return util.get_lang_class('fi').Defaults.create_tokenizer()
 
+
 @pytest.fixture
 def id_tokenizer():
     return util.get_lang_class('id').Defaults.create_tokenizer()
 
+
 @pytest.fixture
 def sv_tokenizer():
     return util.get_lang_class('sv').Defaults.create_tokenizer()
@@ -105,6 +112,7 @@ def bn_tokenizer():
 def he_tokenizer():
     return util.get_lang_class('he').Defaults.create_tokenizer()
 
+
 @pytest.fixture
 def nb_tokenizer():
     return util.get_lang_class('nb').Defaults.create_tokenizer()
@@ -129,6 +137,7 @@ def en_entityrecognizer():
 def text_file():
     return StringIO()
 
+
 @pytest.fixture
 def text_file_b():
     return BytesIO()
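
For reference, fixtures such as the `id_tokenizer` added above are consumed by
pytest through argument-name injection: any test that names the fixture in its
signature receives the tokenizer the fixture returns. A minimal sketch (the
test name, sample text, and assertion are illustrative assumptions, not part
of this patch):

    # Hypothetical test using the id_tokenizer fixture from conftest.py.
    # pytest resolves the 'id_tokenizer' argument to the fixture's return
    # value, i.e. the Indonesian tokenizer built via util.get_lang_class.
    def test_id_tokenizer_splits_trailing_punct(id_tokenizer):
        tokens = id_tokenizer("Saya suka kopi.")
        # spaCy's tokenizer splits trailing punctuation into its own token
        assert tokens[-1].text == "."

Creating the tokenizer inside each fixture via `util.get_lang_class(...)`
keeps conftest.py import-light: a language's data is only loaded when a test
actually requests that fixture.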