spaCy/spacy/tests/conftest.py

# coding: utf-8
from __future__ import unicode_literals

from ..en import English
from ..de import German
from ..es import Spanish
from ..it import Italian
from ..fr import French
from ..pt import Portuguese
from ..nl import Dutch
from ..sv import Swedish
from ..hu import Hungarian
from ..tokens import Doc
from ..attrs import ORTH, TAG, HEAD, DEP

from StringIO import StringIO
import pytest


LANGUAGES = [English, German, Spanish, Italian, French, Portuguese, Dutch,
             Swedish, Hungarian]


@pytest.fixture(params=LANGUAGES)
def tokenizer(request):
    lang = request.param
    return lang.Defaults.create_tokenizer()


@pytest.fixture
def en_tokenizer():
    return English.Defaults.create_tokenizer()


@pytest.fixture
def en_vocab():
    return English.Defaults.create_vocab()


@pytest.fixture
def de_tokenizer():
    return German.Defaults.create_tokenizer()


@pytest.fixture
def hu_tokenizer():
    return Hungarian.Defaults.create_tokenizer()


@pytest.fixture
def text_file():
    return StringIO()


# deprecated, to be replaced with more specific instances
@pytest.fixture(scope="session")
def EN():
    return English()


# deprecated, to be replaced with more specific instances
@pytest.fixture(scope="session")
def DE():
    return German()


def pytest_addoption(parser):
    parser.addoption("--models", action="store_true",
        help="include tests that require full models")
    parser.addoption("--vectors", action="store_true",
        help="include word vectors tests")
    parser.addoption("--slow", action="store_true",
        help="include slow tests")


def pytest_runtest_setup(item):
    for opt in ['models', 'vectors', 'slow']:
        if opt in item.keywords and not item.config.getoption("--%s" % opt):
            pytest.skip("need --%s option to run" % opt)