Add tests for the Russian language

Add tests for creating a Russian Language instance and a Russian tokenizer.
This commit is contained in:
yuukos 2017-10-13 14:04:37 +07:00
parent 622b6d6270
commit a229b6e0de
1 changed file with 25 additions and 6 deletions

View File

@ -16,7 +16,7 @@ from ..bn import Bengali
from ..he import Hebrew
from ..nb import Norwegian
from ..th import Thai
from ..ru import Russian
from ..tokens import Doc
from ..strings import StringStore
@ -30,7 +30,7 @@ import pytest
# These languages get run through generic tokenizer tests.
# Russian is part of the list, so it goes through the same generic tests.
LANGUAGES = [English, German, Spanish, Italian, French, Portuguese, Dutch,
             Swedish, Hungarian, Finnish, Bengali, Norwegian, Russian]
@pytest.fixture(params=LANGUAGES)
@ -53,6 +53,7 @@ def en_vocab():
def en_parser():
return English.Defaults.create_parser()
@pytest.fixture
def es_tokenizer():
    """Provide the tokenizer built by ``Spanish.Defaults.create_tokenizer()``."""
    spanish_defaults = Spanish.Defaults
    return spanish_defaults.create_tokenizer()
@ -83,11 +84,13 @@ def ja_tokenizer():
pytest.importorskip("MeCab")
return Japanese.Defaults.create_tokenizer()
@pytest.fixture
def japanese():
    """Provide a ``Japanese`` language instance.

    The requesting test is skipped when the ``MeCab`` module cannot be
    imported.
    """
    pytest.importorskip("MeCab")
    nlp = Japanese()
    return nlp
@pytest.fixture
def sv_tokenizer():
    """Provide the tokenizer built by ``Swedish.Defaults.create_tokenizer()``."""
    swedish_defaults = Swedish.Defaults
    return swedish_defaults.create_tokenizer()
@ -102,15 +105,30 @@ def bn_tokenizer():
def he_tokenizer():
return Hebrew.Defaults.create_tokenizer()
@pytest.fixture
def nb_tokenizer():
    """Provide the tokenizer built by ``Norwegian.Defaults.create_tokenizer()``."""
    norwegian_defaults = Norwegian.Defaults
    return norwegian_defaults.create_tokenizer()
@pytest.fixture
def th_tokenizer():
    """Provide the tokenizer built by ``Thai.Defaults.create_tokenizer()``.

    The requesting test is skipped when the ``pythainlp`` module cannot be
    imported.
    """
    # Only the skip side effect is needed; the imported module itself is
    # not used here, so its return value is not bound to a name.
    pytest.importorskip("pythainlp")
    return Thai.Defaults.create_tokenizer()
@pytest.fixture
def ru_tokenizer():
    """Provide the tokenizer built by ``Russian.Defaults.create_tokenizer()``.

    The requesting test is skipped when the ``pymorphy2`` module cannot be
    imported.
    """
    pytest.importorskip("pymorphy2")
    russian_defaults = Russian.Defaults
    return russian_defaults.create_tokenizer()
@pytest.fixture
def russian():
    """Provide a ``Russian`` language instance.

    The requesting test is skipped when the ``pymorphy2`` module cannot be
    imported.
    """
    pytest.importorskip("pymorphy2")
    nlp = Russian()
    return nlp
@pytest.fixture
def stringstore():
    """Provide a new ``StringStore`` constructed with no arguments."""
    store = StringStore()
    return store
@ -118,7 +136,7 @@ def stringstore():
@pytest.fixture
def en_entityrecognizer():
    """Provide the object returned by ``English.Defaults.create_entity()``."""
    return English.Defaults.create_entity()
@pytest.fixture
@ -130,6 +148,7 @@ def lemmatizer():
def text_file():
return StringIO()
@pytest.fixture
def text_file_b():
    """Provide a new ``BytesIO`` object constructed with no arguments."""
    buffer = BytesIO()
    return buffer
@ -149,11 +168,11 @@ def DE():
def pytest_addoption(parser):
    """Register the opt-in command-line switches used by the test suite.

    Three ``store_true`` flags are added: ``--models``, ``--vectors`` and
    ``--slow``.

    parser: Object exposing ``addoption``; this is the pytest hook that
        receives the command-line option parser.
    """
    parser.addoption("--models", action="store_true",
                     help="include tests that require full models")
    parser.addoption("--vectors", action="store_true",
                     help="include word vectors tests")
    parser.addoption("--slow", action="store_true",
                     help="include slow tests")
def pytest_runtest_setup(item):