add model sanity test

2016-05-03 12:51:47 +02:00 · 2016-05-03 12:51:47 +02:00 · 1786331cd8
parent 1f1532142f
commit 1786331cd8
3 changed files with 68 additions and 8 deletions
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@ -1,17 +1,15 @@
 from spacy.en import English
 import pytest
 import os
 import spacy
@pytest.fixture(scope="session")
 def EN():
-    if os.environ.get('SPACY_DATA'):
+    return spacy.load("en")
-        data_dir = os.environ.get('SPACY_DATA')
+
-    else:
+@pytest.fixture(scope="session")
-        data_dir = None
+def DE():
-    print("Load EN from %s" % data_dir)
+    return spacy.load("de")
    return English(data_dir=data_dir)
 def pytest_addoption(parser):
--- a/spacy/tests/integration/__init__.py
+++ b/spacy/tests/integration/__init__.py
--- a/spacy/tests/integration/test_model_sanity.py
+++ b/spacy/tests/integration/test_model_sanity.py
@ -0,0 +1,62 @@
 # -*- coding: utf-8 -*-
 import pytest
 import numpy
@pytest.mark.models
 class TestModelSanity:
 	"""
 	Formal sanity checks for the English ('en') and German ('de') models.
 	Each test receives one processed example sentence per language and only verifies that the
 	annotations are formally set; it is not meant to judge whether their content is correct.
 	"""
 	@pytest.fixture(scope='class', params=['en','de'])
 	def example(self, request, EN, DE):
 		# returns the example sentence processed by the pipeline selected through request.param;
 		# EN and DE are fixtures provided from outside this class
 		if request.param == 'en':
 			return EN(u'There was a stranger standing at the big street talking to herself.')
 		elif request.param == 'de':
 			return DE(u'An der großen Straße stand eine merkwürdige Gestalt und führte Selbstgespräche.')
 		# fail loudly instead of handing None to every test when params grows without a matching branch
 		raise ValueError('no example sentence for language %r' % request.param)
 	def test_tokenization(self, example):
 		# tokenization should split the document into more than one token
 		assert len(example) > 1
 	def test_tagging(self, example):
 		# if tagging was done properly, neither the coarse pos nor the fine-grained tag is empty (0)
 		assert example.is_tagged
 		assert all(t.pos != 0 for t in example)
 		assert all(t.tag != 0 for t in example)
 	def test_parsing(self, example):
 		# if parsing was done properly
 		# - dependency labels shouldn't be empty
 		# - at least one token should have a head other than itself
 		#   (in spaCy only the root is its own head, so a parse where every token
 		#   points at itself means no attachment was made)
 		assert example.is_parsed
 		assert all(t.dep != 0 for t in example)
 		# compare head index with token index; comparing the label id t.dep with the index says nothing about heads
 		assert any(t.head.i != t.i for t in example)
 	def test_ner(self, example):
 		# if ner was done properly, ent_iob shouldn't be empty
 		assert all(t.ent_iob != 0 for t in example)
 	def test_vectors(self, example):
 		# if vectors are available, they should differ on different words
 		# this isn't a perfect test since it could in principle fail in a sane model as well,
 		# but that's very unlikely and a good indicator that something is wrong
 		first, second, third = [example[i].vector for i in (0, 1, 2)]
 		assert not numpy.array_equal(first, second)
 		assert not numpy.array_equal(first, third)
 		assert not numpy.array_equal(second, third)
 	def test_probs(self, example):
 		# if frequencies/probabilities are okay, they should differ for different words
 		# this isn't a perfect test since it could in principle fail in a sane model as well,
 		# but that's very unlikely and a good indicator that something is wrong
 		first, second, third = [example[i].prob for i in (0, 1, 2)]
 		assert first != second
 		assert first != third
 		assert second != third