* Update tests after refactor

This commit is contained in:
Matthew Honnibal 2015-07-13 22:30:01 +02:00
parent d87d71caf4
commit 98382bd7a0
2 changed files with 6 additions and 18 deletions

View File

@@ -2,13 +2,15 @@ from __future__ import unicode_literals
import pytest
import gc
from spacy.en import English
from spacy.en import English, LOCAL_DATA_DIR
import os
data_dir = os.environ.get('SPACY_DATA', LOCAL_DATA_DIR)
# Let this have its own instances, as we have to be careful about memory here
# that's the point, after all
def get_orphan_token(text, i):
nlp = English(load_vectors=False)
nlp = English(load_vectors=False, data_dir=data_dir)
tokens = nlp(text)
gc.collect()
token = tokens[i]
@@ -22,7 +24,7 @@ def test_orphan():
dummy = get_orphan_token('Load and flush the memory', 0)
dummy = get_orphan_token('Load again...', 0)
assert orphan.orth_ == 'orphan'
assert orphan.pos_ == 'NOUN'
assert orphan.pos_ in ('ADJ', 'NOUN')
assert orphan.head.orth_ == 'token'
@@ -36,7 +38,7 @@ def _orphan_from_list(toks):
def test_list_orphans():
# Test case from NSchrading
nlp = English(load_vectors=False)
nlp = English(load_vectors=False, data_dir=data_dir)
samples = ["a", "test blah wat okay"]
lst = []
for sample in samples:

View File

@@ -13,19 +13,6 @@ def test_getitem(EN):
tokens[len(tokens)]
def test_trailing_spaces(EN):
tokens = EN(u' Give it back! He pleaded. ')
assert tokens[0].orth_ == ' '
assert not tokens._has_trailing_space(0)
assert tokens._has_trailing_space(1)
assert tokens._has_trailing_space(2)
assert not tokens._has_trailing_space(3)
assert tokens._has_trailing_space(4)
assert tokens._has_trailing_space(5)
assert not tokens._has_trailing_space(6)
assert tokens._has_trailing_space(7)
def test_serialize(EN):
tokens = EN(u' Give it back! He pleaded. ')
packed = tokens.serialize()
@@ -33,4 +20,3 @@ def test_serialize(EN):
assert tokens.string == new_tokens.string
assert [t.orth_ for t in tokens] == [t.orth_ for t in new_tokens]
assert [t.orth for t in tokens] == [t.orth for t in new_tokens]
assert [tokens._has_trailing_space(t.i) for t in tokens] == [new_tokens._has_trailing_space(t.i) for t in new_tokens]