diff --git a/tests/tokens/test_token_references.py b/tests/tokens/test_token_references.py index 265327fb0..fd5444623 100644 --- a/tests/tokens/test_token_references.py +++ b/tests/tokens/test_token_references.py @@ -2,13 +2,15 @@ from __future__ import unicode_literals import pytest import gc -from spacy.en import English +from spacy.en import English, LOCAL_DATA_DIR +import os +data_dir = os.environ.get('SPACY_DATA', LOCAL_DATA_DIR) # Let this have its own instances, as we have to be careful about memory here # that's the point, after all def get_orphan_token(text, i): - nlp = English(load_vectors=False) + nlp = English(load_vectors=False, data_dir=data_dir) tokens = nlp(text) gc.collect() token = tokens[i] @@ -22,7 +24,7 @@ def test_orphan(): dummy = get_orphan_token('Load and flush the memory', 0) dummy = get_orphan_token('Load again...', 0) assert orphan.orth_ == 'orphan' - assert orphan.pos_ == 'NOUN' + assert orphan.pos_ in ('ADJ', 'NOUN') assert orphan.head.orth_ == 'token' @@ -36,7 +38,7 @@ def _orphan_from_list(toks): def test_list_orphans(): # Test case from NSchrading - nlp = English(load_vectors=False) + nlp = English(load_vectors=False, data_dir=data_dir) samples = ["a", "test blah wat okay"] lst = [] for sample in samples: diff --git a/tests/tokens/test_tokens_api.py b/tests/tokens/test_tokens_api.py index 85a3d93d6..42fc977c7 100644 --- a/tests/tokens/test_tokens_api.py +++ b/tests/tokens/test_tokens_api.py @@ -13,19 +13,6 @@ def test_getitem(EN): tokens[len(tokens)] -def test_trailing_spaces(EN): - tokens = EN(u' Give it back! He pleaded. ') - assert tokens[0].orth_ == ' ' - assert not tokens._has_trailing_space(0) - assert tokens._has_trailing_space(1) - assert tokens._has_trailing_space(2) - assert not tokens._has_trailing_space(3) - assert tokens._has_trailing_space(4) - assert tokens._has_trailing_space(5) - assert not tokens._has_trailing_space(6) - assert tokens._has_trailing_space(7) - - def test_serialize(EN): tokens = EN(u' Give it back! He pleaded. ') packed = tokens.serialize() @@ -33,4 +20,3 @@ def test_serialize(EN): assert tokens.string == new_tokens.string assert [t.orth_ for t in tokens] == [t.orth_ for t in new_tokens] assert [t.orth for t in tokens] == [t.orth for t in new_tokens] - assert [tokens._has_trailing_space(t.i) for t in tokens] == [new_tokens._has_trailing_space(t.i) for t in new_tokens]