mirror of https://github.com/explosion/spaCy.git
* Update tests after refactor
This commit is contained in:
parent
d87d71caf4
commit
98382bd7a0
|
@ -2,13 +2,15 @@ from __future__ import unicode_literals
|
||||||
import pytest
|
import pytest
|
||||||
import gc
|
import gc
|
||||||
|
|
||||||
from spacy.en import English
|
from spacy.en import English, LOCAL_DATA_DIR
|
||||||
|
import os
|
||||||
|
|
||||||
|
data_dir = os.environ.get('SPACY_DATA', LOCAL_DATA_DIR)
|
||||||
# Let this have its own instances, as we have to be careful about memory here
|
# Let this have its own instances, as we have to be careful about memory here
|
||||||
# that's the point, after all
|
# that's the point, after all
|
||||||
|
|
||||||
def get_orphan_token(text, i):
|
def get_orphan_token(text, i):
|
||||||
nlp = English(load_vectors=False)
|
nlp = English(load_vectors=False, data_dir=data_dir)
|
||||||
tokens = nlp(text)
|
tokens = nlp(text)
|
||||||
gc.collect()
|
gc.collect()
|
||||||
token = tokens[i]
|
token = tokens[i]
|
||||||
|
@ -22,7 +24,7 @@ def test_orphan():
|
||||||
dummy = get_orphan_token('Load and flush the memory', 0)
|
dummy = get_orphan_token('Load and flush the memory', 0)
|
||||||
dummy = get_orphan_token('Load again...', 0)
|
dummy = get_orphan_token('Load again...', 0)
|
||||||
assert orphan.orth_ == 'orphan'
|
assert orphan.orth_ == 'orphan'
|
||||||
assert orphan.pos_ == 'NOUN'
|
assert orphan.pos_ in ('ADJ', 'NOUN')
|
||||||
assert orphan.head.orth_ == 'token'
|
assert orphan.head.orth_ == 'token'
|
||||||
|
|
||||||
|
|
||||||
|
@ -36,7 +38,7 @@ def _orphan_from_list(toks):
|
||||||
|
|
||||||
def test_list_orphans():
|
def test_list_orphans():
|
||||||
# Test case from NSchrading
|
# Test case from NSchrading
|
||||||
nlp = English(load_vectors=False)
|
nlp = English(load_vectors=False, data_dir=data_dir)
|
||||||
samples = ["a", "test blah wat okay"]
|
samples = ["a", "test blah wat okay"]
|
||||||
lst = []
|
lst = []
|
||||||
for sample in samples:
|
for sample in samples:
|
||||||
|
|
|
@ -13,19 +13,6 @@ def test_getitem(EN):
|
||||||
tokens[len(tokens)]
|
tokens[len(tokens)]
|
||||||
|
|
||||||
|
|
||||||
def test_trailing_spaces(EN):
|
|
||||||
tokens = EN(u' Give it back! He pleaded. ')
|
|
||||||
assert tokens[0].orth_ == ' '
|
|
||||||
assert not tokens._has_trailing_space(0)
|
|
||||||
assert tokens._has_trailing_space(1)
|
|
||||||
assert tokens._has_trailing_space(2)
|
|
||||||
assert not tokens._has_trailing_space(3)
|
|
||||||
assert tokens._has_trailing_space(4)
|
|
||||||
assert tokens._has_trailing_space(5)
|
|
||||||
assert not tokens._has_trailing_space(6)
|
|
||||||
assert tokens._has_trailing_space(7)
|
|
||||||
|
|
||||||
|
|
||||||
def test_serialize(EN):
|
def test_serialize(EN):
|
||||||
tokens = EN(u' Give it back! He pleaded. ')
|
tokens = EN(u' Give it back! He pleaded. ')
|
||||||
packed = tokens.serialize()
|
packed = tokens.serialize()
|
||||||
|
@ -33,4 +20,3 @@ def test_serialize(EN):
|
||||||
assert tokens.string == new_tokens.string
|
assert tokens.string == new_tokens.string
|
||||||
assert [t.orth_ for t in tokens] == [t.orth_ for t in new_tokens]
|
assert [t.orth_ for t in tokens] == [t.orth_ for t in new_tokens]
|
||||||
assert [t.orth for t in tokens] == [t.orth for t in new_tokens]
|
assert [t.orth for t in tokens] == [t.orth for t in new_tokens]
|
||||||
assert [tokens._has_trailing_space(t.i) for t in tokens] == [new_tokens._has_trailing_space(t.i) for t in new_tokens]
|
|
||||||
|
|
Loading…
Reference in New Issue