Merge branch 'master' of ssh://github.com/honnibal/spaCy

This commit is contained in:
Matthew Honnibal 2015-11-03 18:07:25 +11:00
commit 389a373807
2 changed files with 9 additions and 5 deletions

7
fabfile.py vendored
View File

@ -54,10 +54,10 @@ def prebuild(build_dir='/tmp/build_spacy'):
local('pip install --no-cache-dir -r requirements.txt') local('pip install --no-cache-dir -r requirements.txt')
local('fab clean make') local('fab clean make')
local('cp -r %s/corpora/en/wordnet corpora/en/' % spacy_dir) local('cp -r %s/corpora/en/wordnet corpora/en/' % spacy_dir)
local('cp %s/corpora/en/freqs.txt.gz corpora/en/' % spacy_dir)
local('PYTHONPATH=`pwd` python bin/init_model.py en lang_data corpora spacy/en/data') local('PYTHONPATH=`pwd` python bin/init_model.py en lang_data corpora spacy/en/data')
local('fab test') local('fab test')
local('python setup.py sdist') local('PYTHONPATH=`pwd` python -m spacy.en.download --force all')
local('py.test --models spacy/tests/')
def docs(): def docs():
@ -121,9 +121,8 @@ def clean():
def test(): def test():
with virtualenv(VENV_DIR): with virtualenv(VENV_DIR):
# Run each test file separately. pytest is performing poorly, not sure why
with lcd(path.dirname(__file__)): with lcd(path.dirname(__file__)):
local('py.test -x tests/') local('py.test -x spacy/tests')
def train(json_dir=None, dev_loc=None, model_dir=None): def train(json_dir=None, dev_loc=None, model_dir=None):

View File

@ -5,6 +5,11 @@ import pickle
import pytest import pytest
import tempfile import tempfile
try:
unicode
except NameError:
unicode = str
@pytest.mark.models @pytest.mark.models
def test_pickle_english(EN): def test_pickle_english(EN):
file_ = io.BytesIO() file_ = io.BytesIO()
@ -21,7 +26,7 @@ def test_cloudpickle_to_file(EN):
p = cloudpickle.CloudPickler(f) p = cloudpickle.CloudPickler(f)
p.dump(EN) p.dump(EN)
f.close() f.close()
loaded_en = cloudpickle.load(open(f.name)) loaded_en = cloudpickle.load(open(f.name, 'rb'))
os.unlink(f.name) os.unlink(f.name)
doc = loaded_en(unicode('test parse')) doc = loaded_en(unicode('test parse'))
assert len(doc) == 2 assert len(doc) == 2