spaCy/fabfile.py

162 lines
5.2 KiB
Python
Raw Normal View History

2015-10-25 12:15:51 +00:00
from __future__ import print_function
from fabric.api import local, lcd, env, settings, prefix
2015-01-03 10:02:21 +00:00
from os.path import exists as file_exists
2015-01-04 18:30:24 +00:00
from fabtools.python import virtualenv
2015-01-03 10:02:21 +00:00
from os import path
import os
import shutil
2015-10-25 12:15:51 +00:00
from pathlib import Path
2015-01-03 10:02:21 +00:00
# Directory that contains this fabfile.
PWD = path.dirname(__file__)
# Location of the project virtualenv that the tasks below activate.
VENV_DIR = path.join(PWD, '.env')
def counts():
    """Placeholder task; it currently does nothing.

    Steps sketched for a future implementation:

    * Tokenize the corpus: ``tokenize()``, then ``get_freqs()``.
    * Collate the counts: ``cat freqs | sort -k2 | gather_freqs()``.
    * ``smooth()``
    """
    return None
# clean, make, sdist
# cd to new env, install from sdist,
# Push changes to server
# Pull changes on server
# clean make init model
# test --vectors --slow
# train
# test --vectors --slow --models
# sdist
# upload data to server
# change to clean venv
# py2: install from sdist, test --slow, download data, test --models --vectors
# py3: install from sdist, test --slow, download data, test --models --vectors
def prebuild(build_dir='/tmp/build_spacy'):
    """Build and test the project from a fresh clone, then create an sdist.

    The task recreates ``build_dir``, clones this repository into it and
    creates a virtualenv at ``<build_dir>/.env``. With that virtualenv
    activated it installs the dependencies, runs ``fab clean make``, copies
    the WordNet and frequency data from the source checkout, initialises the
    English model, runs ``fab test`` and finally ``python setup.py sdist``.

    build_dir: Scratch directory to build in. An existing directory at this
        path is deleted first.
    """
    if file_exists(build_dir):
        shutil.rmtree(build_dir)
    os.mkdir(build_dir)

    # Use an absolute path: every command below runs from inside build_dir,
    # so a relative checkout path would resolve against the wrong directory.
    spacy_dir = path.dirname(path.abspath(__file__))
    build_venv = path.join(build_dir, '.env')
    activate = 'cd %s && PYTHONPATH=`pwd` && . %s/bin/activate' % (build_dir, build_venv)

    with lcd(build_dir):
        local('git clone %s .' % spacy_dir)
        local('virtualenv ' + build_venv)
        with prefix(activate):
            local('pip install cython fabric fabtools pytest')
            local('pip install --no-cache-dir -r requirements.txt')
            local('fab clean make')
            # The corpora are not part of the clone, so copy them over from
            # the source checkout.
            local('cp -r %s/corpora/en/wordnet corpora/en/' % spacy_dir)
            local('cp %s/corpora/en/freqs.txt.gz corpora/en/' % spacy_dir)
            # PYTHONPATH is set inline here because the assignment in the
            # prefix is not exported to child processes.
            local('PYTHONPATH=`pwd` python bin/init_model.py en lang_data corpora spacy/en/data')
            local('fab test')
            local('python setup.py sdist')
2015-01-03 10:02:21 +00:00
2015-10-25 12:15:51 +00:00
2015-07-08 10:34:35 +00:00
def docs():
    """Regenerate the website: code samples first, then the Jade pages.

    Runs ``website/create_code_samples`` inside the project virtualenv, then
    compiles the home, docs, blog and tutorials index pages. Finally it
    compiles every blog post, i.e. every directory under
    ``website/src/jade/blog`` that contains both ``index.jade`` and
    ``meta.jade``.
    """
    website_dir = path.join(path.dirname(__file__), 'website')

    def jade(source_name, out_dir):
        """Compile ``website/src/jade/<source_name>`` into ``website/site/<out_dir>``."""
        jade_loc = path.join(website_dir, 'src', 'jade', source_name)
        out_loc = path.join(website_dir, 'site', out_dir)
        local('jade -P %s --out %s' % (jade_loc, out_loc))

    with virtualenv(VENV_DIR):
        local('./website/create_code_samples tests/website/ website/src/code/')

    jade('home/index.jade', '')
    jade('docs/index.jade', 'docs/')
    jade('blog/index.jade', 'blog/')
    jade('tutorials/index.jade', 'tutorials/')

    blog_src = Path(__file__).parent / 'website' / 'src' / 'jade' / 'blog'
    for post_dir in blog_src.iterdir():
        is_post = (post_dir.is_dir()
                   and (post_dir / 'index.jade').exists()
                   and (post_dir / 'meta.jade').exists())
        if not is_post:
            continue
        post_name = post_dir.parts[-1]
        # Posts are written beneath the same 'blog/' directory as the blog
        # index (this used to be 'blogs/', which did not match the index).
        # The source is given relative to the Jade root so the result does not
        # depend on __file__ being an absolute path.
        jade(path.join('blog', post_name, 'index.jade'),
             path.join('blog', post_name))
2015-01-03 10:02:21 +00:00
def publish(version):
    """Tag a release, push it, and upload the source distribution.

    version: Used as the git tag name and to locate
        ``dist/spacy-<version>.tar.gz`` for the upload.
    """
    release_steps = (
        'git push origin master',
        'git tag -a %s' % version,
        'git push origin %s' % version,
        'python setup.py sdist',
        'python setup.py register',
        'twine upload dist/spacy-%s.tar.gz' % version,
    )
    with virtualenv(VENV_DIR):
        for step in release_steps:
            local(step)
2015-01-03 10:02:21 +00:00
def env(lang="python2.7"):
    """Recreate the project virtualenv from scratch.

    Note that this task shadows the ``env`` name imported from
    ``fabric.api``; the name is kept because it is the task's public name.

    lang: Interpreter handed to ``virtualenv -p``.
    """
    # Work from the fabfile's directory so the environment is removed and
    # created at VENV_DIR, which is where every other task looks for it,
    # rather than wherever fab happened to be invoked from.
    with lcd(PWD):
        if file_exists(VENV_DIR):
            local('rm -rf .env')
        local('virtualenv -p %s .env' % lang)
2015-01-03 10:02:21 +00:00
def install():
    """Install the built sdist and pytest into the project virtualenv.

    Upgrades setuptools first, then installs ``dist/*.tar.gz`` and pytest.
    """
    pip_arguments = ('--upgrade setuptools', 'dist/*.tar.gz', 'pytest')
    with virtualenv(VENV_DIR):
        for arguments in pip_arguments:
            local('pip install ' + arguments)
2014-07-05 18:49:34 +00:00
def make():
    """Install the build requirements and compile the extensions in place.

    Everything runs from the fabfile's directory with the project virtualenv
    activated.
    """
    build_steps = (
        'pip install cython',
        'pip install murmurhash',
        'pip install -r requirements.txt',
        'python setup.py build_ext --inplace',
    )
    repo_root = path.dirname(__file__)
    with virtualenv(VENV_DIR), lcd(repo_root):
        for step in build_steps:
            local(step)
2015-01-03 10:02:21 +00:00
def clean():
    """Run ``python setup.py clean --all`` from the fabfile's directory."""
    repo_root = path.dirname(__file__)
    with lcd(repo_root):
        local('python setup.py clean --all')
2015-01-03 10:02:21 +00:00
2014-07-05 18:49:34 +00:00
2015-01-03 10:02:21 +00:00
def test():
    """Run the whole ``tests/`` directory with py.test in the project virtualenv.

    The ``-x`` flag makes py.test stop at the first failing test.
    """
    repo_root = path.dirname(__file__)
    with virtualenv(VENV_DIR), lcd(repo_root):
        local('py.test -x tests/')
def train(json_dir=None, dev_loc=None, model_dir=None):
    """Initialise a model directory and train the parser into it.

    json_dir: Passed as the first argument to ``bin/parser/train.py``;
        defaults to ``corpora/en/json``.
    dev_loc: Accepted but not used by this task.
    model_dir: Output directory for both scripts; defaults to ``models/en/``.

    NOTE(review): ``bin/init_model.py`` is invoked here with different
    positional arguments than in ``prebuild`` -- confirm which form is current.
    """
    json_dir = 'corpora/en/json' if json_dir is None else json_dir
    model_dir = 'models/en/' if model_dir is None else model_dir

    init_command = 'python bin/init_model.py lang_data/en/ corpora/en/ ' + model_dir
    train_command = 'python bin/parser/train.py %s %s' % (json_dir, model_dir)

    with virtualenv(VENV_DIR), lcd(path.dirname(__file__)):
        local(init_command)
        local(train_command)
2015-04-08 20:47:59 +00:00
2015-01-04 18:30:24 +00:00
def travis():
    """Open the Travis CI page with the ``open`` command.

    NOTE(review): the URL points at ``honnibal/thinc`` -- confirm that this is
    the intended project for this fabfile.
    """
    ci_url = 'https://travis-ci.org/honnibal/thinc'
    local('open ' + ci_url)
def pos():
    """Train, run and evaluate the tagger scripts inside the project virtualenv.

    Runs ``tools/train.py``, ``tools/tag.py`` and ``tools/eval_pos.py`` in
    that order, reading data from ``~/work_data/docparse`` and writing the
    tagged output to ``/tmp/tmp``.
    """
    pipeline = (
        'python tools/train.py ~/work_data/docparse/wsj02-21.conll ~/work_data/docparse/wsj22.conll spacy/en/data',
        'python tools/tag.py ~/work_data/docparse/wsj22.raw /tmp/tmp',
        'python tools/eval_pos.py ~/work_data/docparse/wsj22.conll /tmp/tmp',
    )
    with virtualenv(VENV_DIR):
        for command in pipeline:
            local(command)
def ner():
    """Retrain the NER model from scratch, tag the raw file and evaluate it.

    Deletes ``data/en/ner`` first. The commands run without activating the
    project virtualenv, and only the tail of the evaluation output is shown.
    """
    pipeline = (
        'rm -rf data/en/ner',
        'python tools/train_ner.py ~/work_data/docparse/wsj02-21.conll data/en/ner',
        'python tools/tag_ner.py ~/work_data/docparse/wsj22.raw /tmp/tmp',
        'python tools/eval_ner.py ~/work_data/docparse/wsj22.conll /tmp/tmp | tail',
    )
    for command in pipeline:
        local(command)
2014-11-12 12:22:05 +00:00
def conll():
    """Retrain on the CoNLL 2003 training file and evaluate on ``eng.testa``.

    Deletes ``data/en/ner`` before training into ``data/en/ner/``. The
    commands run without activating the project virtualenv.
    """
    pipeline = (
        'rm -rf data/en/ner',
        'python tools/conll03_train.py ~/work_data/ner/conll2003/eng.train data/en/ner/',
        'python tools/conll03_eval.py ~/work_data/ner/conll2003/eng.testa',
    )
    for command in pipeline:
        local(command)