spaCy/spacy/en/download.py

62 lines
1.5 KiB
Python
Raw Normal View History

import sys
2015-01-02 10:44:41 +00:00
import os
import shutil
2015-11-15 14:58:21 +00:00
import plac
from sputnik import Sputnik
2015-01-02 10:44:41 +00:00
2015-11-15 14:58:21 +00:00
def migrate(path):
    """Remove old-style data artifacts located directly under ``path``.

    The ``data`` entry is deleted recursively when it is a real directory;
    a symlink named ``data`` is left in place.  Afterwards every entry in
    ``path`` whose name ends in ``.tgz`` is unlinked.
    """
    legacy_dir = os.path.join(path, 'data')
    is_real_dir = os.path.isdir(legacy_dir) and not os.path.islink(legacy_dir)
    if is_real_dir:
        shutil.rmtree(legacy_dir)

    archives = [name for name in os.listdir(path) if name.endswith('.tgz')]
    for name in archives:
        os.unlink(os.path.join(path, name))
2015-11-15 14:58:21 +00:00
def link(package, path):
    """Expose the package's ``data`` directory at ``path``.

    Anything already occupying ``path`` is removed first, then ``path`` is
    created as a symlink to ``package.dir_path('data')``.  When ``os`` has
    no ``symlink`` attribute the directory is copied instead.

    package: object providing ``dir_path(name)``.
    path:    destination path to (re)create.
    """
    # Clear the destination.  The symlink check must come first:
    # os.path.isdir() follows links and shutil.rmtree() raises OSError when
    # handed a symlink, so a link left by a previous run used to make a
    # second run fail.  islink() also catches dangling links, which
    # os.path.exists() reports as missing.
    if os.path.islink(path):
        os.unlink(path)
    elif os.path.isdir(path):
        shutil.rmtree(path)
    elif os.path.exists(path):
        os.unlink(path)

    if not hasattr(os, 'symlink'):  # not supported by win+py27
        shutil.copytree(package.dir_path('data'), path)
    else:
        os.symlink(package.dir_path('data'), path)
@plac.annotations(
    force=("Force overwrite", "flag", "f", bool),
)
def main(data_size='all', force=False):
    # NOTE: documented with comments rather than a docstring on purpose;
    # plac uses a function's docstring in the generated help text, so
    # adding one would change the command-line output.
    #
    # Installs the 'en_default' package through Sputnik into the 'data'
    # directory one level above this module's directory, then exposes it
    # as an old-style 'data' entry next to this module.
    #
    # data_size: accepted but not used inside this function.
    # force:     when true, command.purge() is called before installing.

    # TODO read version from the same source as the setup
    sputnik = Sputnik('spacy', '0.99.0', console=sys.stdout)

    module_dir = os.path.dirname(os.path.abspath(__file__))
    store_dir = os.path.abspath(os.path.join(module_dir, '..', 'data'))
    if not os.path.isdir(store_dir):
        os.mkdir(store_dir)

    installer = sputnik.command(
        data_path=store_dir,
        repository_url='https://index.spacy.io')
    if force:
        installer.purge()
    package = installer.install('en_default')

    # FIXME clean up old-style packages
    migrate(module_dir)

    # FIXME supply spacy with an old-style data dir
    legacy_link = os.path.join(module_dir, 'data')
    link(package, legacy_link)
2015-01-02 10:44:41 +00:00
# Script entry point: plac builds the command-line interface from main().
if __name__ == "__main__":
    plac.call(main)