From 7f87716cf705f6edefe98cc787873d079c6a2c8e Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 25 Jan 2015 23:01:10 +1100
Subject: [PATCH] * Fix download script

---
 spacy/en/download.py | 40 ++++++++++------------------------------
 1 file changed, 10 insertions(+), 30 deletions(-)

diff --git a/spacy/en/download.py b/spacy/en/download.py
index fbce062ba..9997c90b6 100644
--- a/spacy/en/download.py
+++ b/spacy/en/download.py
@@ -2,53 +2,33 @@ from os import path
 import os
 import tarfile
 import shutil
-import requests
+import wget
 
-PARSER_URL = 'http://s3-us-west-1.amazonaws.com/media.spacynlp.com/en.tgz'
+PARSER_URL = 'http://s3-us-west-1.amazonaws.com/media.spacynlp.com/en_deps-0.30.tgz'
 
-DEP_VECTORS_URL = 'http://u.cs.biu.ac.il/~yogo/data/syntemb/deps.words.bz2'
+
+DEP_VECTORS_URL = 'http://s3-us-west-1.amazonaws.com/media.spacynlp.com/vec.bin'
 
 DEST_DIR = path.join(path.dirname(__file__), 'data')
 
-def download_file(url):
-    local_filename = url.split('/')[-1]
-    return path.join(DEST_DIR, local_filename)
-    # NOTE the stream=True parameter
-    r = requests.get(url, stream=True)
-    print "Download %s" % url
-    i = 0
-    with open(local_filename, 'wb') as f:
-        for chunk in r.iter_content(chunk_size=1024):
-            if chunk: # filter out keep-alive new chunks
-                f.write(chunk)
-                f.flush()
-            print i
-            i += 1
-    return local_filename
+def download_file(url, out):
+    wget.download(url, out=out)
 
 
 def install_parser_model(url, dest_dir):
-    if not os.path.exists(dest_dir):
-        os.mkdir(dest_dir)
-    assert not path.exists(path.join(dest_dir, 'en'))
-
-    filename = download_file(url)
-    t = tarfile.open(filename, mode=":gz")
+    download_file(url, dest_dir)
+    t = tarfile.open(path.join(DEST_DIR, 'en_deps-0.30.tgz'), mode=":gz")
     t.extractall(dest_dir)
-    shutil.move(path.join(dest_dir, 'en', 'deps', 'model'), dest_dir)
-    shutil.move(path.join(dest_dir, 'en', 'deps', 'config.json'), dest_dir)
-    shutil.rmtree(path.join(dest_dir, 'en'))
 
 
 def install_dep_vectors(url, dest_dir):
     if not os.path.exists(dest_dir):
         os.mkdir(dest_dir)
-    filename = download_file(url)
-    shutil.move(filename, path.join(dest_dir, 'vec.bz2'))
+    filename = download_file(url, dest_dir)
 
 
 def main():
-    #install_parser_model(PARSER_URL, path.join(DEST_DIR, 'deps'))
+    install_parser_model(PARSER_URL, DEST_DIR)
     install_dep_vectors(DEP_VECTORS_URL, path.join(DEST_DIR, 'vocab'))
 
 