From 8ba0dfd01792dd2b34e8a71307f0c39a1ff6ebbb Mon Sep 17 00:00:00 2001 From: ines <ines@ines.io> Date: Wed, 3 Jan 2018 17:38:09 +0100 Subject: [PATCH 1/7] Make message on failed linking more clear --- spacy/cli/download.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 76e349a70..c1a03a9c3 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -49,8 +49,7 @@ def download(cmd, model, direct=False): "you don't have admin permissions?), but you can still " "load the model via its full package name:", "nlp = spacy.load('%s')" % model_name, - title="Download successful") - + title="Download successful but linking failed") def get_json(url, desc): r = requests.get(url) From 319d754309ae8872bfdb73d4acd2569ef404cc51 Mon Sep 17 00:00:00 2001 From: ines <ines@ines.io> Date: Wed, 3 Jan 2018 17:39:36 +0100 Subject: [PATCH 2/7] Fix overwriting of existing symlinks Check for is_symlink() to also overwrite invalid and outdated symlinks. Also show better error message if link path exists but is not symlink (i.e. file or directory). --- spacy/cli/link.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/spacy/cli/link.py b/spacy/cli/link.py index cfbc97e3e..151d5609a 100644 --- a/spacy/cli/link.py +++ b/spacy/cli/link.py @@ -34,11 +34,18 @@ def link(cmd, origin, link_name, force=False, model_path=None): "located here:", path2str(spacy_loc), exits=1, title="Can't find the spaCy data path to create model symlink") link_path = util.get_data_path() / link_name - if link_path.exists() and not force: + if link_path.is_symlink() and not force: prints("To overwrite an existing link, use the --force flag.", title="Link %s already exists" % link_name, exits=1) - elif link_path.exists(): + elif link_path.is_symlink(): # does a symlink exist? + # NB: It's important to check for is_symlink here and not for exists, + # because invalid/outdated symlinks would return False otherwise. link_path.unlink() + elif link_path.exists(): # does it exist otherwise? + # NB: Check this last because valid symlinks also "exist". + prints("This can happen if your data directory contains a directory " + "or file of the same name.", link_path, + title="Can't overwrite symlink %s" % link_name, exits=1) try: symlink_to(link_path, model_path) except: From d8109964d671a12bcf62dd493ebd00b27caedf85 Mon Sep 17 00:00:00 2001 From: ines <ines@ines.io> Date: Wed, 3 Jan 2018 17:40:37 +0100 Subject: [PATCH 3/7] Use --no-deps on model install In general, it's nice for models to specify spaCy as a dependency. However, this tends to cause problems in conda environments, as pip will re-install spaCy and its dependencies (especially Thinc) --- spacy/cli/download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index c1a03a9c3..c043e383f 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -84,5 +84,5 @@ def get_version(model, comp): def download_model(filename): download_url = about.__download_url__ + '/' + filename return subprocess.call( - [sys.executable, '-m', 'pip', 'install', '--no-cache-dir', + [sys.executable, '-m', 'pip', 'install', '--no-cache-dir', '--no-deps', download_url], env=os.environ.copy()) From 1081e08efbd79c5a356470ca317a04ea9eec56e6 Mon Sep 17 00:00:00 2001 From: ines <ines@ines.io> Date: Wed, 3 Jan 2018 20:14:50 +0100 Subject: [PATCH 4/7] Fix formatting --- spacy/cli/download.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index c043e383f..07dea3858 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -51,6 +51,7 @@ def download(cmd, model, direct=False): "nlp = spacy.load('%s')" % model_name, title="Download successful but linking failed") + def get_json(url, desc): r = requests.get(url) if r.status_code != 200: From dacfaa2ca4e48be00fac0e099029aa7db2b6b4ae Mon Sep 17 00:00:00 2001 From: ines <ines@ines.io> Date: Wed, 3 Jan 2018 21:03:36 +0100 Subject: [PATCH 5/7] Ensure that download command exits properly (resolves #1714) --- spacy/cli/download.py | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index 07dea3858..991fefd2c 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -31,25 +31,28 @@ def download(cmd, model, direct=False): version = get_version(model_name, compatibility) dl = download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version)) - if dl == 0: - try: - # Get package path here because link uses - # pip.get_installed_distributions() to check if model is a - # package, which fails if model was just installed via - # subprocess - package_path = get_package_path(model_name) - link(None, model_name, model, force=True, - model_path=package_path) - except: - # Dirty, but since spacy.download and the auto-linking is - # mostly a convenience wrapper, it's best to show a success - # message and loading instructions, even if linking fails. - prints( - "Creating a shortcut link for 'en' didn't work (maybe " - "you don't have admin permissions?), but you can still " - "load the model via its full package name:", - "nlp = spacy.load('%s')" % model_name, - title="Download successful but linking failed") + if dl != 0: + # if download subprocess doesn't return 0, exit with the respective + # exit code before doing anything else + sys.exit(dl) + try: + # Get package path here because link uses + # pip.get_installed_distributions() to check if model is a + # package, which fails if model was just installed via + # subprocess + package_path = get_package_path(model_name) + link(None, model_name, model, force=True, + model_path=package_path) + except: + # Dirty, but since spacy.download and the auto-linking is + # mostly a convenience wrapper, it's best to show a success + # message and loading instructions, even if linking fails. + prints( + "Creating a shortcut link for 'en' didn't work (maybe " + "you don't have admin permissions?), but you can still " + "load the model via its full package name:", + "nlp = spacy.load('%s')" % model_name, + title="Download successful but linking failed") def get_json(url, desc): From 2c656f90fbafba14c86b825fc9b52816cb82021e Mon Sep 17 00:00:00 2001 From: ines <ines@ines.io> Date: Wed, 3 Jan 2018 21:20:35 +0100 Subject: [PATCH 6/7] Exit with 1 if incompatible models found (see #1714) --- spacy/cli/validate.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py index 2d8cba891..d675c4b95 100644 --- a/spacy/cli/validate.py +++ b/spacy/cli/validate.py @@ -4,6 +4,7 @@ from __future__ import unicode_literals, print_function import requests import pkg_resources from pathlib import Path +import sys from ..compat import path2str, locale_escape from ..util import prints, get_data_path, read_json @@ -62,6 +63,9 @@ def validate(cmd): "them from the data directory. Data path: {}" .format(path2str(get_data_path()))) + if incompat_models or incompat_links: + sys.exit(1) + def get_model_links(compat): links = {} From ef210c73ddc98c5ac383c52c50c396a51c35384e Mon Sep 17 00:00:00 2001 From: ines <ines@ines.io> Date: Wed, 3 Jan 2018 21:34:03 +0100 Subject: [PATCH 7/7] Update cli.download and cli.validate docs --- website/api/cli.jade | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/website/api/cli.jade b/website/api/cli.jade index fdb8e4efe..412a43cbf 100644 --- a/website/api/cli.jade +++ b/website/api/cli.jade @@ -17,6 +17,17 @@ p | Direct downloads don't perform any compatibility checks and require the | model name to be specified with its version (e.g., #[code en_core_web_sm-1.2.0]). ++aside("Downloading best practices") + | The #[code download] command is mostly intended as a convenient, + | interactive wrapper – it performs compatibility checks and prints + | detailed messages in case things go wrong. It's #[strong not recommended] + | to use this command as part of an automated process. If you know which + | model your project needs, you should consider a + | #[+a("/usage/models#download-pip") direct download via pip], or + | uploading the model to a local PyPi installation and fetching it straight + | from there. This will also allow you to add it as a versioned package + | dependency to your project. + +code(false, "bash", "$"). python -m spacy download [model] [--direct] @@ -43,17 +54,6 @@ p | The installed model package in your #[code site-packages] | directory and a shortcut link as a symlink in #[code spacy/data]. -+aside("Downloading best practices") - | The #[code download] command is mostly intended as a convenient, - | interactive wrapper – it performs compatibility checks and prints - | detailed messages in case things go wrong. It's #[strong not recommended] - | to use this command as part of an automated process. If you know which - | model your project needs, you should consider a - | #[+a("/usage/models#download-pip") direct download via pip], or - | uploading the model to a local PyPi installation and fetching it straight - | from there. This will also allow you to add it as a versioned package - | dependency to your project. - +h(3, "link") Link p @@ -144,8 +144,14 @@ p | #[code pip install -U spacy] to ensure that all installed models are | can be used with the new version. The command is also useful to detect | out-of-sync model links resulting from links created in different virtual - | environments. Prints a list of models, the installed versions, the latest - | compatible version (if out of date) and the commands for updating. + | environments. It will a list of models, the installed versions, the + | latest compatible version (if out of date) and the commands for updating. + ++aside("Automated validation") + | You can also use the #[code validate] command as part of your build + | process or test suite, to ensure all models are up to date before + | proceeding. If incompatible models or shortcut links are found, it will + | return #[code 1]. +code(false, "bash", "$"). python -m spacy validate