From 8ba0dfd01792dd2b34e8a71307f0c39a1ff6ebbb Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Wed, 3 Jan 2018 17:38:09 +0100
Subject: [PATCH 1/7] Make message on failed linking more clear

---
 spacy/cli/download.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index 76e349a70..c1a03a9c3 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -49,8 +49,7 @@ def download(cmd, model, direct=False):
                     "you don't have admin permissions?), but you can still "
                     "load the model via its full package name:",
                     "nlp = spacy.load('%s')" % model_name,
-                    title="Download successful")
-
+                    title="Download successful but linking failed")
 
 def get_json(url, desc):
     r = requests.get(url)

From 319d754309ae8872bfdb73d4acd2569ef404cc51 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Wed, 3 Jan 2018 17:39:36 +0100
Subject: [PATCH 2/7] Fix overwriting of existing symlinks

Check for is_symlink() to also overwrite invalid and outdated symlinks. Also show better error message if link path exists but is not symlink (i.e. file or directory).
---
 spacy/cli/link.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/spacy/cli/link.py b/spacy/cli/link.py
index cfbc97e3e..151d5609a 100644
--- a/spacy/cli/link.py
+++ b/spacy/cli/link.py
@@ -34,11 +34,18 @@ def link(cmd, origin, link_name, force=False, model_path=None):
                "located here:", path2str(spacy_loc), exits=1,
                title="Can't find the spaCy data path to create model symlink")
     link_path = util.get_data_path() / link_name
-    if link_path.exists() and not force:
+    if link_path.is_symlink() and not force:
         prints("To overwrite an existing link, use the --force flag.",
                title="Link %s already exists" % link_name, exits=1)
-    elif link_path.exists():
+    elif link_path.is_symlink():  # does a symlink exist?
+        # NB: It's important to check for is_symlink here and not for exists,
+        # because invalid/outdated symlinks would return False otherwise.
         link_path.unlink()
+    elif link_path.exists(): # does it exist otherwise?
+        # NB: Check this last because valid symlinks also "exist".
+        prints("This can happen if your data directory contains a directory "
+               "or file of the same name.", link_path,
+               title="Can't overwrite symlink %s" % link_name, exits=1)
     try:
         symlink_to(link_path, model_path)
     except:

From d8109964d671a12bcf62dd493ebd00b27caedf85 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Wed, 3 Jan 2018 17:40:37 +0100
Subject: [PATCH 3/7] Use --no-deps on model install

In general, it's nice for models to specify spaCy as a dependency. However, this tends to cause problems in conda environments, as pip will re-install spaCy and its dependencies (especially Thinc)
---
 spacy/cli/download.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index c1a03a9c3..c043e383f 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -84,5 +84,5 @@ def get_version(model, comp):
 def download_model(filename):
     download_url = about.__download_url__ + '/' + filename
     return subprocess.call(
-        [sys.executable, '-m', 'pip', 'install', '--no-cache-dir',
+        [sys.executable, '-m', 'pip', 'install', '--no-cache-dir', '--no-deps',
          download_url], env=os.environ.copy())

From 1081e08efbd79c5a356470ca317a04ea9eec56e6 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Wed, 3 Jan 2018 20:14:50 +0100
Subject: [PATCH 4/7] Fix formatting

---
 spacy/cli/download.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index c043e383f..07dea3858 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -51,6 +51,7 @@ def download(cmd, model, direct=False):
                     "nlp = spacy.load('%s')" % model_name,
                     title="Download successful but linking failed")
 
+
 def get_json(url, desc):
     r = requests.get(url)
     if r.status_code != 200:

From dacfaa2ca4e48be00fac0e099029aa7db2b6b4ae Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Wed, 3 Jan 2018 21:03:36 +0100
Subject: [PATCH 5/7] Ensure that download command exits properly (resolves
 #1714)

---
 spacy/cli/download.py | 41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index 07dea3858..991fefd2c 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -31,25 +31,28 @@ def download(cmd, model, direct=False):
         version = get_version(model_name, compatibility)
         dl = download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name,
                                                             v=version))
-        if dl == 0:
-            try:
-                # Get package path here because link uses
-                # pip.get_installed_distributions() to check if model is a
-                # package, which fails if model was just installed via
-                # subprocess
-                package_path = get_package_path(model_name)
-                link(None, model_name, model, force=True,
-                     model_path=package_path)
-            except:
-                # Dirty, but since spacy.download and the auto-linking is
-                # mostly a convenience wrapper, it's best to show a success
-                # message and loading instructions, even if linking fails.
-                prints(
-                    "Creating a shortcut link for 'en' didn't work (maybe "
-                    "you don't have admin permissions?), but you can still "
-                    "load the model via its full package name:",
-                    "nlp = spacy.load('%s')" % model_name,
-                    title="Download successful but linking failed")
+        if dl != 0:
+            # if download subprocess doesn't return 0, exit with the respective
+            # exit code before doing anything else
+            sys.exit(dl)
+        try:
+            # Get package path here because link uses
+            # pip.get_installed_distributions() to check if model is a
+            # package, which fails if model was just installed via
+            # subprocess
+            package_path = get_package_path(model_name)
+            link(None, model_name, model, force=True,
+                    model_path=package_path)
+        except:
+            # Dirty, but since spacy.download and the auto-linking is
+            # mostly a convenience wrapper, it's best to show a success
+            # message and loading instructions, even if linking fails.
+            prints(
+                "Creating a shortcut link for 'en' didn't work (maybe "
+                "you don't have admin permissions?), but you can still "
+                "load the model via its full package name:",
+                "nlp = spacy.load('%s')" % model_name,
+                title="Download successful but linking failed")
 
 
 def get_json(url, desc):

From 2c656f90fbafba14c86b825fc9b52816cb82021e Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Wed, 3 Jan 2018 21:20:35 +0100
Subject: [PATCH 6/7] Exit with 1 if incompatible models found (see #1714)

---
 spacy/cli/validate.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py
index 2d8cba891..d675c4b95 100644
--- a/spacy/cli/validate.py
+++ b/spacy/cli/validate.py
@@ -4,6 +4,7 @@ from __future__ import unicode_literals, print_function
 import requests
 import pkg_resources
 from pathlib import Path
+import sys
 
 from ..compat import path2str, locale_escape
 from ..util import prints, get_data_path, read_json
@@ -62,6 +63,9 @@ def validate(cmd):
                "them from the data directory. Data path: {}"
                .format(path2str(get_data_path())))
 
+    if incompat_models or incompat_links:
+        sys.exit(1)
+
 
 def get_model_links(compat):
     links = {}

From ef210c73ddc98c5ac383c52c50c396a51c35384e Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Wed, 3 Jan 2018 21:34:03 +0100
Subject: [PATCH 7/7] Update cli.download and cli.validate docs

---
 website/api/cli.jade | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/website/api/cli.jade b/website/api/cli.jade
index fdb8e4efe..412a43cbf 100644
--- a/website/api/cli.jade
+++ b/website/api/cli.jade
@@ -17,6 +17,17 @@ p
     |  Direct downloads don't perform any compatibility checks and require the
     |  model name to be specified with its version (e.g., #[code en_core_web_sm-1.2.0]).
 
++aside("Downloading best practices")
+    |  The #[code download] command is mostly intended as a convenient,
+    |  interactive wrapper – it performs compatibility checks and prints
+    |  detailed messages in case things go wrong. It's #[strong not recommended]
+    |  to use this command as part of an automated process. If you know which
+    |  model your project needs, you should consider a
+    |  #[+a("/usage/models#download-pip") direct download via pip], or
+    |  uploading the model to a local PyPi installation and fetching it straight
+    |  from there. This will also allow you to add it as a versioned package
+    |  dependency to your project.
+
 +code(false, "bash", "$").
     python -m spacy download [model] [--direct]
 
@@ -43,17 +54,6 @@ p
             |  The installed model package in your #[code site-packages]
             |  directory and a shortcut link as a symlink in #[code spacy/data].
 
-+aside("Downloading best practices")
-    |  The #[code download] command is mostly intended as a convenient,
-    |  interactive wrapper – it performs compatibility checks and prints
-    |  detailed messages in case things go wrong. It's #[strong not recommended]
-    |  to use this command as part of an automated process. If you know which
-    |  model your project needs, you should consider a
-    |  #[+a("/usage/models#download-pip") direct download via pip], or
-    |  uploading the model to a local PyPi installation and fetching it straight
-    |  from there. This will also allow you to add it as a versioned package
-    |  dependency to your project.
-
 +h(3, "link") Link
 
 p
@@ -144,8 +144,14 @@ p
     |  #[code pip install -U spacy] to ensure that all installed models are
     |  can be used with the new version. The command is also useful to detect
     |  out-of-sync model links resulting from links created in different virtual
-    |  environments. Prints a list of models, the installed versions, the latest
-    |  compatible version (if out of date) and the commands for updating.
+    |  environments. It will a list of models, the installed versions, the
+    |  latest compatible version (if out of date) and the commands for updating.
+
++aside("Automated validation")
+    |  You can also use the #[code validate] command as part of your build
+    |  process or test suite, to ensure all models are up to date before
+    |  proceeding. If incompatible models or shortcut links are found, it will
+    |  return #[code 1].
 
 +code(false, "bash", "$").
     python -m spacy validate