diff --git a/requirements.txt b/requirements.txt
index 3b8d66e0e..3e8501b2f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -34,4 +34,5 @@ mypy>=0.910,<0.970; platform_machine!='aarch64'
types-dataclasses>=0.1.3; python_version < "3.7"
types-mock>=0.1.1
types-requests
+types-setuptools>=57.0.0
black>=22.0,<23.0
diff --git a/spacy/cli/download.py b/spacy/cli/download.py
index b7de88729..0c9a32b93 100644
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -20,7 +20,7 @@ def download_cli(
ctx: typer.Context,
model: str = Arg(..., help="Name of pipeline package to download"),
direct: bool = Opt(False, "--direct", "-d", "-D", help="Force direct download of name + version"),
- sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel")
+ sdist: bool = Opt(False, "--sdist", "-S", help="Download sdist (.tar.gz) archive instead of pre-built binary wheel"),
# fmt: on
):
"""
@@ -36,7 +36,12 @@ def download_cli(
download(model, direct, sdist, *ctx.args)
-def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -> None:
+def download(
+ model: str,
+ direct: bool = False,
+ sdist: bool = False,
+ *pip_args,
+) -> None:
if (
not (is_package("spacy") or is_package("spacy-nightly"))
and "--no-deps" not in pip_args
@@ -50,13 +55,10 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -
"dependencies, you'll have to install them manually."
)
pip_args = pip_args + ("--no-deps",)
- suffix = SDIST_SUFFIX if sdist else WHEEL_SUFFIX
- dl_tpl = "{m}-{v}/{m}-{v}{s}#egg={m}=={v}"
if direct:
components = model.split("-")
model_name = "".join(components[:-1])
version = components[-1]
- download_model(dl_tpl.format(m=model_name, v=version, s=suffix), pip_args)
else:
model_name = model
if model in OLD_MODEL_SHORTCUTS:
@@ -67,13 +69,26 @@ def download(model: str, direct: bool = False, sdist: bool = False, *pip_args) -
model_name = OLD_MODEL_SHORTCUTS[model]
compatibility = get_compatibility()
version = get_version(model_name, compatibility)
- download_model(dl_tpl.format(m=model_name, v=version, s=suffix), pip_args)
+
+ filename = get_model_filename(model_name, version, sdist)
+
+ download_model(filename, pip_args)
msg.good(
"Download and installation successful",
f"You can now load the package via spacy.load('{model_name}')",
)
+def get_model_filename(model_name: str, version: str, sdist: bool = False) -> str:
+    """Build the release-relative path of a pipeline package archive.
+
+    model_name (str): Name of the pipeline package.
+    version (str): Version of the pipeline package.
+    sdist (bool): Use the sdist suffix instead of the wheel suffix and append
+        an "#egg=" fragment naming the package and its version.
+    RETURNS (str): "{name}-{version}/{name}-{version}{suffix}", followed by
+        the egg fragment if sdist is set.
+    """
+    release = f"{model_name}-{version}"
+    if not sdist:
+        return f"{release}/{release}{WHEEL_SUFFIX}"
+    # Only the sdist path carries the egg fragment.
+    return f"{release}/{release}{SDIST_SUFFIX}#egg={model_name}=={version}"
+
+
def get_compatibility() -> dict:
if is_prerelease_version(about.__version__):
version: Optional[str] = about.__version__
@@ -105,6 +120,11 @@ def get_version(model: str, comp: dict) -> str:
return comp[model][0]
+def get_latest_version(model: str) -> str:
+    """Return the version get_version picks for a pipeline.
+
+    model (str): Name of the pipeline package.
+    RETURNS (str): Result of get_version for the model, looked up in the
+        table returned by get_compatibility.
+    """
+    return get_version(model, get_compatibility())
+
+
def download_model(
filename: str, user_pip_args: Optional[Sequence[str]] = None
) -> None:
diff --git a/spacy/cli/info.py b/spacy/cli/info.py
index e6a1cb616..e6ac4270f 100644
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@@ -1,10 +1,13 @@
from typing import Optional, Dict, Any, Union, List
import platform
+import pkg_resources
+import json
from pathlib import Path
from wasabi import Printer, MarkdownRenderer
import srsly
from ._util import app, Arg, Opt, string_to_list
+from .download import get_model_filename, get_latest_version
from .. import util
from .. import about
@@ -16,6 +19,7 @@ def info_cli(
markdown: bool = Opt(False, "--markdown", "-md", help="Generate Markdown for GitHub issues"),
silent: bool = Opt(False, "--silent", "-s", "-S", help="Don't print anything (just return)"),
exclude: str = Opt("labels", "--exclude", "-e", help="Comma-separated keys to exclude from the print-out"),
+ url: bool = Opt(False, "--url", "-u", help="Print the URL to download the most recent compatible version of the pipeline"),
# fmt: on
):
"""
@@ -23,10 +27,19 @@ def info_cli(
print its meta information. Flag --markdown prints details in Markdown for easy
copy-pasting to GitHub issues.
+ Flag --url prints only the download URL of the most recent compatible
+ version of the pipeline.
+
DOCS: https://spacy.io/api/cli#info
"""
exclude = string_to_list(exclude)
- info(model, markdown=markdown, silent=silent, exclude=exclude)
+ info(
+ model,
+ markdown=markdown,
+ silent=silent,
+ exclude=exclude,
+ url=url,
+ )
def info(
@@ -35,11 +48,20 @@ def info(
markdown: bool = False,
silent: bool = True,
exclude: Optional[List[str]] = None,
+ url: bool = False,
) -> Union[str, dict]:
msg = Printer(no_print=silent, pretty=not silent)
if not exclude:
exclude = []
- if model:
+ if url:
+ if model is not None:
+ title = f"Download info for pipeline '{model}'"
+ data = info_model_url(model)
+ print(data["download_url"])
+ return data
+ else:
+ msg.fail("--url option requires a pipeline name", exits=1)
+ elif model:
title = f"Info about pipeline '{model}'"
data = info_model(model, silent=silent)
else:
@@ -99,11 +121,43 @@ def info_model(model: str, *, silent: bool = True) -> Dict[str, Any]:
meta["source"] = str(model_path.resolve())
else:
meta["source"] = str(model_path)
+ download_url = info_installed_model_url(model)
+ if download_url:
+ meta["download_url"] = download_url
return {
k: v for k, v in meta.items() if k not in ("accuracy", "performance", "speed")
}
+def info_installed_model_url(model: str) -> Optional[str]:
+    """Look up the URL recorded for an installed pipeline package.
+
+    Reads the "url" entry of the distribution's direct_url.json metadata, so
+    it only works for pipelines installed as packages that ship this file.
+
+    model (str): Name of the pipeline package.
+    RETURNS (Optional[str]): The recorded URL, or None if it can't be read.
+    """
+    try:
+        distribution = pkg_resources.get_distribution(model)
+        direct_url = json.loads(distribution.get_metadata("direct_url.json"))
+        return direct_url["url"]
+    except Exception:
+        # Best effort: covers pkg_resources.DistributionNotFound (no such
+        # package) as well as a missing metadata file or invalid JSON.
+        return None
+
+def info_model_url(model: str) -> Dict[str, Any]:
+    """Build the download and release URLs for the latest pipeline version.
+
+    model (str): Name of the pipeline package.
+    RETURNS (Dict[str, Any]): The package download URL under "download_url"
+        and the GitHub release page URL under "release_url".
+    """
+    latest = get_latest_version(model)
+    package_path = get_model_filename(model, latest)
+    releases = "https://github.com/explosion/spacy-models/releases"
+    return {
+        "download_url": about.__download_url__ + "/" + package_path,
+        "release_url": f"{releases}/tag/{model}-{latest}",
+    }
+
+
def get_markdown(
data: Dict[str, Any],
title: Optional[str] = None,
diff --git a/spacy/tests/package/test_requirements.py b/spacy/tests/package/test_requirements.py
index e20227455..b403f274f 100644
--- a/spacy/tests/package/test_requirements.py
+++ b/spacy/tests/package/test_requirements.py
@@ -17,6 +17,7 @@ def test_build_dependencies():
"types-dataclasses",
"types-mock",
"types-requests",
+ "types-setuptools",
]
# ignore language-specific packages that shouldn't be installed by all
libs_ignore_setup = [
diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md
index cbd1f794a..e5cd3089b 100644
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@@ -77,14 +77,15 @@ $ python -m spacy info [--markdown] [--silent] [--exclude]
$ python -m spacy info [model] [--markdown] [--silent] [--exclude]
```
-| Name | Description |
-| ------------------------------------------------ | --------------------------------------------------------------------------------------------- |
-| `model` | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(option)~~ |
-| `--markdown`, `-md` | Print information as Markdown. ~~bool (flag)~~ |
-| `--silent`, `-s` 2.0.12 | Don't print anything, just return the values. ~~bool (flag)~~ |
-| `--exclude`, `-e` | Comma-separated keys to exclude from the print-out. Defaults to `"labels"`. ~~Optional[str]~~ |
-| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
-| **PRINTS** | Information about your spaCy installation. |
+| Name | Description |
+| ------------------------------------------------ | ----------------------------------------------------------------------------------------------------------------------- |
+| `model` | A trained pipeline, i.e. package name or path (optional). ~~Optional[str] \(option)~~ |
+| `--markdown`, `-md` | Print information as Markdown. ~~bool (flag)~~ |
+| `--silent`, `-s` <Tag variant="new">2.0.12</Tag> | Don't print anything, just return the values. ~~bool (flag)~~ |
+| `--exclude`, `-e` | Comma-separated keys to exclude from the print-out. Defaults to `"labels"`. ~~Optional[str]~~ |
+| `--url`, `-u` <Tag variant="new">3.5.0</Tag> | Print the URL to download the most recent compatible version of the pipeline. Requires a pipeline name. ~~bool (flag)~~ |
+| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
+| **PRINTS** | Information about your spaCy installation. |
## validate {#validate new="2" tag="command"}
diff --git a/website/docs/usage/models.md b/website/docs/usage/models.md
index 56992e7e3..6971ac8b4 100644
--- a/website/docs/usage/models.md
+++ b/website/docs/usage/models.md
@@ -365,15 +365,32 @@ pipeline package can be found.
To download a trained pipeline directly using
[pip](https://pypi.python.org/pypi/pip), point `pip install` to the URL or local
path of the wheel file or archive. Installing the wheel is usually more
-efficient. To find the direct link to a package, head over to the
-[releases](https://github.com/explosion/spacy-models/releases), right click on
-the archive link and copy it to your clipboard.
+efficient.
+
+> #### Pipeline Package URLs {#pipeline-urls}
+>
+> Pretrained pipeline distributions are hosted on
+> [GitHub Releases](https://github.com/explosion/spacy-models/releases), and you
+> can find download links there, as well as on the model page. You can also get
+> URLs directly from the command line by using `spacy info` with the `--url`
+> flag, which may be useful for automation.
+>
+> ```bash
+> spacy info en_core_web_sm --url
+> ```
+>
+> This command will print the URL for the latest version of a pipeline
+> compatible with the version of spaCy you're using. Note that in order to look
+> up the compatibility information an internet connection is required.
```bash
# With external URL
$ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0-py3-none-any.whl
$ pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz
+# Using spacy info to get the external URL
+$ pip install $(spacy info en_core_web_sm --url)
+
# With local file
$ pip install /Users/you/en_core_web_sm-3.0.0-py3-none-any.whl
$ pip install /Users/you/en_core_web_sm-3.0.0.tar.gz
@@ -514,21 +531,16 @@ should be specifying them directly.
Because pipeline packages are valid Python packages, you can add them to your
application's `requirements.txt`. If you're running your own internal PyPi
installation, you can upload the pipeline packages there. pip's
-[requirements file format](https://pip.pypa.io/en/latest/reference/pip_install/#requirements-file-format)
-supports both package names to download via a PyPi server, as well as direct
-URLs.
+[requirements file format](https://pip.pypa.io/en/latest/reference/requirements-file-format/)
+supports both package names to download via a PyPI server, as well as
+[direct URLs](#pipeline-urls).
```text
### requirements.txt
spacy>=3.0.0,<4.0.0
-https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.0.0/en_core_web_sm-3.0.0.tar.gz#egg=en_core_web_sm
+en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.4.0/en_core_web_sm-3.4.0-py3-none-any.whl
```
-Specifying `#egg=` with the package name tells pip which package to expect from
-the download URL. This way, the package won't be re-downloaded and overwritten
-if it's already installed - just like when you're downloading a package from
-PyPi.
-
All pipeline packages are versioned and specify their spaCy dependency. This
ensures cross-compatibility and lets you specify exact version requirements for
each pipeline. If you've [trained](/usage/training) your own pipeline, you can
diff --git a/website/src/templates/models.js b/website/src/templates/models.js
index df53f8c3c..16a2360d5 100644
--- a/website/src/templates/models.js
+++ b/website/src/templates/models.js
@@ -76,6 +76,7 @@ const MODEL_META = {
benchmark_ner: 'NER accuracy',
benchmark_speed: 'Speed',
compat: 'Latest compatible package version for your spaCy installation',
+ download_link: 'Download link for the pipeline',
}
const LABEL_SCHEME_META = {
@@ -138,6 +139,13 @@ function formatAccuracy(data, lang) {
.filter(item => item)
}
+/**
+ * Build the download link shown for a pipeline package.
+ *
+ * @param {string} lang - Language code of the pipeline, e.g. "en".
+ * @param {string} name - Pipeline name without the language prefix.
+ * @param {string} version - Pipeline package version.
+ * @returns {JSX.Element} Anchor labelled with the wheel filename and pointing
+ *   at the wheel's GitHub release download URL.
+ */
+function formatDownloadLink(lang, name, version) {
+    const fullName = `${lang}_${name}-${version}`
+    const filename = `${fullName}-py3-none-any.whl`
+    const url = `https://github.com/explosion/spacy-models/releases/download/${fullName}/${filename}`
+    // Return a renderable element: a bare `{filename}` is an object literal,
+    // which React cannot render as a child, and it would leave `url` unused.
+    return <a href={url}>{filename}</a>
+}
+
function formatModelMeta(data) {
return {
fullName: `${data.lang}_${data.name}-${data.version}`,
@@ -154,6 +162,7 @@ function formatModelMeta(data) {
labels: isEmptyObj(data.labels) ? null : data.labels,
vectors: formatVectors(data.vectors),
accuracy: formatAccuracy(data.performance, data.lang),
+ download_link: formatDownloadLink(data.lang, data.name, data.version),
}
}
@@ -244,6 +253,7 @@ const Model = ({
{ label: 'Components', content: components, help: MODEL_META.components },
{ label: 'Pipeline', content: pipeline, help: MODEL_META.pipeline },
{ label: 'Vectors', content: meta.vectors, help: MODEL_META.vecs },
+ { label: 'Download Link', content: meta.download_link, help: MODEL_META.download_link },
{ label: 'Sources', content: sources, help: MODEL_META.sources },
{ label: 'Author', content: author },
{ label: 'License', content: license },