From 28e2fec23bf5f654490c8d8f17d551fda190e831 Mon Sep 17 00:00:00 2001 From: ines Date: Wed, 9 Aug 2017 11:52:38 +0200 Subject: [PATCH 1/4] Fix autolinking failure on fresh model install (resolves #1138) On fresh install via subprocess, pip.get_installed_distributions() won't show new model, so is_package check in link command fails. Solution for now is to get model package path explicitly and pass it to link command. --- spacy/cli/download.py | 8 ++++++-- spacy/cli/link.py | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index b6e5549da..675ae8cee 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -8,7 +8,7 @@ import subprocess import sys from .link import link -from ..util import prints +from ..util import prints, get_package_path from .. import about @@ -32,7 +32,11 @@ def download(cmd, model, direct=False): version = get_version(model_name, compatibility) download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version)) try: - link(None, model_name, model, force=True) + # Get package path here because link uses + # pip.get_installed_distributions() to check if model is a package, + # which fails if model was just installed via subprocess + package_path = get_package_path(model_name) + link(None, model_name, model, force=True, model_path=package_path) except: # Dirty, but since spacy.download and the auto-linking is mostly # a convenience wrapper, it's best to show a success message and diff --git a/spacy/cli/link.py b/spacy/cli/link.py index a8ee01565..712a05aee 100644 --- a/spacy/cli/link.py +++ b/spacy/cli/link.py @@ -14,7 +14,7 @@ from .. import util link_name=("name of shortuct link to create", "positional", None, str), force=("force overwriting of existing link", "flag", "f", bool) ) -def link(cmd, origin, link_name, force=False): +def link(cmd, origin, link_name, force=False, model_path=None): """ Create a symlink for models within the spacy/data directory. Accepts either the name of a pip package, or the local path to the model data @@ -23,7 +23,7 @@ def link(cmd, origin, link_name, force=False): if util.is_package(origin): model_path = util.get_package_path(origin) else: - model_path = Path(origin) + model_path = Path(origin) if model_path is None else Path(model_path) if not model_path.exists(): prints("The data should be located in %s" % path2str(model_path), title="Can't locate model data", exits=1) From 764540a6dd36b4a51fc6b9f28786aa5ffeaee202 Mon Sep 17 00:00:00 2001 From: ines Date: Wed, 9 Aug 2017 12:16:30 +0200 Subject: [PATCH 2/4] Don't ignore /bin directory --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 52838918c..cb0a8e84e 100644 --- a/.gitignore +++ b/.gitignore @@ -40,7 +40,6 @@ venv/ # Distribution / packaging env/ -bin/ build/ develop-eggs/ dist/ From 495e0424291e95846fcccb679c938a0a1e8f6ff1 Mon Sep 17 00:00:00 2001 From: ines Date: Wed, 9 Aug 2017 12:17:30 +0200 Subject: [PATCH 3/4] Add entry point-style auto alias for "spacy" Simplest way to run commands as spacy xxx instead of python -m spacy xxx, while avoiding environment conflicts --- MANIFEST.in | 1 + bin/spacy | 1 + setup.py | 1 + 3 files changed, 3 insertions(+) create mode 100644 bin/spacy diff --git a/MANIFEST.in b/MANIFEST.in index 697748835..4d804a23e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ recursive-include include *.h include LICENSE include README.rst +include bin/spacy diff --git a/bin/spacy b/bin/spacy new file mode 100644 index 000000000..29d9a80e5 --- /dev/null +++ b/bin/spacy @@ -0,0 +1 @@ +python -m spacy "$@" diff --git a/setup.py b/setup.py index ecdf15536..0a3384ed5 100755 --- a/setup.py +++ b/setup.py @@ -187,6 +187,7 @@ def setup_package(): url=about['__uri__'], license=about['__license__'], ext_modules=ext_modules, + scripts=['bin/spacy'], install_requires=[ 'numpy>=1.7', 'murmurhash>=0.28,<0.29', From bcce6f7de0d03c86c5c189381d00de16b6cdbb19 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 9 Aug 2017 16:23:12 -0500 Subject: [PATCH 4/4] Fix parser fine tuning --- spacy/syntax/nn_parser.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index 00835f697..31c3801a2 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -513,7 +513,7 @@ cdef class Parser: self._make_updates(d_tokvecs, backprops, sgd, cuda_stream) d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, [len(d) for d in docs]) - #bp_my_tokvecs(d_tokvecs, sgd=sgd) + bp_my_tokvecs(d_tokvecs, sgd=sgd) return d_tokvecs def _init_gold_batch(self, whole_docs, whole_golds):