From bf43ebbf61097c15e4add76b0163d852fbca3953 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 29 Jun 2020 16:32:25 +0200 Subject: [PATCH 01/24] fix typo's --- spacy/cli/project.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index 5011a13f9..185578392 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -107,7 +107,7 @@ def project_assets_cli( """Use DVC (Data Version Control) to fetch project assets. Assets are defined in the "assets" section of the project config. If possible, DVC will try to track the files so you can pull changes from upstream. It will - also try and store the checksum so the assets are versioned. If th file + also try and store the checksum so the assets are versioned. If the file can't be tracked or checked, it will be downloaded without DVC. If a checksum is provided in the project config, the file is only downloaded if no local file with the same checksum exists. @@ -567,7 +567,7 @@ def run_commands( variables (Dict[str, str]): Dictionary of variable names, mapped to their values. Will be used to substitute format string variables in the commands. - silent (boll): Don't print the commands. + silent (bool): Don't print the commands. """ for command in commands: # Substitute variables, e.g. "./{NAME}.json" From f8dddeda2722cb45f5ef4410b5ff4d7be3faf1bd Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 29 Jun 2020 16:38:15 +0200 Subject: [PATCH 02/24] print help msg when just calling 'project' without args --- spacy/cli/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index 185578392..44ff78f2b 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -202,7 +202,7 @@ def project_update_dvc_cli( msg.info(f"No changes found in {CONFIG_FILE}, no update needed") -app.add_typer(project_cli, name="project") +app.add_typer(project_cli, name="project", no_args_is_help=True) ################# From 3487214ba182572a8696152ec671f08078e0dc91 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 29 Jun 2020 17:45:47 +0200 Subject: [PATCH 03/24] fix shlex.split for non-posix --- spacy/cli/project.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index 44ff78f2b..1196eb3dd 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -14,6 +14,7 @@ import tqdm from ._app import app, Arg, Opt, COMMAND, NAME from .. import about +from ..compat import is_windows from ..schemas import ProjectConfigSchema, validate from ..util import ensure_path, run_command, make_tempdir, working_dir from ..util import get_hash, get_checksum @@ -234,7 +235,10 @@ def project_clone( # We're using Git and sparse checkout to only clone the files we need with make_tempdir() as tmp_dir: cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 --config core.sparseCheckout=true" - run_command(shlex.split(cmd)) + try: + run_command(shlex.split(cmd, posix=not is_windows)) + except: + raise RuntimeError(f"Could not clone the repo '{repo}' into the temp dir '{tmp_dir}'.") with (tmp_dir / ".git" / "info" / "sparse-checkout").open("w") as f: f.write(name) run_command(["git", "-C", tmp_dir, "fetch"]) From efe7eb71f2e3949f41bacbcc390177243fc4c727 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 29 Jun 2020 17:46:08 +0200 Subject: [PATCH 04/24] create subfolder in working dir --- spacy/cli/project.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index 1196eb3dd..7470e23ea 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -82,6 +82,8 @@ def project_clone_cli( initializing DVC (Data Version Control). This allows DVC to integrate with Git. """ + if dest == Path.cwd(): + dest = dest / name project_clone(name, dest, repo=repo, git=git, no_init=no_init) From 894b8e7ff652a26c71ebbe6fa3d75a25104991c8 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 29 Jun 2020 18:16:39 +0200 Subject: [PATCH 05/24] throw warning (instead of crashing) when temp dir can't be cleaned --- spacy/cli/project.py | 4 ++-- spacy/errors.py | 1 + spacy/util.py | 5 ++++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index 7470e23ea..dd6b4da11 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -243,8 +243,8 @@ def project_clone( raise RuntimeError(f"Could not clone the repo '{repo}' into the temp dir '{tmp_dir}'.") with (tmp_dir / ".git" / "info" / "sparse-checkout").open("w") as f: f.write(name) - run_command(["git", "-C", tmp_dir, "fetch"]) - run_command(["git", "-C", tmp_dir, "checkout"]) + run_command(["git", "-C", str(tmp_dir), "fetch"]) + run_command(["git", "-C", str(tmp_dir), "checkout"]) shutil.move(str(tmp_dir / Path(name).name), str(project_dir)) msg.good(f"Cloned project '{name}' from {repo}") for sub_dir in DIRS: diff --git a/spacy/errors.py b/spacy/errors.py index b3e6efdd4..c54aa804b 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -132,6 +132,7 @@ class Warnings(object): "are currently: da, de, el, en, id, lb, pt, ru, sr, ta, th.") # TODO: fix numbering after merging develop into master + W091 = ("Could not clean/remove the temp directory at {dir}.") W092 = ("Ignoring annotations for sentence starts, as dependency heads are set.") W093 = ("Could not find any data to train the {name} on. Is your " "input data correctly formatted ?") diff --git a/spacy/util.py b/spacy/util.py index 3f0a1ec6f..5be83e20f 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -467,7 +467,10 @@ def make_tempdir(): """ d = Path(tempfile.mkdtemp()) yield d - shutil.rmtree(str(d)) + try: + shutil.rmtree(str(d)) + except PermissionError: + warnings.warn(Warnings.W091.format(dir=d)) def get_hash(data) -> str: From ff233d5743a24d445d6e6c1e70790d71c6147ec2 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 29 Jun 2020 18:22:33 +0200 Subject: [PATCH 06/24] print details on error msg (e.g. PermissionError on specific file) --- spacy/errors.py | 2 +- spacy/util.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/spacy/errors.py b/spacy/errors.py index c54aa804b..1af673569 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -132,7 +132,7 @@ class Warnings(object): "are currently: da, de, el, en, id, lb, pt, ru, sr, ta, th.") # TODO: fix numbering after merging develop into master - W091 = ("Could not clean/remove the temp directory at {dir}.") + W091 = ("Could not clean/remove the temp directory at {dir}: {msg}.") W092 = ("Ignoring annotations for sentence starts, as dependency heads are set.") W093 = ("Could not find any data to train the {name} on. Is your " "input data correctly formatted ?") diff --git a/spacy/util.py b/spacy/util.py index 5be83e20f..cdaed7a92 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -469,8 +469,8 @@ def make_tempdir(): yield d try: shutil.rmtree(str(d)) - except PermissionError: - warnings.warn(Warnings.W091.format(dir=d)) + except PermissionError as e: + warnings.warn(Warnings.W091.format(dir=d, msg=e)) def get_hash(data) -> str: From 1176783310095c7c31ba9963a2aa1d1266c66707 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 29 Jun 2020 18:37:42 +0200 Subject: [PATCH 07/24] fix one more shlex.split --- spacy/cli/project.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index dd6b4da11..16e87cb40 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -246,7 +246,7 @@ def project_clone( run_command(["git", "-C", str(tmp_dir), "fetch"]) run_command(["git", "-C", str(tmp_dir), "checkout"]) shutil.move(str(tmp_dir / Path(name).name), str(project_dir)) - msg.good(f"Cloned project '{name}' from {repo}") + msg.good(f"Cloned project '{name}' from {repo} into {project_dir}") for sub_dir in DIRS: dir_path = project_dir / sub_dir if not dir_path.exists(): @@ -484,7 +484,7 @@ def update_dvc_config( path = path.resolve() dvc_config_path = path / DVC_CONFIG if dvc_config_path.exists(): - # Cneck if the file was generated using the current config, if not, redo + # Check if the file was generated using the current config, if not, redo with dvc_config_path.open("r", encoding="utf8") as f: ref_hash = f.readline().strip().replace("# ", "") if ref_hash == config_hash and not force: @@ -578,7 +578,7 @@ def run_commands( for command in commands: # Substitute variables, e.g. "./{NAME}.json" command = command.format(**variables) - command = shlex.split(command) + command = shlex.split(command, posix=not is_windows) # TODO: is this needed / a good idea? if len(command) and command[0] == "python": command[0] = sys.executable From 7e4cbda89ac86cb2e45701c8335f7bfb9f8f4e33 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 30 Jun 2020 11:09:53 +0200 Subject: [PATCH 08/24] fix project_init for relative path --- spacy/cli/project.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index 16e87cb40..4588e3336 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -284,8 +284,8 @@ def project_init( # opt-in explicitly. If they want it, they can always enable it. if not analytics: run_command(["dvc", "config", "core.analytics", "false"]) - config = load_project_config(project_dir) - setup_check_dvc(project_dir, config) + config = load_project_config(project_dir) + setup_check_dvc(project_dir, config) def project_assets(project_dir: Path) -> None: From d23be563eb8ee4e72b9389e675a7e4dcb7140941 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 30 Jun 2020 11:23:35 +0200 Subject: [PATCH 09/24] remove redundant setting of no_args_is_help --- spacy/cli/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index 3d2aa9440..50517d594 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -207,7 +207,7 @@ def project_update_dvc_cli( msg.info(f"No changes found in {CONFIG_FILE}, no update needed") -app.add_typer(project_cli, name="project", no_args_is_help=True) +app.add_typer(project_cli, name="project") ################# From 140c4896a0c8fb0f5798aaa6785d6f169d5f6ce8 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 30 Jun 2020 12:54:15 +0200 Subject: [PATCH 10/24] split_command util function --- spacy/cli/project.py | 8 +++----- spacy/util.py | 10 +++++++--- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index 50517d594..c82e4e774 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -4,7 +4,6 @@ import srsly from pathlib import Path from wasabi import msg import subprocess -import shlex import os import re import shutil @@ -14,10 +13,9 @@ import tqdm from ._app import app, Arg, Opt, COMMAND, NAME from .. import about -from ..compat import is_windows from ..schemas import ProjectConfigSchema, validate from ..util import ensure_path, run_command, make_tempdir, working_dir -from ..util import get_hash, get_checksum +from ..util import get_hash, get_checksum, split_command CONFIG_FILE = "project.yml" @@ -240,7 +238,7 @@ def project_clone( with make_tempdir() as tmp_dir: cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 --config core.sparseCheckout=true" try: - run_command(shlex.split(cmd, posix=not is_windows)) + run_command(split_command(cmd)) except: raise RuntimeError(f"Could not clone the repo '{repo}' into the temp dir '{tmp_dir}'.") with (tmp_dir / ".git" / "info" / "sparse-checkout").open("w") as f: @@ -599,7 +597,7 @@ def run_commands( for command in commands: # Substitute variables, e.g. "./{NAME}.json" command = command.format(**variables) - command = shlex.split(command, posix=not is_windows) + command = split_command(command) # TODO: is this needed / a good idea? if len(command) and command[0] in ("python", "python3"): command[0] = sys.executable diff --git a/spacy/util.py b/spacy/util.py index cdaed7a92..a61bbf044 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -22,7 +22,7 @@ from contextlib import contextmanager import tempfile import shutil import hashlib - +import shlex try: import cupy.random @@ -35,7 +35,7 @@ except ImportError: import importlib_metadata from .symbols import ORTH -from .compat import cupy, CudaStream +from .compat import cupy, CudaStream, is_windows from .errors import Errors, Warnings from . import about @@ -925,7 +925,11 @@ def from_disk(path, readers, exclude): # Split to support file names like meta.json if key.split(".")[0] not in exclude: reader(path / key) - return path + return + + +def split_command(command): + return shlex.split(command, posix=not is_windows) def import_file(name, loc): From 7584fdafec2022bb19bd458eca07f1903c98ef20 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 30 Jun 2020 12:59:13 +0200 Subject: [PATCH 11/24] Fix typo --- spacy/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/util.py b/spacy/util.py index a61bbf044..7f70a131a 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -925,7 +925,7 @@ def from_disk(path, readers, exclude): # Split to support file names like meta.json if key.split(".")[0] not in exclude: reader(path / key) - return + return path def split_command(command): From 3aca404735eab7be48fffcc19dc905ee6db753a5 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 30 Jun 2020 13:17:00 +0200 Subject: [PATCH 12/24] Make run_command take string and list --- spacy/util.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/spacy/util.py b/spacy/util.py index 7f70a131a..4b0bdbae5 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -434,11 +434,24 @@ def get_package_path(name): return Path(pkg.__file__).parent -def run_command(command: List[str]) -> None: - """Run a command on the command line as a subprocess. +def split_command(command: str) -> List[str]: + """Split a string command using shlex. Handles platform compatibility. - command (list): The split command. + command (str) : The command to split + RETURNS (List[str]): The split command. """ + return shlex.split(command, posix=not is_windows) + + +def run_command(command: Union[str, List[str]]) -> None: + """Run a command on the command line as a subprocess. If the subprocess + returns a non-zero exit code, a system exit is performed. + + command (str / List[str]): The command. If provided as a string, the + string will be split using shlex.split. + """ + if isinstance(command, str): + command = split_command(command) status = subprocess.call(command, env=os.environ.copy()) if status != 0: sys.exit(status) @@ -928,10 +941,6 @@ def from_disk(path, readers, exclude): return path -def split_command(command): - return shlex.split(command, posix=not is_windows) - - def import_file(name, loc): """Import module from a file. Used to load models from a directory. From c5e31acb06bb52358dd0097ebd459f3b9117115c Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 30 Jun 2020 13:17:14 +0200 Subject: [PATCH 13/24] Make working_dir yield absolute cwd path --- spacy/util.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/spacy/util.py b/spacy/util.py index 4b0bdbae5..f8acebb63 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -462,11 +462,14 @@ def working_dir(path: Union[str, Path]) -> None: """Change current working directory and returns to previous on exit. path (str / Path): The directory to navigate to. + YIELDS (Path): The absolute path to the current working directory. This + should be used if the block needs to perform actions within the working + directory, to prevent mismatches with relative paths. """ prev_cwd = Path.cwd() os.chdir(str(path)) try: - yield + yield Path(path).resolve() finally: os.chdir(prev_cwd) From 72175b5c601c7b76f75d2e688b723489e2331b9d Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 30 Jun 2020 13:17:26 +0200 Subject: [PATCH 14/24] Update project command --- spacy/cli/project.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index c82e4e774..6ac604bdf 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -15,7 +15,7 @@ from ._app import app, Arg, Opt, COMMAND, NAME from .. import about from ..schemas import ProjectConfigSchema, validate from ..util import ensure_path, run_command, make_tempdir, working_dir -from ..util import get_hash, get_checksum, split_command +from ..util import get_hash, get_checksum CONFIG_FILE = "project.yml" @@ -238,9 +238,10 @@ def project_clone( with make_tempdir() as tmp_dir: cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 --config core.sparseCheckout=true" try: - run_command(split_command(cmd)) - except: - raise RuntimeError(f"Could not clone the repo '{repo}' into the temp dir '{tmp_dir}'.") + run_command(cmd) + except SystemExit: + err = f"Could not clone the repo '{repo}' into the temp dir '{tmp_dir}'" + msg.fail(err) with (tmp_dir / ".git" / "info" / "sparse-checkout").open("w") as f: f.write(name) run_command(["git", "-C", str(tmp_dir), "fetch"]) @@ -272,8 +273,7 @@ def project_init( silent (bool): Don't print any output (via DVC). analytics (bool): Opt-in to DVC analytics (defaults to False). """ - project_dir = project_dir.resolve() - with working_dir(project_dir): + with working_dir(project_dir.resolve()) as cwd: if git: run_command(["git", "init"]) init_cmd = ["dvc", "init"] @@ -292,11 +292,11 @@ def project_init( # TODO: maybe we shouldn't do this, but it's otherwise super confusing # once you commit your changes via Git and it creates a bunch of files # that have no purpose - plots_dir = project_dir / DVC_DIR / "plots" + plots_dir = cwd / DVC_DIR / "plots" if plots_dir.exists(): shutil.rmtree(str(plots_dir)) - config = load_project_config(project_dir) - setup_check_dvc(project_dir, config) + config = load_project_config(cwd) + setup_check_dvc(cwd, config) def project_assets(project_dir: Path) -> None: @@ -597,8 +597,13 @@ def run_commands( for command in commands: # Substitute variables, e.g. "./{NAME}.json" command = command.format(**variables) - command = split_command(command) - # TODO: is this needed / a good idea? + # Not sure if this is needed or a good idea. Motivation: users may often + # use commands in their config that reference "python" and we want to + # make sure that it's always executing the same Python that spaCy is + # executed with and the pip in the same env, not some other Python/pip. + # Also ensures cross-compatibility if user 1 writes "python3" (because + # that's how it's set up on their system), and user 2 without the + # shortcut tries to re-run the command. if len(command) and command[0] in ("python", "python3"): command[0] = sys.executable elif len(command) and command[0] in ("pip", "pip3"): From 8e205059705a97bd1b7b978b787c1878bbbb5309 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 30 Jun 2020 13:29:45 +0200 Subject: [PATCH 15/24] Resolve within working_dir context manager --- spacy/cli/project.py | 2 +- spacy/util.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index 6ac604bdf..f0315dfe0 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -273,7 +273,7 @@ def project_init( silent (bool): Don't print any output (via DVC). analytics (bool): Opt-in to DVC analytics (defaults to False). """ - with working_dir(project_dir.resolve()) as cwd: + with working_dir(project_dir) as cwd: if git: run_command(["git", "init"]) init_cmd = ["dvc", "init"] diff --git a/spacy/util.py b/spacy/util.py index f8acebb63..1d998ec36 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -467,11 +467,12 @@ def working_dir(path: Union[str, Path]) -> None: directory, to prevent mismatches with relative paths. """ prev_cwd = Path.cwd() - os.chdir(str(path)) + current = Path(path).resolve() + os.chdir(str(current)) try: - yield Path(path).resolve() + yield current finally: - os.chdir(prev_cwd) + os.chdir(str(prev_cwd)) @contextmanager From b2281119259681f04ffe178579efcb16a9b06f81 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 30 Jun 2020 14:54:45 +0200 Subject: [PATCH 16/24] fix funny printing --- spacy/cli/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index f0315dfe0..1c6d36edf 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -609,7 +609,7 @@ def run_commands( elif len(command) and command[0] in ("pip", "pip3"): command = [sys.executable, "-m", "pip", *command[1:]] if not silent: - print(" ".join(command)) + print(f"Running command: {command}") run_command(command) From a46b76f188290adc17e3bf5f883c089c18383ec7 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 30 Jun 2020 15:39:24 +0200 Subject: [PATCH 17/24] use current working dir as default throughout --- spacy/cli/project.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index 1c6d36edf..ce2f801ad 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -88,7 +88,7 @@ def project_clone_cli( @project_cli.command("init") def project_init_cli( - path: Path = Arg(..., help="Path to cloned project", exists=True, file_okay=False), + path: Path = Arg(Path.cwd(), help="Path to cloned project. Defaults to current working directory.", exists=True, file_okay=False), git: bool = Opt(False, "--git", "-G", help="Initialize project as a Git repo"), force: bool = Opt(False, "--force", "-F", help="Force initiziation"), ): @@ -104,7 +104,7 @@ def project_init_cli( @project_cli.command("assets") def project_assets_cli( # fmt: off - project_dir: Path = Arg(..., help="Path to cloned project", exists=True, file_okay=False), + project_dir: Path = Arg(Path.cwd(), help="Path to cloned project. Defaults to current working directory.", exists=True, file_okay=False), # fmt: on ): """Use DVC (Data Version Control) to fetch project assets. Assets are @@ -125,7 +125,7 @@ def project_assets_cli( def project_run_all_cli( # fmt: off ctx: typer.Context, - project_dir: Path = Arg(..., help="Location of project directory", exists=True, file_okay=False), + project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False), show_help: bool = Opt(False, "--help", help="Show help message and available subcommands") # fmt: on ): @@ -149,7 +149,7 @@ def project_run_all_cli( def project_run_cli( # fmt: off ctx: typer.Context, - project_dir: Path = Arg(..., help="Location of project directory", exists=True, file_okay=False), + project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False), subcommand: str = Arg(None, help="Name of command defined in project config"), show_help: bool = Opt(False, "--help", help="Show help message and available subcommands") # fmt: on @@ -173,7 +173,7 @@ def project_run_cli( @project_cli.command("exec", hidden=True) def project_exec_cli( # fmt: off - project_dir: Path = Arg(..., help="Location of project directory", exists=True, file_okay=False), + project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False), subcommand: str = Arg(..., help="Name of command defined in project config"), # fmt: on ): @@ -188,7 +188,7 @@ def project_exec_cli( @project_cli.command("update-dvc") def project_update_dvc_cli( # fmt: off - project_dir: Path = Arg(..., help="Location of project directory", exists=True, file_okay=False), + project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False), verbose: bool = Opt(False, "--verbose", "-V", help="Print more info"), force: bool = Opt(False, "--force", "-F", help="Force update DVC config"), # fmt: on From 1ae6fa2554e067f79ea4290a08b36b770e754c07 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 30 Jun 2020 16:04:53 +0200 Subject: [PATCH 18/24] move subcommand one place up as project_dir has default --- spacy/cli/project.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index ce2f801ad..ce7c30fb4 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -149,8 +149,8 @@ def project_run_all_cli( def project_run_cli( # fmt: off ctx: typer.Context, - project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False), subcommand: str = Arg(None, help="Name of command defined in project config"), + project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False), show_help: bool = Opt(False, "--help", help="Show help message and available subcommands") # fmt: on ): @@ -173,8 +173,8 @@ def project_run_cli( @project_cli.command("exec", hidden=True) def project_exec_cli( # fmt: off - project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False), subcommand: str = Arg(..., help="Name of command defined in project config"), + project_dir: Path = Arg(Path.cwd(), help="Location of project directory. Defaults to current working directory.", exists=True, file_okay=False), # fmt: on ): """Execute a command defined in the project config. This CLI command is From cd632d8ec23c48d59cad982ad60fd619b993deb0 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 30 Jun 2020 17:19:36 +0200 Subject: [PATCH 19/24] move folder for exec argument one up --- spacy/cli/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index ce7c30fb4..59c515dcb 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -525,7 +525,7 @@ def update_dvc_config( continue # Default to "." as the project path since dvc.yaml is auto-generated # and we don't want arbitrary paths in there - project_cmd = ["python", "-m", NAME, "project", "exec", ".", name] + project_cmd = ["python", "-m", NAME, "project", ".", "exec", name] deps_cmd = [c for cl in [["-d", p] for p in deps] for c in cl] outputs_cmd = [c for cl in [["-o", p] for p in outputs] for c in cl] outputs_nc_cmd = [c for cl in [["-O", p] for p in outputs_no_cache] for c in cl] From 39953c7c60e97cdd9af5b90e4a26634528548a19 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 30 Jun 2020 17:28:09 +0200 Subject: [PATCH 20/24] fix print_run_help with new arg order --- spacy/cli/project.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index 59c515dcb..6d0ec7991 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -397,13 +397,13 @@ def print_run_help(project_dir: Path, subcommand: Optional[str] = None) -> None: commands = {cmd["name"]: cmd for cmd in config_commands} if subcommand: validate_subcommand(commands.keys(), subcommand) - print(f"Usage: {COMMAND} project run {project_dir} {subcommand}") + print(f"Usage: {COMMAND} project run {subcommand} {project_dir}") help_text = commands[subcommand].get("help") if help_text: msg.text(f"\n{help_text}\n") else: print(f"\nAvailable commands in {CONFIG_FILE}") - print(f"Usage: {COMMAND} project run {project_dir} [COMMAND]") + print(f"Usage: {COMMAND} project run [COMMAND] {project_dir}") msg.table([(cmd["name"], cmd.get("help", "")) for cmd in config_commands]) msg.text("Run all commands defined in the 'run' block of the project config:") print(f"{COMMAND} project run-all {project_dir}") From 60f97bc519ecf607f7711c3ae0402f89d939989b Mon Sep 17 00:00:00 2001 From: svlandeg Date: Tue, 30 Jun 2020 17:28:43 +0200 Subject: [PATCH 21/24] add custom warning when run_command fails --- spacy/errors.py | 1 + spacy/util.py | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/spacy/errors.py b/spacy/errors.py index 1af673569..66a3c61da 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -539,6 +539,7 @@ class Errors(object): E199 = ("Unable to merge 0-length span at doc[{start}:{end}].") # TODO: fix numbering after merging develop into master + E970 = ("Can not execute command '{str_command}'. Do you have '{tool}' installed?") E971 = ("Found incompatible lengths in Doc.from_array: {array_length} for the " "array and {doc_length} for the Doc itself.") E972 = ("Example.__init__ got None for '{arg}'. Requires Doc.") diff --git a/spacy/util.py b/spacy/util.py index 1d998ec36..7c29bed8e 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -452,7 +452,12 @@ def run_command(command: Union[str, List[str]]) -> None: """ if isinstance(command, str): command = split_command(command) - status = subprocess.call(command, env=os.environ.copy()) + try: + status = subprocess.call(command, env=os.environ.copy()) + except FileNotFoundError: + raise FileNotFoundError( + Errors.E970.format(str_command=" ".join(command), tool=command[0]) + ) if status != 0: sys.exit(status) From 6da3500728af537df22f7b147a624956b74c4397 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 30 Jun 2020 20:35:51 +0200 Subject: [PATCH 22/24] Fix command substitution --- spacy/cli/project.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index 6d0ec7991..4cda55956 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -15,7 +15,7 @@ from ._app import app, Arg, Opt, COMMAND, NAME from .. import about from ..schemas import ProjectConfigSchema, validate from ..util import ensure_path, run_command, make_tempdir, working_dir -from ..util import get_hash, get_checksum +from ..util import get_hash, get_checksum, split_command CONFIG_FILE = "project.yml" @@ -588,7 +588,7 @@ def run_commands( ) -> None: """Run a sequence of commands in a subprocess, in order. - commands (List[str]): The split commands. + commands (List[str]): The string commands. variables (Dict[str, str]): Dictionary of variable names, mapped to their values. Will be used to substitute format string variables in the commands. @@ -597,6 +597,7 @@ def run_commands( for command in commands: # Substitute variables, e.g. "./{NAME}.json" command = command.format(**variables) + command = split_command(command) # Not sure if this is needed or a good idea. Motivation: users may often # use commands in their config that reference "python" and we want to # make sure that it's always executing the same Python that spaCy is From d64644d9d1f353fbb7167a93ed8d2b79c7a66864 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 30 Jun 2020 20:36:30 +0200 Subject: [PATCH 23/24] Adjust auto-formatting --- spacy/cli/project.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index 4cda55956..cba8a07dc 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -88,9 +88,11 @@ def project_clone_cli( @project_cli.command("init") def project_init_cli( + # fmt: off path: Path = Arg(Path.cwd(), help="Path to cloned project. Defaults to current working directory.", exists=True, file_okay=False), git: bool = Opt(False, "--git", "-G", help="Initialize project as a Git repo"), force: bool = Opt(False, "--force", "-F", help="Force initiziation"), + # fmt: on ): """Initialize a project directory with DVC and optionally Git. This should typically be taken care of automatically when you run the "project clone" From b032943c34fd79a724ca87cb289efd5972f30b28 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Tue, 30 Jun 2020 21:33:41 +0200 Subject: [PATCH 24/24] Fix funny printing again --- spacy/cli/project.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index cba8a07dc..5f125816c 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -612,7 +612,7 @@ def run_commands( elif len(command) and command[0] in ("pip", "pip3"): command = [sys.executable, "-m", "pip", *command[1:]] if not silent: - print(f"Running command: {command}") + print(f"Running command: {' '.join(command)}") run_command(command)