From a6b76440b766a16afba8716118275ea79b918ff4 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Mon, 22 Jun 2020 14:53:31 +0200 Subject: [PATCH] Update project CLI --- spacy/cli/project.py | 126 +++++++++++++++++++++++++++++++------------ spacy/schemas.py | 10 ++-- spacy/tests/util.py | 9 +--- spacy/util.py | 9 ++++ 4 files changed, 110 insertions(+), 44 deletions(-) diff --git a/spacy/cli/project.py b/spacy/cli/project.py index c33f6a395..454b99d25 100644 --- a/spacy/cli/project.py +++ b/spacy/cli/project.py @@ -7,15 +7,26 @@ import subprocess import shlex import os import re +import shutil -from ._app import app, Arg, Opt +from ._app import app, Arg, Opt, COMMAND from .. import about from ..schemas import ProjectConfigSchema, validate -from ..util import ensure_path, run_command +from ..util import ensure_path, run_command, make_tempdir, working_dir CONFIG_FILE = "project.yml" -DIRS = ["assets", "configs", "packages", "metrics", "scripts", "notebooks", "training"] +DIRS = [ + "assets", + "metas", + "configs", + "packages", + "metrics", + "scripts", + "notebooks", + "training", + "corpus", +] CACHES = [ Path.home() / ".torch", Path.home() / ".caches" / "torch", @@ -45,28 +56,37 @@ def callback(): def project_clone_cli( # fmt: off name: str = Arg(..., help="The name of the template to fetch"), - dest: Path = Arg(Path.cwd(), help="Where to download and work. Defaults to current working directory.", exists=True, file_okay=False), + dest: Path = Arg(Path.cwd(), help="Where to download and work. Defaults to current working directory.", exists=False), repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to look in."), + verbose: bool = Opt(False, "--verbose", "-V", help="Show detailed information") # fmt: on ): """Clone a project template from a repository.""" - project_clone(name, dest, repo=repo) + project_clone(name, dest, repo=repo, verbose=verbose) -def project_clone(name: str, dest: Path, repo: str = about.__projects__) -> None: +def project_clone( + name: str, dest: Path, *, repo: str = about.__projects__, verbose: bool = False +) -> None: dest = ensure_path(dest) - if not dest or not dest.exists() or not dest.is_dir(): - msg.fail("Not a valid directory to clone project", dest, exits=1) - cmd = ["dvc", "get", repo, name, "-o", str(dest)] - msg.info(" ".join(cmd)) - run_command(cmd) + check_clone_dest(dest) + # When cloning a subdirectory with DVC, it will create a folder of that name + # within the destination dir, so we use a tempdir and then copy it into the + # parent directory to create the cloned directory + with make_tempdir() as tmp_dir: + cmd = ["dvc", "get", repo, name, "-o", str(tmp_dir)] + if verbose: + cmd.append("-v") + print(" ".join(cmd)) + run_command(cmd) + shutil.move(str(tmp_dir / Path(name).name), str(dest)) msg.good(f"Cloned project '{name}' from {repo}") - with msg.loading("Setting up directories..."): - for sub_dir in DIRS: - dir_path = dest / sub_dir - if not dir_path.exists(): - dir_path.mkdir(parents=True) + for sub_dir in DIRS: + dir_path = dest / sub_dir + if not dir_path.exists(): + dir_path.mkdir(parents=True) msg.good(f"Your project is now ready!", dest.resolve()) + print(f"To get the assets, run:\npython -m spacy project get-assets {dest}") @project_cli.command("get-assets") @@ -91,7 +111,6 @@ def project_get_assets(project_path: Path) -> None: dest_path = project_path / dest check_asset(url) cmd = ["dvc", "get-url", url, str(dest_path)] - msg.info(" ".join(cmd)) run_command(cmd) msg.good(f"Got asset {dest}") @@ -100,11 +119,33 @@ def project_get_assets(project_path: Path) -> None: def project_run_cli( # fmt: off project_dir: Path = Arg(..., help="Location of project directory", exists=True, file_okay=False), - subcommand: str = Arg(None, help="Name of command defined in project config") + subcommand: str = Arg(None, help="Name of command defined in project config"), + show_help: bool = Opt(False, "--help", help="Show help message and available subcommands") # fmt: on ): """Run scripts defined in the project.""" - project_run(project_dir, subcommand) + if show_help: + print_run_help(project_dir, subcommand) + else: + project_run(project_dir, subcommand) + + +def print_run_help(project_dir: Path, subcommand: str) -> None: + """Simulate a CLI help prompt using the info available in the project config.""" + config = load_project_config(project_dir) + config_commands = config.get("commands", []) + commands = {cmd["name"]: cmd for cmd in config_commands} + if subcommand: + if subcommand not in commands: + msg.fail(f"Can't find command '{subcommand}' in project config", exits=1) + print(f"Usage: {COMMAND} project run {project_dir} {subcommand}") + help_text = commands[subcommand].get("help") + if help_text: + msg.text(f"\n{help_text}\n") + else: + print(f"\nAvailable commands in {CONFIG_FILE}") + print(f"Usage: {COMMAND} project run {project_dir} [COMMAND]") + msg.table([(cmd["name"], cmd.get("help", "")) for cmd in config_commands]) def project_run(project_dir: Path, subcommand: str) -> None: @@ -112,20 +153,23 @@ def project_run(project_dir: Path, subcommand: str) -> None: config_commands = config.get("commands", []) variables = config.get("variables", {}) commands = {cmd["name"]: cmd for cmd in config_commands} - if subcommand is None: - all_commands = config.get("run", []) - if not all_commands: - msg.warn("No run commands defined in project config", exits=0) - msg.table([(cmd["name"], cmd.get("help", "")) for cmd in config_commands]) - for command in all_commands: - if command not in commands: - msg.fail(f"Can't find command '{command}' in project config", exits=1) - msg.divider(command) - run_commands(commands[command]["script"], variables) - return - if subcommand not in commands: + if subcommand and subcommand not in commands: msg.fail(f"Can't find command '{subcommand}' in project config", exits=1) - run_commands(commands[subcommand]["script"], variables) + with working_dir(project_dir): + if subcommand is None: + all_commands = config.get("run", []) + if not all_commands: + msg.warn("No run commands defined in project config", exits=0) + msg.table([(cmd["name"], cmd.get("help", "")) for cmd in config_commands]) + for command in all_commands: + if command not in commands: + msg.fail( + f"Can't find command '{command}' in project config", exits=1 + ) + msg.divider(command) + run_commands(commands[command]["script"], variables) + else: + run_commands(commands[subcommand]["script"], variables) app.add_typer(project_cli, name="project") @@ -146,7 +190,7 @@ def run_commands(commands: List[str] = tuple(), variables: Dict[str, str] = {}) for command in commands: # Substitute variables, e.g. "./{NAME}.json" command = command.format(**variables) - msg.info(command) + print(command) run_command(shlex.split(command)) @@ -160,3 +204,19 @@ def check_asset(url: str) -> None: "download the raw file, click on 'Download' on the GitHub page " "and copy the raw.githubusercontent.com URL instead." ) + # url.replace("github.com", "raw.githubusercontent.com").replace("/blob/", "/").replace("/tree/", "/") + + +def check_clone_dest(dest: Path) -> None: + """Check and validate that the destination path can be used to clone.""" + if not dest: + msg.fail(f"Not a valid directory to clone project: {dest}", exits=1) + if dest.exists(): + # Directory already exists (not allowed, clone needs to create it) + msg.fail(f"Can't clone project, directory already exists: {dest}", exits=1) + if not dest.parent.exists(): + # We're not creating parents, parent dir should exist + msg.fail( + f"Can't clone project, parent directory doesn't exist: {dest.parent}", + exits=1, + ) diff --git a/spacy/schemas.py b/spacy/schemas.py index 04f9bbffa..43694b325 100644 --- a/spacy/schemas.py +++ b/spacy/schemas.py @@ -229,11 +229,15 @@ class ProjectConfigCommand(BaseModel): name: StrictStr = Field(..., title="Name of command") help: Optional[StrictStr] = Field(None, title="Command description") script: List[StrictStr] = Field([], title="List of CLI commands to run, in order") - dvc_deps: List[StrictStr] = Field([], title="Data Version Control dependencies") - dvc_outputs: List[StrictStr] = Field([], title="Data Version Control outputs") - dvc_outputs_no_cache: List[StrictStr] = Field([], title="Data Version Control outputs (no cache)") + deps: List[StrictStr] = Field([], title="Data Version Control dependencies") + outputs: List[StrictStr] = Field([], title="Data Version Control outputs") + outputs_no_cache: List[StrictStr] = Field([], title="Data Version Control outputs (no cache)") # fmt: on + class Config: + title = "A single named command specified in a project config" + extra = "forbid" + class ProjectConfigSchema(BaseModel): # fmt: off diff --git a/spacy/tests/util.py b/spacy/tests/util.py index 3d0a023c9..01c4254c4 100644 --- a/spacy/tests/util.py +++ b/spacy/tests/util.py @@ -8,8 +8,8 @@ from pathlib import Path from spacy import Errors from spacy.tokens import Doc, Span from spacy.attrs import POS, TAG, HEAD, DEP, LEMMA - from spacy.vocab import Vocab +from spacy.util import make_tempdir @contextlib.contextmanager @@ -19,13 +19,6 @@ def make_tempfile(mode="r"): f.close() -@contextlib.contextmanager -def make_tempdir(): - d = Path(tempfile.mkdtemp()) - yield d - shutil.rmtree(str(d)) - - def get_doc( vocab, words=[], pos=None, heads=None, deps=None, tags=None, ents=None, lemmas=None ): diff --git a/spacy/util.py b/spacy/util.py index 7f27e9467..feb863261 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -19,6 +19,8 @@ from packaging.specifiers import SpecifierSet, InvalidSpecifier from packaging.version import Version, InvalidVersion import subprocess from contextlib import contextmanager +import tempfile +import shutil try: @@ -453,6 +455,13 @@ def working_dir(path: Union[str, Path]) -> None: os.chdir(prev_cwd) +@contextmanager +def make_tempdir(): + d = Path(tempfile.mkdtemp()) + yield d + shutil.rmtree(str(d)) + + def is_in_jupyter(): """Check if user is running spaCy from a Jupyter notebook by detecting the IPython kernel. Mainly used for the displaCy visualizer.