Update project CLI

This commit is contained in:
Ines Montani 2020-06-22 14:53:31 +02:00
parent 3f2f5f9cb3
commit a6b76440b7
4 changed files with 110 additions and 44 deletions

View File

@ -7,15 +7,26 @@ import subprocess
import shlex import shlex
import os import os
import re import re
import shutil
from ._app import app, Arg, Opt from ._app import app, Arg, Opt, COMMAND
from .. import about from .. import about
from ..schemas import ProjectConfigSchema, validate from ..schemas import ProjectConfigSchema, validate
from ..util import ensure_path, run_command from ..util import ensure_path, run_command, make_tempdir, working_dir
CONFIG_FILE = "project.yml" CONFIG_FILE = "project.yml"
DIRS = ["assets", "configs", "packages", "metrics", "scripts", "notebooks", "training"] DIRS = [
"assets",
"metas",
"configs",
"packages",
"metrics",
"scripts",
"notebooks",
"training",
"corpus",
]
CACHES = [ CACHES = [
Path.home() / ".torch", Path.home() / ".torch",
Path.home() / ".caches" / "torch", Path.home() / ".caches" / "torch",
@ -45,28 +56,37 @@ def callback():
def project_clone_cli( def project_clone_cli(
# fmt: off # fmt: off
name: str = Arg(..., help="The name of the template to fetch"), name: str = Arg(..., help="The name of the template to fetch"),
dest: Path = Arg(Path.cwd(), help="Where to download and work. Defaults to current working directory.", exists=True, file_okay=False), dest: Path = Arg(Path.cwd(), help="Where to download and work. Defaults to current working directory.", exists=False),
repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to look in."), repo: str = Opt(about.__projects__, "--repo", "-r", help="The repository to look in."),
verbose: bool = Opt(False, "--verbose", "-V", help="Show detailed information")
# fmt: on # fmt: on
): ):
"""Clone a project template from a repository.""" """Clone a project template from a repository."""
project_clone(name, dest, repo=repo) project_clone(name, dest, repo=repo, verbose=verbose)
def project_clone(name: str, dest: Path, repo: str = about.__projects__) -> None: def project_clone(
name: str, dest: Path, *, repo: str = about.__projects__, verbose: bool = False
) -> None:
dest = ensure_path(dest) dest = ensure_path(dest)
if not dest or not dest.exists() or not dest.is_dir(): check_clone_dest(dest)
msg.fail("Not a valid directory to clone project", dest, exits=1) # When cloning a subdirectory with DVC, it will create a folder of that name
cmd = ["dvc", "get", repo, name, "-o", str(dest)] # within the destination dir, so we use a tempdir and then copy it into the
msg.info(" ".join(cmd)) # parent directory to create the cloned directory
with make_tempdir() as tmp_dir:
cmd = ["dvc", "get", repo, name, "-o", str(tmp_dir)]
if verbose:
cmd.append("-v")
print(" ".join(cmd))
run_command(cmd) run_command(cmd)
shutil.move(str(tmp_dir / Path(name).name), str(dest))
msg.good(f"Cloned project '{name}' from {repo}") msg.good(f"Cloned project '{name}' from {repo}")
with msg.loading("Setting up directories..."):
for sub_dir in DIRS: for sub_dir in DIRS:
dir_path = dest / sub_dir dir_path = dest / sub_dir
if not dir_path.exists(): if not dir_path.exists():
dir_path.mkdir(parents=True) dir_path.mkdir(parents=True)
msg.good(f"Your project is now ready!", dest.resolve()) msg.good(f"Your project is now ready!", dest.resolve())
print(f"To get the assets, run:\npython -m spacy project get-assets {dest}")
@project_cli.command("get-assets") @project_cli.command("get-assets")
@ -91,7 +111,6 @@ def project_get_assets(project_path: Path) -> None:
dest_path = project_path / dest dest_path = project_path / dest
check_asset(url) check_asset(url)
cmd = ["dvc", "get-url", url, str(dest_path)] cmd = ["dvc", "get-url", url, str(dest_path)]
msg.info(" ".join(cmd))
run_command(cmd) run_command(cmd)
msg.good(f"Got asset {dest}") msg.good(f"Got asset {dest}")
@ -100,18 +119,43 @@ def project_get_assets(project_path: Path) -> None:
def project_run_cli( def project_run_cli(
# fmt: off # fmt: off
project_dir: Path = Arg(..., help="Location of project directory", exists=True, file_okay=False), project_dir: Path = Arg(..., help="Location of project directory", exists=True, file_okay=False),
subcommand: str = Arg(None, help="Name of command defined in project config") subcommand: str = Arg(None, help="Name of command defined in project config"),
show_help: bool = Opt(False, "--help", help="Show help message and available subcommands")
# fmt: on # fmt: on
): ):
"""Run scripts defined in the project.""" """Run scripts defined in the project."""
if show_help:
print_run_help(project_dir, subcommand)
else:
project_run(project_dir, subcommand) project_run(project_dir, subcommand)
def print_run_help(project_dir: Path, subcommand: str) -> None:
    """Print a help message for "project run", built from the project config.

    With a subcommand, print the usage line for that command, followed by its
    help text if the config provides one. Without a subcommand, print the
    general usage line and a table of all commands defined in the config.

    project_dir (Path): The project directory the config is loaded from.
    subcommand (str): Name of the command to show help for. A falsy value
        selects the overview of all available commands.
    """
    project_config = load_project_config(project_dir)
    command_specs = project_config.get("commands", [])
    # Index the command definitions by name so a single one can be looked up.
    specs_by_name = {spec["name"]: spec for spec in command_specs}
    if not subcommand:
        # No specific command requested: list everything the config defines.
        print(f"\nAvailable commands in {CONFIG_FILE}")
        print(f"Usage: {COMMAND} project run {project_dir} [COMMAND]")
        overview = [(spec["name"], spec.get("help", "")) for spec in command_specs]
        msg.table(overview)
        return
    if subcommand not in specs_by_name:
        msg.fail(f"Can't find command '{subcommand}' in project config", exits=1)
    print(f"Usage: {COMMAND} project run {project_dir} {subcommand}")
    help_text = specs_by_name[subcommand].get("help")
    if help_text:
        msg.text(f"\n{help_text}\n")
def project_run(project_dir: Path, subcommand: str) -> None: def project_run(project_dir: Path, subcommand: str) -> None:
config = load_project_config(project_dir) config = load_project_config(project_dir)
config_commands = config.get("commands", []) config_commands = config.get("commands", [])
variables = config.get("variables", {}) variables = config.get("variables", {})
commands = {cmd["name"]: cmd for cmd in config_commands} commands = {cmd["name"]: cmd for cmd in config_commands}
if subcommand and subcommand not in commands:
msg.fail(f"Can't find command '{subcommand}' in project config", exits=1)
with working_dir(project_dir):
if subcommand is None: if subcommand is None:
all_commands = config.get("run", []) all_commands = config.get("run", [])
if not all_commands: if not all_commands:
@ -119,12 +163,12 @@ def project_run(project_dir: Path, subcommand: str) -> None:
msg.table([(cmd["name"], cmd.get("help", "")) for cmd in config_commands]) msg.table([(cmd["name"], cmd.get("help", "")) for cmd in config_commands])
for command in all_commands: for command in all_commands:
if command not in commands: if command not in commands:
msg.fail(f"Can't find command '{command}' in project config", exits=1) msg.fail(
f"Can't find command '{command}' in project config", exits=1
)
msg.divider(command) msg.divider(command)
run_commands(commands[command]["script"], variables) run_commands(commands[command]["script"], variables)
return else:
if subcommand not in commands:
msg.fail(f"Can't find command '{subcommand}' in project config", exits=1)
run_commands(commands[subcommand]["script"], variables) run_commands(commands[subcommand]["script"], variables)
@ -146,7 +190,7 @@ def run_commands(commands: List[str] = tuple(), variables: Dict[str, str] = {})
for command in commands: for command in commands:
# Substitute variables, e.g. "./{NAME}.json" # Substitute variables, e.g. "./{NAME}.json"
command = command.format(**variables) command = command.format(**variables)
msg.info(command) print(command)
run_command(shlex.split(command)) run_command(shlex.split(command))
@ -160,3 +204,19 @@ def check_asset(url: str) -> None:
"download the raw file, click on 'Download' on the GitHub page " "download the raw file, click on 'Download' on the GitHub page "
"and copy the raw.githubusercontent.com URL instead." "and copy the raw.githubusercontent.com URL instead."
) )
# url.replace("github.com", "raw.githubusercontent.com").replace("/blob/", "/").replace("/tree/", "/")
def check_clone_dest(dest: Path) -> None:
    """Validate that a project can be cloned into the given destination.

    Fails with an error message and exit code 1 if the destination is falsy,
    if it already exists, or if its parent directory doesn't exist.

    dest (Path): The destination path to check.
    """
    if not dest:
        msg.fail(f"Not a valid directory to clone project: {dest}", exits=1)
    # The clone step has to create the directory itself, so an existing path
    # is rejected.
    already_present = dest.exists()
    if already_present:
        msg.fail(f"Can't clone project, directory already exists: {dest}", exits=1)
    # Parents are not created on the user's behalf, so the parent has to exist.
    parent_present = dest.parent.exists()
    if not parent_present:
        msg.fail(
            f"Can't clone project, parent directory doesn't exist: {dest.parent}",
            exits=1,
        )

View File

@ -229,11 +229,15 @@ class ProjectConfigCommand(BaseModel):
name: StrictStr = Field(..., title="Name of command") name: StrictStr = Field(..., title="Name of command")
help: Optional[StrictStr] = Field(None, title="Command description") help: Optional[StrictStr] = Field(None, title="Command description")
script: List[StrictStr] = Field([], title="List of CLI commands to run, in order") script: List[StrictStr] = Field([], title="List of CLI commands to run, in order")
dvc_deps: List[StrictStr] = Field([], title="Data Version Control dependencies") deps: List[StrictStr] = Field([], title="Data Version Control dependencies")
dvc_outputs: List[StrictStr] = Field([], title="Data Version Control outputs") outputs: List[StrictStr] = Field([], title="Data Version Control outputs")
dvc_outputs_no_cache: List[StrictStr] = Field([], title="Data Version Control outputs (no cache)") outputs_no_cache: List[StrictStr] = Field([], title="Data Version Control outputs (no cache)")
# fmt: on # fmt: on
class Config:
title = "A single named command specified in a project config"
extra = "forbid"
class ProjectConfigSchema(BaseModel): class ProjectConfigSchema(BaseModel):
# fmt: off # fmt: off

View File

@ -8,8 +8,8 @@ from pathlib import Path
from spacy import Errors from spacy import Errors
from spacy.tokens import Doc, Span from spacy.tokens import Doc, Span
from spacy.attrs import POS, TAG, HEAD, DEP, LEMMA from spacy.attrs import POS, TAG, HEAD, DEP, LEMMA
from spacy.vocab import Vocab from spacy.vocab import Vocab
from spacy.util import make_tempdir
@contextlib.contextmanager @contextlib.contextmanager
@ -19,13 +19,6 @@ def make_tempfile(mode="r"):
f.close() f.close()
@contextlib.contextmanager
def make_tempdir():
    """Create a temporary directory, yield its path and remove it afterwards.

    YIELDS (Path): The path of the newly created temporary directory.
    """
    d = Path(tempfile.mkdtemp())
    try:
        yield d
    finally:
        # Remove the directory even if the body of the `with` block raises,
        # so a failing block doesn't leave the temporary directory behind.
        shutil.rmtree(str(d))
def get_doc( def get_doc(
vocab, words=[], pos=None, heads=None, deps=None, tags=None, ents=None, lemmas=None vocab, words=[], pos=None, heads=None, deps=None, tags=None, ents=None, lemmas=None
): ):

View File

@ -19,6 +19,8 @@ from packaging.specifiers import SpecifierSet, InvalidSpecifier
from packaging.version import Version, InvalidVersion from packaging.version import Version, InvalidVersion
import subprocess import subprocess
from contextlib import contextmanager from contextlib import contextmanager
import tempfile
import shutil
try: try:
@ -453,6 +455,13 @@ def working_dir(path: Union[str, Path]) -> None:
os.chdir(prev_cwd) os.chdir(prev_cwd)
@contextmanager
def make_tempdir():
    """Create a temporary directory, yield its path and remove it afterwards.

    YIELDS (Path): The path of the newly created temporary directory.
    """
    d = Path(tempfile.mkdtemp())
    try:
        yield d
    finally:
        # Remove the directory even if the body of the `with` block raises,
        # so a failing block doesn't leave the temporary directory behind.
        shutil.rmtree(str(d))
def is_in_jupyter(): def is_in_jupyter():
"""Check if user is running spaCy from a Jupyter notebook by detecting the """Check if user is running spaCy from a Jupyter notebook by detecting the
IPython kernel. Mainly used for the displaCy visualizer. IPython kernel. Mainly used for the displaCy visualizer.