diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index 1db3a1d44..360d2439a 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -1,4 +1,4 @@ -from typing import Dict, Any, Union, List, Optional, TYPE_CHECKING +from typing import Dict, Any, Union, List, Optional, Tuple, TYPE_CHECKING import sys import shutil from pathlib import Path @@ -321,29 +321,37 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m # *that* we can do by path. # We're using Git and sparse checkout to only clone the files we need with make_tempdir() as tmp_dir: + git_version = get_git_version() + supports_sparse = git_version >= (2, 22) # This is the "clone, but don't download anything" part. - cmd = ( - f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " - f"--filter=blob:none " # <-- The key bit - f"-b {branch}" - ) + cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} " + if supports_sparse: + cmd += f"--filter=blob:none" # <-- The key bit + else: + msg.warn( + f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) " + f"that doesn't fully support sparse checkout yet. This means that " + f"more files than necessary may be downloaded temporarily. To " + f"only download the files needed, upgrade to Git v2.22 or above." + ) _attempt_run_command(cmd) # Now we need to find the missing filenames for the subpath we want. # Looking for this 'rev-list' command in the git --help? Hah. - cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}" + cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if supports_sparse else ''} -- {subpath}" ret = _attempt_run_command(cmd) git_repo = _from_http_to_git(repo) # Now pass those missings into another bit of git internals missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")]) - if not missings: + if supports_sparse and not missings: err = ( f"Could not find any relevant files for '{subpath}'. " f"Did you specify a correct and complete path within repo '{repo}' " f"and branch {branch}?" ) msg.fail(err, exits=1) - cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}" - _attempt_run_command(cmd) + if supports_sparse: + cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}" + _attempt_run_command(cmd) # And finally, we can checkout our subpath cmd = f"git -C {tmp_dir} checkout {branch} {subpath}" _attempt_run_command(cmd) @@ -351,15 +359,24 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m shutil.move(str(tmp_dir / Path(subpath)), str(dest)) -def _attempt_run_command(cmd): +def get_git_version() -> Tuple[int, int]: + ret = _attempt_run_command(["git", "--version"]) + # TODO: this seems kinda brittle? + version = ret.stdout[11:].strip().split(".") + return (int(version[0]), int(version[1])) + + +def _attempt_run_command(cmd: Union[str, List[str]]): try: return run_command(cmd, capture=True) except subprocess.CalledProcessError as e: - err = f"Could not run command: {cmd}." - msg.fail(err, exits=1) + err = f"Could not run command" + msg.fail(err) + print(cmd) + sys.exit(1) -def _from_http_to_git(repo): +def _from_http_to_git(repo: str) -> str: if repo.startswith("http://"): repo = repo.replace(r"http://", r"https://") if repo.startswith(r"https://"): diff --git a/spacy/cli/package.py b/spacy/cli/package.py index c457b3e17..8d6cd84c1 100644 --- a/spacy/cli/package.py +++ b/spacy/cli/package.py @@ -18,6 +18,7 @@ def package_cli( output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False), meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False), create_meta: bool = Opt(False, "--create-meta", "-c", "-C", help="Create meta.json, even if one exists"), + name: Optional[str] = Opt(None, "--name", "-n", help="Package name to override meta"), version: Optional[str] = Opt(None, "--version", "-v", help="Package version to override meta"), no_sdist: bool = Opt(False, "--no-sdist", "-NS", help="Don't build .tar.gz sdist, can be set if you want to run this step manually"), force: bool = Opt(False, "--force", "-f", "-F", help="Force overwriting existing data in output directory"), @@ -38,6 +39,7 @@ def package_cli( input_dir, output_dir, meta_path=meta_path, + name=name, version=version, create_meta=create_meta, create_sdist=not no_sdist, @@ -50,6 +52,7 @@ def package( input_dir: Path, output_dir: Path, meta_path: Optional[Path] = None, + name: Optional[str] = None, version: Optional[str] = None, create_meta: bool = False, create_sdist: bool = True, @@ -71,6 +74,8 @@ def package( msg.fail("Can't load pipeline meta.json", meta_path, exits=1) meta = srsly.read_json(meta_path) meta = get_meta(input_dir, meta) + if name is not None: + meta["name"] = name if version is not None: meta["version"] = version if not create_meta: # only print if user doesn't want to overwrite diff --git a/spacy/cli/project/clone.py b/spacy/cli/project/clone.py index 427df490f..ab617e4ba 100644 --- a/spacy/cli/project/clone.py +++ b/spacy/cli/project/clone.py @@ -27,7 +27,7 @@ def project_clone_cli( DOCS: https://nightly.spacy.io/api/cli#project-clone """ if dest is None: - dest = Path.cwd() / name + dest = Path.cwd() / Path(name).parts[-1] project_clone(name, dest, repo=repo, branch=branch) diff --git a/spacy/language.py b/spacy/language.py index 777b0c24b..70dad59f3 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -243,7 +243,8 @@ class Language: self._config["nlp"]["pipeline"] = list(self.component_names) self._config["nlp"]["disabled"] = list(self.disabled) self._config["components"] = pipeline - self._config["training"]["score_weights"] = combine_score_weights(score_weights) + if not self._config["training"].get("score_weights"): + self._config["training"]["score_weights"] = combine_score_weights(score_weights) if not srsly.is_json_serializable(self._config): raise ValueError(Errors.E961.format(config=self._config)) return self._config diff --git a/spacy/schemas.py b/spacy/schemas.py index baa893802..38f47c668 100644 --- a/spacy/schemas.py +++ b/spacy/schemas.py @@ -180,7 +180,7 @@ class ModelMetaSchema(BaseModel): url: StrictStr = Field("", title="Model author URL") sources: Optional[Union[List[StrictStr], List[Dict[str, str]]]] = Field(None, title="Training data sources") vectors: Dict[str, Any] = Field({}, title="Included word vectors") - labels: Dict[str, Dict[str, List[str]]] = Field({}, title="Component labels, keyed by component name") + labels: Dict[str, List[str]] = Field({}, title="Component labels, keyed by component name") accuracy: Dict[str, Union[float, Dict[str, float]]] = Field({}, title="Accuracy numbers") speed: Dict[str, Union[float, int]] = Field({}, title="Speed evaluation numbers") spacy_git_version: StrictStr = Field("", title="Commit of spaCy version used") diff --git a/spacy/util.py b/spacy/util.py index bd567ddc7..d8df04554 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -648,7 +648,7 @@ def join_command(command: List[str]) -> str: return " ".join(shlex.quote(cmd) for cmd in command) -def run_command(command: Union[str, List[str]], *, capture=False, stdin=None) -> None: +def run_command(command: Union[str, List[str]], *, capture=False, stdin=None): """Run a command on the command line as a subprocess. If the subprocess returns a non-zero exit code, a system exit is performed. diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md index ea61b9ae3..47af9be96 100644 --- a/website/docs/api/cli.md +++ b/website/docs/api/cli.md @@ -852,7 +852,7 @@ this, you can set the `--no-sdist` flag. ```cli -$ python -m spacy package [input_dir] [output_dir] [--meta-path] [--create-meta] [--no-sdist] [--version] [--force] +$ python -m spacy package [input_dir] [output_dir] [--meta-path] [--create-meta] [--no-sdist] [--name] [--version] [--force] ``` > #### Example @@ -870,6 +870,7 @@ $ python -m spacy package [input_dir] [output_dir] [--meta-path] [--create-meta] | `--meta-path`, `-m` 2 | Path to [`meta.json`](/api/data-formats#meta) file (optional). ~~Optional[Path] \(option)~~ | | `--create-meta`, `-C` 2 | Create a `meta.json` file on the command line, even if one already exists in the directory. If an existing file is found, its entries will be shown as the defaults in the command line prompt. ~~bool (flag)~~ | | `--no-sdist`, `-NS`, | Don't build the `.tar.gz` sdist automatically. Can be set if you want to run this step manually. ~~bool (flag)~~ | +| `--name`, `-n` 3 | Package name to override in meta. ~~Optional[str] \(option)~~ | | `--version`, `-v` 3 | Package version to override in meta. Useful when training new versions, as it doesn't require editing the meta template. ~~Optional[str] \(option)~~ | | `--force`, `-f` | Force overwriting of existing folder in output directory. ~~bool (flag)~~ | | `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |