mirror of https://github.com/explosion/spaCy.git
Merge remote-tracking branch 'upstream/develop' into feature/cli-config
This commit is contained in:
commit
a75cfe0da6
|
@ -1,4 +1,4 @@
|
|||
from typing import Dict, Any, Union, List, Optional, TYPE_CHECKING
|
||||
from typing import Dict, Any, Union, List, Optional, Tuple, TYPE_CHECKING
|
||||
import sys
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
@ -321,29 +321,37 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
|
|||
# *that* we can do by path.
|
||||
# We're using Git and sparse checkout to only clone the files we need
|
||||
with make_tempdir() as tmp_dir:
|
||||
git_version = get_git_version()
|
||||
supports_sparse = git_version >= (2, 22)
|
||||
# This is the "clone, but don't download anything" part.
|
||||
cmd = (
|
||||
f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
|
||||
f"--filter=blob:none " # <-- The key bit
|
||||
f"-b {branch}"
|
||||
)
|
||||
cmd = f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " f"-b {branch} "
|
||||
if supports_sparse:
|
||||
cmd += f"--filter=blob:none" # <-- The key bit
|
||||
else:
|
||||
msg.warn(
|
||||
f"You're running an old version of Git (v{git_version[0]}.{git_version[1]}) "
|
||||
f"that doesn't fully support sparse checkout yet. This means that "
|
||||
f"more files than necessary may be downloaded temporarily. To "
|
||||
f"only download the files needed, upgrade to Git v2.22 or above."
|
||||
)
|
||||
_attempt_run_command(cmd)
|
||||
# Now we need to find the missing filenames for the subpath we want.
|
||||
# Looking for this 'rev-list' command in the git --help? Hah.
|
||||
cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
|
||||
cmd = f"git -C {tmp_dir} rev-list --objects --all {'--missing=print ' if supports_sparse else ''} -- {subpath}"
|
||||
ret = _attempt_run_command(cmd)
|
||||
git_repo = _from_http_to_git(repo)
|
||||
# Now pass those missings into another bit of git internals
|
||||
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
|
||||
if not missings:
|
||||
if supports_sparse and not missings:
|
||||
err = (
|
||||
f"Could not find any relevant files for '{subpath}'. "
|
||||
f"Did you specify a correct and complete path within repo '{repo}' "
|
||||
f"and branch {branch}?"
|
||||
)
|
||||
msg.fail(err, exits=1)
|
||||
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
|
||||
_attempt_run_command(cmd)
|
||||
if supports_sparse:
|
||||
cmd = f"git -C {tmp_dir} fetch-pack {git_repo} {missings}"
|
||||
_attempt_run_command(cmd)
|
||||
# And finally, we can checkout our subpath
|
||||
cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
|
||||
_attempt_run_command(cmd)
|
||||
|
@ -351,15 +359,24 @@ def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "m
|
|||
shutil.move(str(tmp_dir / Path(subpath)), str(dest))
|
||||
|
||||
|
||||
def _attempt_run_command(cmd):
|
||||
def get_git_version() -> Tuple[int, int]:
|
||||
ret = _attempt_run_command(["git", "--version"])
|
||||
# TODO: this seems kinda brittle?
|
||||
version = ret.stdout[11:].strip().split(".")
|
||||
return (int(version[0]), int(version[1]))
|
||||
|
||||
|
||||
def _attempt_run_command(cmd: Union[str, List[str]]):
|
||||
try:
|
||||
return run_command(cmd, capture=True)
|
||||
except subprocess.CalledProcessError as e:
|
||||
err = f"Could not run command: {cmd}."
|
||||
msg.fail(err, exits=1)
|
||||
err = f"Could not run command"
|
||||
msg.fail(err)
|
||||
print(cmd)
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def _from_http_to_git(repo):
|
||||
def _from_http_to_git(repo: str) -> str:
|
||||
if repo.startswith("http://"):
|
||||
repo = repo.replace(r"http://", r"https://")
|
||||
if repo.startswith(r"https://"):
|
||||
|
|
|
@ -18,6 +18,7 @@ def package_cli(
|
|||
output_dir: Path = Arg(..., help="Output parent directory", exists=True, file_okay=False),
|
||||
meta_path: Optional[Path] = Opt(None, "--meta-path", "--meta", "-m", help="Path to meta.json", exists=True, dir_okay=False),
|
||||
create_meta: bool = Opt(False, "--create-meta", "-c", "-C", help="Create meta.json, even if one exists"),
|
||||
name: Optional[str] = Opt(None, "--name", "-n", help="Package name to override meta"),
|
||||
version: Optional[str] = Opt(None, "--version", "-v", help="Package version to override meta"),
|
||||
no_sdist: bool = Opt(False, "--no-sdist", "-NS", help="Don't build .tar.gz sdist, can be set if you want to run this step manually"),
|
||||
force: bool = Opt(False, "--force", "-f", "-F", help="Force overwriting existing data in output directory"),
|
||||
|
@ -38,6 +39,7 @@ def package_cli(
|
|||
input_dir,
|
||||
output_dir,
|
||||
meta_path=meta_path,
|
||||
name=name,
|
||||
version=version,
|
||||
create_meta=create_meta,
|
||||
create_sdist=not no_sdist,
|
||||
|
@ -50,6 +52,7 @@ def package(
|
|||
input_dir: Path,
|
||||
output_dir: Path,
|
||||
meta_path: Optional[Path] = None,
|
||||
name: Optional[str] = None,
|
||||
version: Optional[str] = None,
|
||||
create_meta: bool = False,
|
||||
create_sdist: bool = True,
|
||||
|
@ -71,6 +74,8 @@ def package(
|
|||
msg.fail("Can't load pipeline meta.json", meta_path, exits=1)
|
||||
meta = srsly.read_json(meta_path)
|
||||
meta = get_meta(input_dir, meta)
|
||||
if name is not None:
|
||||
meta["name"] = name
|
||||
if version is not None:
|
||||
meta["version"] = version
|
||||
if not create_meta: # only print if user doesn't want to overwrite
|
||||
|
|
|
@ -27,7 +27,7 @@ def project_clone_cli(
|
|||
DOCS: https://nightly.spacy.io/api/cli#project-clone
|
||||
"""
|
||||
if dest is None:
|
||||
dest = Path.cwd() / name
|
||||
dest = Path.cwd() / Path(name).parts[-1]
|
||||
project_clone(name, dest, repo=repo, branch=branch)
|
||||
|
||||
|
||||
|
|
|
@ -243,7 +243,8 @@ class Language:
|
|||
self._config["nlp"]["pipeline"] = list(self.component_names)
|
||||
self._config["nlp"]["disabled"] = list(self.disabled)
|
||||
self._config["components"] = pipeline
|
||||
self._config["training"]["score_weights"] = combine_score_weights(score_weights)
|
||||
if not self._config["training"].get("score_weights"):
|
||||
self._config["training"]["score_weights"] = combine_score_weights(score_weights)
|
||||
if not srsly.is_json_serializable(self._config):
|
||||
raise ValueError(Errors.E961.format(config=self._config))
|
||||
return self._config
|
||||
|
|
|
@ -180,7 +180,7 @@ class ModelMetaSchema(BaseModel):
|
|||
url: StrictStr = Field("", title="Model author URL")
|
||||
sources: Optional[Union[List[StrictStr], List[Dict[str, str]]]] = Field(None, title="Training data sources")
|
||||
vectors: Dict[str, Any] = Field({}, title="Included word vectors")
|
||||
labels: Dict[str, Dict[str, List[str]]] = Field({}, title="Component labels, keyed by component name")
|
||||
labels: Dict[str, List[str]] = Field({}, title="Component labels, keyed by component name")
|
||||
accuracy: Dict[str, Union[float, Dict[str, float]]] = Field({}, title="Accuracy numbers")
|
||||
speed: Dict[str, Union[float, int]] = Field({}, title="Speed evaluation numbers")
|
||||
spacy_git_version: StrictStr = Field("", title="Commit of spaCy version used")
|
||||
|
|
|
@ -648,7 +648,7 @@ def join_command(command: List[str]) -> str:
|
|||
return " ".join(shlex.quote(cmd) for cmd in command)
|
||||
|
||||
|
||||
def run_command(command: Union[str, List[str]], *, capture=False, stdin=None) -> None:
|
||||
def run_command(command: Union[str, List[str]], *, capture=False, stdin=None):
|
||||
"""Run a command on the command line as a subprocess. If the subprocess
|
||||
returns a non-zero exit code, a system exit is performed.
|
||||
|
||||
|
|
|
@ -852,7 +852,7 @@ this, you can set the `--no-sdist` flag.
|
|||
</Infobox>
|
||||
|
||||
```cli
|
||||
$ python -m spacy package [input_dir] [output_dir] [--meta-path] [--create-meta] [--no-sdist] [--version] [--force]
|
||||
$ python -m spacy package [input_dir] [output_dir] [--meta-path] [--create-meta] [--no-sdist] [--name] [--version] [--force]
|
||||
```
|
||||
|
||||
> #### Example
|
||||
|
@ -870,6 +870,7 @@ $ python -m spacy package [input_dir] [output_dir] [--meta-path] [--create-meta]
|
|||
| `--meta-path`, `-m` <Tag variant="new">2</Tag> | Path to [`meta.json`](/api/data-formats#meta) file (optional). ~~Optional[Path] \(option)~~ |
|
||||
| `--create-meta`, `-C` <Tag variant="new">2</Tag> | Create a `meta.json` file on the command line, even if one already exists in the directory. If an existing file is found, its entries will be shown as the defaults in the command line prompt. ~~bool (flag)~~ |
|
||||
| `--no-sdist`, `-NS`, | Don't build the `.tar.gz` sdist automatically. Can be set if you want to run this step manually. ~~bool (flag)~~ |
|
||||
| `--name`, `-n` <Tag variant="new">3</Tag> | Package name to override in meta. ~~Optional[str] \(option)~~ |
|
||||
| `--version`, `-v` <Tag variant="new">3</Tag> | Package version to override in meta. Useful when training new versions, as it doesn't require editing the meta template. ~~Optional[str] \(option)~~ |
|
||||
| `--force`, `-f` | Force overwriting of existing folder in output directory. ~~bool (flag)~~ |
|
||||
| `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
|
||||
|
|
Loading…
Reference in New Issue