mirror of https://github.com/explosion/spaCy.git
Fix sparse checkout for 'spacy project' (#6008)
* exit if cloning fails * UX * rewrite http link to git protocol, don't use stdin * fixes to sparse checkout * formatting
This commit is contained in:
parent
4cce32f090
commit
6bfb1b3a29
|
@ -297,9 +297,7 @@ def ensure_pathy(path):
|
|||
return Pathy(path)
|
||||
|
||||
|
||||
def git_sparse_checkout(
|
||||
repo: str, subpath: str, dest: Path, *, branch: Optional[str] = None
|
||||
):
|
||||
def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "master"):
|
||||
if dest.exists():
|
||||
msg.fail("Destination of checkout must not exist", exits=1)
|
||||
if not dest.parent.exists():
|
||||
|
@ -323,21 +321,30 @@ def git_sparse_checkout(
|
|||
# This is the "clone, but don't download anything" part.
|
||||
cmd = (
|
||||
f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
|
||||
"--filter=blob:none" # <-- The key bit
|
||||
f"--filter=blob:none " # <-- The key bit
|
||||
f"-b {branch}"
|
||||
)
|
||||
if branch is not None:
|
||||
cmd = f"{cmd} -b {branch}"
|
||||
run_command(cmd, capture=True)
|
||||
# Now we need to find the missing filenames for the subpath we want.
|
||||
# Looking for this 'rev-list' command in the git --help? Hah.
|
||||
cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
|
||||
ret = run_command(cmd, capture=True)
|
||||
missings = "\n".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
|
||||
repo = _from_http_to_git(repo)
|
||||
# Now pass those missings into another bit of git internals
|
||||
run_command(
|
||||
f"git -C {tmp_dir} fetch-pack --stdin {repo}", capture=True, stdin=missings
|
||||
)
|
||||
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
|
||||
cmd = f"git -C {tmp_dir} fetch-pack {repo} {missings}"
|
||||
run_command(cmd, capture=True)
|
||||
# And finally, we can checkout our subpath
|
||||
run_command(f"git -C {tmp_dir} checkout {branch} {subpath}")
|
||||
cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
|
||||
run_command(cmd)
|
||||
# We need Path(name) to make sure we also support subdirectories
|
||||
shutil.move(str(tmp_dir / Path(subpath)), str(dest))
|
||||
|
||||
|
||||
def _from_http_to_git(repo: str) -> str:
    """Rewrite an HTTP(S) repository URL into ``git@host:path.git`` form.

    For example ``https://github.com/explosion/projects`` becomes
    ``git@github.com:explosion/projects.git``. Input that does not start
    with ``http://`` or ``https://`` is returned unchanged.

    repo (str): The repository location as given by the caller.
    RETURNS (str): The rewritten location, or ``repo`` itself if no
        rewrite applies.
    """
    for scheme in ("https://", "http://"):
        if repo.startswith(scheme):
            # Strip only the leading scheme; a later occurrence of the same
            # text inside the path must stay untouched.
            remainder = repo[len(scheme) :]
            break
    else:
        # Not an HTTP(S) URL (e.g. already git@host:path): nothing to do.
        return repo
    # A trailing slash would otherwise end up in front of the suffix
    # ("projects/.git").
    remainder = remainder.rstrip("/")
    # The first slash separates host from path; the target form uses ":".
    converted = "git@" + remainder.replace("/", ":", 1)
    # Don't double the suffix when the URL already carries it.
    if not converted.endswith(".git"):
        converted = f"{converted}.git"
    return converted
|
||||
|
|
|
@ -43,7 +43,7 @@ def project_clone(name: str, dest: Path, *, repo: str = about.__projects__) -> N
|
|||
git_sparse_checkout(repo, name, dest)
|
||||
except subprocess.CalledProcessError:
|
||||
err = f"Could not clone '{name}' from repo '{repo_name}'"
|
||||
msg.fail(err)
|
||||
msg.fail(err, exits=1)
|
||||
msg.good(f"Cloned '{name}' from {repo_name}", project_dir)
|
||||
if not (project_dir / PROJECT_FILE).exists():
|
||||
msg.warn(f"No {PROJECT_FILE} found in directory")
|
||||
|
@ -78,6 +78,7 @@ def check_clone(name: str, dest: Path, repo: str) -> None:
|
|||
if not dest.parent.exists():
|
||||
# We're not creating parents, parent dir should exist
|
||||
msg.fail(
|
||||
f"Can't clone project, parent directory doesn't exist: {dest.parent}",
|
||||
f"Can't clone project, parent directory doesn't exist: {dest.parent}. "
|
||||
f"Create the necessary folder(s) first before continuing.",
|
||||
exits=1,
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue