mirror of https://github.com/explosion/spaCy.git
Fix sparse checkout for 'spacy project' (#6008)
* exit if cloning fails * UX * rewrite http link to git protocol, don't use stdin * fixes to sparse checkout * formatting
This commit is contained in:
parent
4cce32f090
commit
6bfb1b3a29
|
@ -297,9 +297,7 @@ def ensure_pathy(path):
|
||||||
return Pathy(path)
|
return Pathy(path)
|
||||||
|
|
||||||
|
|
||||||
def git_sparse_checkout(
|
def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "master"):
|
||||||
repo: str, subpath: str, dest: Path, *, branch: Optional[str] = None
|
|
||||||
):
|
|
||||||
if dest.exists():
|
if dest.exists():
|
||||||
msg.fail("Destination of checkout must not exist", exits=1)
|
msg.fail("Destination of checkout must not exist", exits=1)
|
||||||
if not dest.parent.exists():
|
if not dest.parent.exists():
|
||||||
|
@ -323,21 +321,30 @@ def git_sparse_checkout(
|
||||||
# This is the "clone, but don't download anything" part.
|
# This is the "clone, but don't download anything" part.
|
||||||
cmd = (
|
cmd = (
|
||||||
f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
|
f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
|
||||||
"--filter=blob:none" # <-- The key bit
|
f"--filter=blob:none " # <-- The key bit
|
||||||
|
f"-b {branch}"
|
||||||
)
|
)
|
||||||
if branch is not None:
|
|
||||||
cmd = f"{cmd} -b {branch}"
|
|
||||||
run_command(cmd, capture=True)
|
run_command(cmd, capture=True)
|
||||||
# Now we need to find the missing filenames for the subpath we want.
|
# Now we need to find the missing filenames for the subpath we want.
|
||||||
# Looking for this 'rev-list' command in the git --help? Hah.
|
# Looking for this 'rev-list' command in the git --help? Hah.
|
||||||
cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
|
cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
|
||||||
ret = run_command(cmd, capture=True)
|
ret = run_command(cmd, capture=True)
|
||||||
missings = "\n".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
|
repo = _from_http_to_git(repo)
|
||||||
# Now pass those missings into another bit of git internals
|
# Now pass those missings into another bit of git internals
|
||||||
run_command(
|
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
|
||||||
f"git -C {tmp_dir} fetch-pack --stdin {repo}", capture=True, stdin=missings
|
cmd = f"git -C {tmp_dir} fetch-pack {repo} {missings}"
|
||||||
)
|
run_command(cmd, capture=True)
|
||||||
# And finally, we can checkout our subpath
|
# And finally, we can checkout our subpath
|
||||||
run_command(f"git -C {tmp_dir} checkout {branch} {subpath}")
|
cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
|
||||||
|
run_command(cmd)
|
||||||
# We need Path(name) to make sure we also support subdirectories
|
# We need Path(name) to make sure we also support subdirectories
|
||||||
shutil.move(str(tmp_dir / Path(subpath)), str(dest))
|
shutil.move(str(tmp_dir / Path(subpath)), str(dest))
|
||||||
|
|
||||||
|
|
||||||
|
def _from_http_to_git(repo):
|
||||||
|
if repo.startswith("http://"):
|
||||||
|
repo = repo.replace(r"http://", r"https://")
|
||||||
|
if repo.startswith(r"https://"):
|
||||||
|
repo = repo.replace("https://", "git@").replace("/", ":", 1)
|
||||||
|
repo = f"{repo}.git"
|
||||||
|
return repo
|
||||||
|
|
|
@ -43,7 +43,7 @@ def project_clone(name: str, dest: Path, *, repo: str = about.__projects__) -> N
|
||||||
git_sparse_checkout(repo, name, dest)
|
git_sparse_checkout(repo, name, dest)
|
||||||
except subprocess.CalledProcessError:
|
except subprocess.CalledProcessError:
|
||||||
err = f"Could not clone '{name}' from repo '{repo_name}'"
|
err = f"Could not clone '{name}' from repo '{repo_name}'"
|
||||||
msg.fail(err)
|
msg.fail(err, exits=1)
|
||||||
msg.good(f"Cloned '{name}' from {repo_name}", project_dir)
|
msg.good(f"Cloned '{name}' from {repo_name}", project_dir)
|
||||||
if not (project_dir / PROJECT_FILE).exists():
|
if not (project_dir / PROJECT_FILE).exists():
|
||||||
msg.warn(f"No {PROJECT_FILE} found in directory")
|
msg.warn(f"No {PROJECT_FILE} found in directory")
|
||||||
|
@ -78,6 +78,7 @@ def check_clone(name: str, dest: Path, repo: str) -> None:
|
||||||
if not dest.parent.exists():
|
if not dest.parent.exists():
|
||||||
# We're not creating parents, parent dir should exist
|
# We're not creating parents, parent dir should exist
|
||||||
msg.fail(
|
msg.fail(
|
||||||
f"Can't clone project, parent directory doesn't exist: {dest.parent}",
|
f"Can't clone project, parent directory doesn't exist: {dest.parent}. "
|
||||||
|
f"Create the necessary folder(s) first before continuing.",
|
||||||
exits=1,
|
exits=1,
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue