Fix sparse checkout for 'spacy project' (#6008)

* exit if cloning fails

* UX

* rewrite http link to git protocol, don't use stdin

* fixes to sparse checkout

* formatting
This commit is contained in:
Sofie Van Landeghem 2020-09-01 19:49:01 +02:00 committed by GitHub
parent 4cce32f090
commit 6bfb1b3a29
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 21 additions and 13 deletions

View File

@ -297,9 +297,7 @@ def ensure_pathy(path):
return Pathy(path)
def git_sparse_checkout(
repo: str, subpath: str, dest: Path, *, branch: Optional[str] = None
):
def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "master"):
if dest.exists():
msg.fail("Destination of checkout must not exist", exits=1)
if not dest.parent.exists():
@ -323,21 +321,30 @@ def git_sparse_checkout(
# This is the "clone, but don't download anything" part.
cmd = (
f"git clone {repo} {tmp_dir} --no-checkout --depth 1 "
"--filter=blob:none" # <-- The key bit
f"--filter=blob:none " # <-- The key bit
f"-b {branch}"
)
if branch is not None:
cmd = f"{cmd} -b {branch}"
run_command(cmd, capture=True)
# Now we need to find the missing filenames for the subpath we want.
# Looking for this 'rev-list' command in the git --help? Hah.
cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}"
ret = run_command(cmd, capture=True)
missings = "\n".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
repo = _from_http_to_git(repo)
# Now pass those missings into another bit of git internals
run_command(
f"git -C {tmp_dir} fetch-pack --stdin {repo}", capture=True, stdin=missings
)
missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")])
cmd = f"git -C {tmp_dir} fetch-pack {repo} {missings}"
run_command(cmd, capture=True)
# And finally, we can checkout our subpath
run_command(f"git -C {tmp_dir} checkout {branch} {subpath}")
cmd = f"git -C {tmp_dir} checkout {branch} {subpath}"
run_command(cmd)
# We need Path(name) to make sure we also support subdirectories
shutil.move(str(tmp_dir / Path(subpath)), str(dest))
def _from_http_to_git(repo):
if repo.startswith("http://"):
repo = repo.replace(r"http://", r"https://")
if repo.startswith(r"https://"):
repo = repo.replace("https://", "git@").replace("/", ":", 1)
repo = f"{repo}.git"
return repo

View File

@ -43,7 +43,7 @@ def project_clone(name: str, dest: Path, *, repo: str = about.__projects__) -> N
git_sparse_checkout(repo, name, dest)
except subprocess.CalledProcessError:
err = f"Could not clone '{name}' from repo '{repo_name}'"
msg.fail(err)
msg.fail(err, exits=1)
msg.good(f"Cloned '{name}' from {repo_name}", project_dir)
if not (project_dir / PROJECT_FILE).exists():
msg.warn(f"No {PROJECT_FILE} found in directory")
@ -78,6 +78,7 @@ def check_clone(name: str, dest: Path, repo: str) -> None:
if not dest.parent.exists():
# We're not creating parents, parent dir should exist
msg.fail(
f"Can't clone project, parent directory doesn't exist: {dest.parent}",
f"Can't clone project, parent directory doesn't exist: {dest.parent}. "
f"Create the necessary folder(s) first before continuing.",
exits=1,
)