From 6bfb1b3a29fa556daad7e81ab4980fb3a54c616e Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Tue, 1 Sep 2020 19:49:01 +0200 Subject: [PATCH] Fix sparse checkout for 'spacy project' (#6008) * exit if cloning fails * UX * rewrite http link to git protocol, don't use stdin * fixes to sparse checkout * formatting --- spacy/cli/_util.py | 29 ++++++++++++++++++----------- spacy/cli/project/clone.py | 5 +++-- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index 16e257ce2..cfa126cc4 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -297,9 +297,7 @@ def ensure_pathy(path): return Pathy(path) -def git_sparse_checkout( - repo: str, subpath: str, dest: Path, *, branch: Optional[str] = None -): +def git_sparse_checkout(repo: str, subpath: str, dest: Path, *, branch: str = "master"): if dest.exists(): msg.fail("Destination of checkout must not exist", exits=1) if not dest.parent.exists(): @@ -323,21 +321,30 @@ def git_sparse_checkout( # This is the "clone, but don't download anything" part. cmd = ( f"git clone {repo} {tmp_dir} --no-checkout --depth 1 " - "--filter=blob:none" # <-- The key bit + f"--filter=blob:none " # <-- The key bit + f"-b {branch}" ) - if branch is not None: - cmd = f"{cmd} -b {branch}" run_command(cmd, capture=True) # Now we need to find the missing filenames for the subpath we want. # Looking for this 'rev-list' command in the git --help? Hah. cmd = f"git -C {tmp_dir} rev-list --objects --all --missing=print -- {subpath}" ret = run_command(cmd, capture=True) - missings = "\n".join([x[1:] for x in ret.stdout.split() if x.startswith("?")]) + repo = _from_http_to_git(repo) # Now pass those missings into another bit of git internals - run_command( - f"git -C {tmp_dir} fetch-pack --stdin {repo}", capture=True, stdin=missings - ) + missings = " ".join([x[1:] for x in ret.stdout.split() if x.startswith("?")]) + cmd = f"git -C {tmp_dir} fetch-pack {repo} {missings}" + run_command(cmd, capture=True) # And finally, we can checkout our subpath - run_command(f"git -C {tmp_dir} checkout {branch} {subpath}") + cmd = f"git -C {tmp_dir} checkout {branch} {subpath}" + run_command(cmd) # We need Path(name) to make sure we also support subdirectories shutil.move(str(tmp_dir / Path(subpath)), str(dest)) + + +def _from_http_to_git(repo): + if repo.startswith("http://"): + repo = repo.replace(r"http://", r"https://") + if repo.startswith(r"https://"): + repo = repo.replace("https://", "git@").replace("/", ":", 1) + repo = f"{repo}.git" + return repo diff --git a/spacy/cli/project/clone.py b/spacy/cli/project/clone.py index 7f9a46a46..751c389bc 100644 --- a/spacy/cli/project/clone.py +++ b/spacy/cli/project/clone.py @@ -43,7 +43,7 @@ def project_clone(name: str, dest: Path, *, repo: str = about.__projects__) -> N git_sparse_checkout(repo, name, dest) except subprocess.CalledProcessError: err = f"Could not clone '{name}' from repo '{repo_name}'" - msg.fail(err) + msg.fail(err, exits=1) msg.good(f"Cloned '{name}' from {repo_name}", project_dir) if not (project_dir / PROJECT_FILE).exists(): msg.warn(f"No {PROJECT_FILE} found in directory") @@ -78,6 +78,7 @@ def check_clone(name: str, dest: Path, repo: str) -> None: if not dest.parent.exists(): # We're not creating parents, parent dir should exist msg.fail( - f"Can't clone project, parent directory doesn't exist: {dest.parent}", + f"Can't clone project, parent directory doesn't exist: {dest.parent}. " + f"Create the necessary folder(s) first before continuing.", exits=1, )