From 3889747119a6302da96d91502479081ad2036aa4 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 10 Sep 2020 14:36:53 +0200 Subject: [PATCH 1/6] asset fix & UX --- spacy/cli/project/assets.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/spacy/cli/project/assets.py b/spacy/cli/project/assets.py index 2b623675d..0d54a8d05 100644 --- a/spacy/cli/project/assets.py +++ b/spacy/cli/project/assets.py @@ -38,38 +38,45 @@ def project_assets(project_dir: Path) -> None: msg.warn(f"No assets specified in {PROJECT_FILE}", exits=0) msg.info(f"Fetching {len(assets)} asset(s)") for asset in assets: - dest = Path(asset["dest"]) + dest = project_dir / asset["dest"] checksum = asset.get("checksum") - if "git" in asset: - if dest.exists(): - # If there's already a file, check for checksum - if checksum and checksum == get_checksum(dest): - msg.good(f"Skipping download with matching checksum: {dest}") - continue - else: + if dest.exists(): + # If there's already a file, check for checksum + if checksum and checksum == get_checksum(dest): + msg.good(f"Skipping download with matching checksum: {dest}") + continue + else: + msg.good(f"Removing asset with outdated checksum: {dest} ") + if dest.is_dir(): shutil.rmtree(dest) + else: + dest.unlink() + if "git" in asset: git_sparse_checkout( asset["git"]["repo"], asset["git"]["path"], dest, branch=asset["git"].get("branch"), ) - else: + elif "url" in asset: url = asset.get("url") if not url: # project.yml defines asset without URL that the user has to place check_private_asset(dest, checksum) continue fetch_asset(project_path, url, dest, checksum) + else: + msg.warn(f"Could not fetch asset {dest} as neither a 'git' or 'url' parameter is specified.") def check_private_asset(dest: Path, checksum: Optional[str] = None) -> None: """Check and validate assets without a URL (private assets that the user has to provide themselves) and give feedback about the checksum. - dest (Path): Desintation path of the asset. + dest (Path): Destination path of the asset. checksum (Optional[str]): Optional checksum of the expected file. """ + print("path", dest) if not Path(dest).exists(): err = f"No URL provided for asset. You need to add this file yourself: {dest}" msg.warn(err) From f1bc09c1e9ec58b2288d9666339aeebb0c1fa89f Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 10 Sep 2020 14:53:02 +0200 Subject: [PATCH 2/6] restore partly --- spacy/cli/project/assets.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/spacy/cli/project/assets.py b/spacy/cli/project/assets.py index 0d54a8d05..4b59b7751 100644 --- a/spacy/cli/project/assets.py +++ b/spacy/cli/project/assets.py @@ -40,33 +40,30 @@ def project_assets(project_dir: Path) -> None: for asset in assets: dest = project_dir / asset["dest"] checksum = asset.get("checksum") - if dest.exists(): - # If there's already a file, check for checksum - if checksum and checksum == get_checksum(dest): - msg.good(f"Skipping download with matching checksum: {dest}") - continue - else: - msg.good(f"Removing asset with outdated checksum: {dest} ") - if dest.is_dir(): - shutil.rmtree(dest) - else: - dest.unlink() if "git" in asset: + if dest.exists(): + # If there's already a file, check for checksum + if checksum and checksum == get_checksum(dest): + msg.good(f"Skipping download with matching checksum: {dest}") + continue + else: + if dest.is_dir(): + shutil.rmtree(dest) + else: + dest.unlink() git_sparse_checkout( asset["git"]["repo"], asset["git"]["path"], dest, branch=asset["git"].get("branch"), ) - elif "url" in asset: + else: url = asset.get("url") if not url: # project.yml defines asset without URL that the user has to place check_private_asset(dest, checksum) continue fetch_asset(project_path, url, dest, checksum) - else: - msg.warn(f"Could not fetch asset {dest} as neither a 'git' or 'url' parameter is specified.") def check_private_asset(dest: Path, checksum: Optional[str] = None) -> None: @@ -76,7 +73,6 @@ def check_private_asset(dest: Path, checksum: Optional[str] = None) -> None: dest (Path): Destination path of the asset. checksum (Optional[str]): Optional checksum of the expected file. """ - print("path", dest) if not Path(dest).exists(): err = f"No URL provided for asset. You need to add this file yourself: {dest}" msg.warn(err) From 08a831ce83c87733369c50c75c82bedb00027ec0 Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 10 Sep 2020 17:39:52 +0200 Subject: [PATCH 3/6] process trailing slash if any --- spacy/cli/_util.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index b03f3eb69..c539598ef 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -203,6 +203,10 @@ def get_checksum(path: Union[Path, str]) -> str: msg.fail(f"Can't get checksum for {path}: not a file or directory", exits=1) +def _brol(path): + return str.encode(Path(path).read_text().replace("\r\n", "\n")) + + @contextmanager def show_validation_error( file_path: Optional[Union[str, Path]] = None, @@ -360,5 +364,7 @@ def _from_http_to_git(repo): repo = repo.replace(r"http://", r"https://") if repo.startswith(r"https://"): repo = repo.replace("https://", "git@").replace("/", ":", 1) + if repo.endswith("/"): + repo = repo[:-1] repo = f"{repo}.git" return repo From 1723fb73c42e141b404b8700c49b8a372eec952d Mon Sep 17 00:00:00 2001 From: svlandeg Date: Thu, 10 Sep 2020 17:44:59 +0200 Subject: [PATCH 4/6] remove brol --- spacy/cli/_util.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index c539598ef..0755ccb46 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -203,10 +203,6 @@ def get_checksum(path: Union[Path, str]) -> str: msg.fail(f"Can't get checksum for {path}: not a file or directory", exits=1) -def _brol(path): - return str.encode(Path(path).read_text().replace("\r\n", "\n")) - - @contextmanager def show_validation_error( file_path: Optional[Union[str, Path]] = None, From 6831161bfa776273e0247f6241fea5f05f40623e Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 11 Sep 2020 09:56:49 +0200 Subject: [PATCH 5/6] Resolve path to be extra sure --- spacy/cli/project/assets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/cli/project/assets.py b/spacy/cli/project/assets.py index 4b59b7751..6f23f0e0a 100644 --- a/spacy/cli/project/assets.py +++ b/spacy/cli/project/assets.py @@ -38,7 +38,7 @@ def project_assets(project_dir: Path) -> None: msg.warn(f"No assets specified in {PROJECT_FILE}", exits=0) msg.info(f"Fetching {len(assets)} asset(s)") for asset in assets: - dest = project_dir / asset["dest"] + dest = (project_dir / asset["dest"]).resolve() checksum = asset.get("checksum") if "git" in asset: if dest.exists(): From 761bd60d43748c5b85399d5c1a484f188c6a08de Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 11 Sep 2020 09:57:00 +0200 Subject: [PATCH 6/6] Adjust info message --- spacy/cli/project/assets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/spacy/cli/project/assets.py b/spacy/cli/project/assets.py index 6f23f0e0a..cb3a2fb99 100644 --- a/spacy/cli/project/assets.py +++ b/spacy/cli/project/assets.py @@ -44,7 +44,9 @@ def project_assets(project_dir: Path) -> None: if dest.exists(): # If there's already a file, check for checksum if checksum and checksum == get_checksum(dest): - msg.good(f"Skipping download with matching checksum: {dest}") + msg.good( + f"Skipping download with matching checksum: {asset['dest']}" + ) continue else: if dest.is_dir():