Add smart_open requirement, update deprecated options (#11864)

* Switch from deprecated `ignore_ext` to `compression`
* Add upload/download test for local files
This commit is contained in:
Adriane Boyd 2022-11-25 13:00:57 +01:00 committed by GitHub
parent c0fd8a2e71
commit 681ec20914
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 19 additions and 1 deletions

View File

@ -11,6 +11,7 @@ srsly>=2.4.3,<3.0.0
catalogue>=2.0.6,<2.1.0
typer>=0.3.0,<0.8.0
pathy>=0.3.5
smart-open>=5.2.1,<7.0.0
# Third party dependencies
numpy>=1.15.0
requests>=2.13.0,<3.0.0

View File

@ -53,6 +53,7 @@ install_requires =
# Third-party dependencies
typer>=0.3.0,<0.8.0
pathy>=0.3.5
smart-open>=5.2.1,<7.0.0
tqdm>=4.38.0,<5.0.0
numpy>=1.15.0
requests>=2.13.0,<3.0.0

View File

@ -358,7 +358,7 @@ def download_file(src: Union[str, "Pathy"], dest: Path, *, force: bool = False)
if dest.exists() and not force:
return None
src = str(src)
with smart_open.open(src, mode="rb", ignore_ext=True) as input_file:
with smart_open.open(src, mode="rb", compression="disable") as input_file:
with dest.open(mode="wb") as output_file:
shutil.copyfileobj(input_file, output_file)

View File

@ -17,6 +17,7 @@ from spacy.cli._util import is_subpath_of, load_project_config
from spacy.cli._util import parse_config_overrides, string_to_list
from spacy.cli._util import substitute_project_variables
from spacy.cli._util import validate_project_commands
from spacy.cli._util import upload_file, download_file
from spacy.cli.debug_data import _compile_gold, _get_labels_from_model
from spacy.cli.debug_data import _get_labels_from_spancat
from spacy.cli.debug_data import _get_distribution, _get_kl_divergence
@ -1014,3 +1015,18 @@ def test_project_check_requirements(reqs, output):
pkg_resources.require("spacyunknowndoesnotexist12345")
except pkg_resources.DistributionNotFound:
assert output == _check_requirements([req.strip() for req in reqs.split("\n")])
def test_upload_download_local_file():
    """Round-trip a text file through upload_file and download_file.

    A file is written in one temporary directory, uploaded into a second
    temporary directory, deleted from the first, and then downloaded back.
    The text read after the download must equal the text originally written.
    """
    name = "f.txt"
    expected = "content"
    with make_tempdir() as src_dir, make_tempdir() as dst_dir:
        local_file = src_dir / name
        remote_file = dst_dir / name
        with local_file.open(mode="w") as handle:
            handle.write(expected)
        upload_file(local_file, remote_file)
        # download_file returns early when the destination already exists
        # (unless forced), so remove the local copy before downloading.
        local_file.unlink()
        download_file(remote_file, local_file)
        with local_file.open(mode="r") as handle:
            restored = handle.read()
        assert restored == expected