diff --git a/requirements.txt b/requirements.txt index 23bfa6f14..dd2eff0c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,7 @@ srsly>=2.4.3,<3.0.0 catalogue>=2.0.6,<2.1.0 typer>=0.3.0,<0.8.0 pathy>=0.3.5 +smart-open>=5.2.1,<7.0.0 # Third party dependencies numpy>=1.15.0 requests>=2.13.0,<3.0.0 diff --git a/setup.cfg b/setup.cfg index 82d4d2758..330dc8205 100644 --- a/setup.cfg +++ b/setup.cfg @@ -53,6 +53,7 @@ install_requires = # Third-party dependencies typer>=0.3.0,<0.8.0 pathy>=0.3.5 + smart-open>=5.2.1,<7.0.0 tqdm>=4.38.0,<5.0.0 numpy>=1.15.0 requests>=2.13.0,<3.0.0 diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index 897964a88..872f69c88 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -358,7 +358,7 @@ def download_file(src: Union[str, "Pathy"], dest: Path, *, force: bool = False) if dest.exists() and not force: return None src = str(src) - with smart_open.open(src, mode="rb", ignore_ext=True) as input_file: + with smart_open.open(src, mode="rb", compression="disable") as input_file: with dest.open(mode="wb") as output_file: shutil.copyfileobj(input_file, output_file) diff --git a/spacy/tests/test_cli.py b/spacy/tests/test_cli.py index 1c4d0c98f..525c6d255 100644 --- a/spacy/tests/test_cli.py +++ b/spacy/tests/test_cli.py @@ -17,6 +17,7 @@ from spacy.cli._util import is_subpath_of, load_project_config from spacy.cli._util import parse_config_overrides, string_to_list from spacy.cli._util import substitute_project_variables from spacy.cli._util import validate_project_commands +from spacy.cli._util import upload_file, download_file from spacy.cli.debug_data import _compile_gold, _get_labels_from_model from spacy.cli.debug_data import _get_labels_from_spancat from spacy.cli.debug_data import _get_distribution, _get_kl_divergence @@ -1014,3 +1015,18 @@ def test_project_check_requirements(reqs, output): pkg_resources.require("spacyunknowndoesnotexist12345") except pkg_resources.DistributionNotFound: assert output == _check_requirements([req.strip() for req in reqs.split("\n")]) + + +def test_upload_download_local_file(): + with make_tempdir() as d1, make_tempdir() as d2: + filename = "f.txt" + content = "content" + local_file = d1 / filename + remote_file = d2 / filename + with local_file.open(mode="w") as file_: + file_.write(content) + upload_file(local_file, remote_file) + local_file.unlink() + download_file(remote_file, local_file) + with local_file.open(mode="r") as file_: + assert file_.read() == content