mirror of https://github.com/explosion/spaCy.git
Add smart_open dependency to fetch project assets (#5812)
* Use smart_open for project assets
* Fix assets.py
* Update pyproject.toml
This commit is contained in:
parent
c288dba8e7
commit
520d25cb50
|
@ -8,6 +8,7 @@ requires = [
|
||||||
"murmurhash>=0.28.0,<1.1.0",
|
"murmurhash>=0.28.0,<1.1.0",
|
||||||
"thinc>=8.0.0a19,<8.0.0a30",
|
"thinc>=8.0.0a19,<8.0.0a30",
|
||||||
"blis>=0.4.0,<0.5.0",
|
"blis>=0.4.0,<0.5.0",
|
||||||
"pytokenizations"
|
"pytokenizations",
|
||||||
|
"smart_open>=2.0.0,<3.0.0"
|
||||||
]
|
]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
|
@ -15,6 +15,7 @@ requests>=2.13.0,<3.0.0
|
||||||
tqdm>=4.38.0,<5.0.0
|
tqdm>=4.38.0,<5.0.0
|
||||||
pydantic>=1.3.0,<2.0.0
|
pydantic>=1.3.0,<2.0.0
|
||||||
pytokenizations
|
pytokenizations
|
||||||
|
smart_open>=2.0.0,<3.0.0
|
||||||
# Official Python utilities
|
# Official Python utilities
|
||||||
setuptools
|
setuptools
|
||||||
packaging
|
packaging
|
||||||
|
|
|
@ -52,6 +52,7 @@ install_requires =
|
||||||
requests>=2.13.0,<3.0.0
|
requests>=2.13.0,<3.0.0
|
||||||
pydantic>=1.3.0,<2.0.0
|
pydantic>=1.3.0,<2.0.0
|
||||||
pytokenizations
|
pytokenizations
|
||||||
|
smart_open>=2.0.0,<3.0.0
|
||||||
# Official Python utilities
|
# Official Python utilities
|
||||||
setuptools
|
setuptools
|
||||||
packaging
|
packaging
|
||||||
|
|
|
@ -1,15 +1,17 @@
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from wasabi import msg
|
from wasabi import msg
|
||||||
import requests
|
|
||||||
import tqdm
|
import tqdm
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
import requests
|
||||||
|
import smart_open
|
||||||
|
|
||||||
from ...util import ensure_path, working_dir
|
from ...util import ensure_path, working_dir
|
||||||
from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum
|
from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# TODO: find a solution for caches
|
# TODO: find a solution for caches
|
||||||
# CACHES = [
|
# CACHES = [
|
||||||
# Path.home() / ".torch",
|
# Path.home() / ".torch",
|
||||||
|
@ -135,23 +137,12 @@ def convert_asset_url(url: str) -> str:
|
||||||
|
|
||||||
|
|
||||||
def download_file(url: str, dest: Path, chunk_size: int = 1024) -> None:
|
def download_file(url: str, dest: Path, chunk_size: int = 1024) -> None:
|
||||||
"""Download a file using requests.
|
"""Download a file using smart_open.
|
||||||
|
|
||||||
url (str): The URL of the file.
|
url (str): The URL of the file.
|
||||||
dest (Path): The destination path.
|
dest (Path): The destination path.
|
||||||
chunk_size (int): The size of chunks to read/write.
|
chunk_size (int): The size of chunks to read/write.
|
||||||
"""
|
"""
|
||||||
response = requests.get(url, stream=True)
|
with smart_open.open(url, mode="rb") as input_file:
|
||||||
response.raise_for_status()
|
with dest.open(mode="wb") as output_file:
|
||||||
total = int(response.headers.get("content-length", 0))
|
output_file.write(input_file.read())
|
||||||
progress_settings = {
|
|
||||||
"total": total,
|
|
||||||
"unit": "iB",
|
|
||||||
"unit_scale": True,
|
|
||||||
"unit_divisor": chunk_size,
|
|
||||||
"leave": False,
|
|
||||||
}
|
|
||||||
with dest.open("wb") as f, tqdm.tqdm(**progress_settings) as bar:
|
|
||||||
for data in response.iter_content(chunk_size=chunk_size):
|
|
||||||
size = f.write(data)
|
|
||||||
bar.update(size)
|
|
||||||
|
|
Loading…
Reference in New Issue