From 6e48699ff131f42b860f35e7ebd4759070b33223 Mon Sep 17 00:00:00 2001
From: Prodesire
Date: Thu, 9 Nov 2017 00:26:08 +0800
Subject: [PATCH] add download to request

---
Review note: line breaks and indentation were reconstructed from a
flattened copy of this patch, and the added code was amended during
review (see the inline comments), so the post-image blob id on the
index line below no longer matches the resulting file.

 pydu/request.py | 86 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 82 insertions(+), 4 deletions(-)

diff --git a/pydu/request.py b/pydu/request.py
index a3a2145..ba6ba26 100644
--- a/pydu/request.py
+++ b/pydu/request.py
@@ -1,10 +1,14 @@
 import os
+import shutil
+import tempfile
 from pydu.compat import PY2, string_types
 from pydu.string import safeunicode
 
 if PY2:
+    import urllib as ulib
     import urlparse
 else:
+    import urllib.request as ulib
     import urllib.parse as urlparse
 
 
@@ -24,27 +28,101 @@ class FileName(object):
     def from_headers(headers):
         """
         Detect filename from Content-Disposition headers if present.
-        headers as dict, list or string
-        filename from content-disposition header or None
+
+        headers: as dict, list or string
+        filename: from content-disposition header or None
         """
+        if not headers:
+            return None
+
         if isinstance(headers, string_types):
             headers = [line.split(':', 1) for line in headers.splitlines()]
         if isinstance(headers, list):
             headers = dict(headers)
+
         cdisp = headers.get("Content-Disposition")
         if not cdisp:
             return None
+
         cdtype = cdisp.split(';')
         if len(cdtype) == 1:
             return None
         if cdtype[0].strip().lower() not in ('inline', 'attachment'):
             return None
+
         # several filename params is illegal, but just in case
         fnames = [x for x in cdtype[1:] if x.strip().startswith('filename=')]
-        if len(fnames) > 1:
+        # exactly one filename param is usable: none would break the
+        # indexing below, several are ambiguous
+        if len(fnames) != 1:
             return None
-        name = fnames[0].split('=')[1].strip(' \t"')
+
+        # split on the first '=' only, the name itself may contain '='
+        name = fnames[0].split('=', 1)[1].strip(' \t"')
         name = os.path.basename(name)
         if not name:
             return None
         return name
+
+    @classmethod
+    def from_any(cls, dst=None, headers=None, url=None):
+        """
+        Pick the first available filename candidate.
+
+        dst: explicit filename, used as is when given
+        headers: passed to from_headers when dst is empty
+        url: passed to from_url when the headers give no name
+        """
+        return dst or cls.from_headers(headers) or cls.from_url(url)
+
+
+def download(url, dst=None):
+    """
+    High level function, which downloads URL into tmp file in current
+    directory and then renames it to filename autodetected from either URL
+    or HTTP headers.
+
+    url: URL to download
+    dst: output filename or directory
+    filename: where URL is downloaded to
+    """
+    # detect if dst is a directory
+    dst_ = None
+    if dst and os.path.isdir(dst):
+        dst_ = dst
+        dst = None
+
+    # get filename for temp file in current directory; fall back to the
+    # default prefix because Python 2 mkstemp can not take prefix=None
+    prefix = FileName.from_any(dst=dst, url=url) or tempfile.gettempprefix()
+    fd, tmpfile = tempfile.mkstemp(".tmp", prefix=prefix, dir=".")
+    os.close(fd)
+    os.unlink(tmpfile)
+
+    if PY2:
+        binurl = url
+    else:
+        # Python 3 can not quote URL as needed
+        binurl = list(urlparse.urlsplit(url))
+        binurl[2] = urlparse.quote(binurl[2])
+        binurl = urlparse.urlunsplit(binurl)
+
+    try:
+        tmpfile, headers = ulib.urlretrieve(binurl, tmpfile)
+        filename = FileName.from_any(dst=dst, headers=headers, url=url)
+        if not filename:
+            raise ValueError('can not detect filename for %r' % url)
+        if dst_:
+            filename = os.path.join(dst_, filename)
+
+        # replace the destination if it already exists
+        if os.path.exists(filename):
+            os.unlink(filename)
+        shutil.move(tmpfile, filename)
+    finally:
+        # a failed download must not leave its temp file behind; after a
+        # successful move the temp file is already gone
+        if os.path.exists(tmpfile):
+            os.unlink(tmpfile)
+
+    return filename