Read static files in 64-KB chunks.

Based on #526, but updated for the new interfaces in StaticFileHandler.

Unlike #526, this change does not actually wait for each chunk to be
flushed before reading the next one.  Flushing raises some additional
complications (wsgi compatibility, chunked encoding vs content-length)
that are probably not worthwhile for the intended use of StaticFileHandler.
Reading in chunks has benefits even if we don't wait for the flush
(i.e. memory fragmentation), and this change establishes the necessary
subclass interfaces so we can add flushing in the future.
This commit is contained in:
Ben Darnell 2013-05-19 14:23:17 -04:00
parent 2e73d4b635
commit 0c238d6fcf
1 changed files with 45 additions and 12 deletions

View File

@ -690,7 +690,11 @@ class RequestHandler(object):
has been run, the previous callback will be discarded.
"""
if self.application._wsgi:
raise Exception("WSGI applications do not support flush()")
# WSGI applications cannot usefully support flush, so just make
# it a no-op (and run the callback immediately).
if callback is not None:
callback()
return
chunk = b"".join(self._write_buffer)
self._write_buffer = []
@ -1750,9 +1754,10 @@ class StaticFileHandler(RequestHandler):
of another server or CDN), override `make_static_url`, `parse_url_path`,
`get_cache_time`, and/or `get_version`.
To replace all interaction with the filesystem (e.g. to serve static
content from a database), override `get_content`, `get_modified_time`,
`get_absolute_path`, and `validate_absolute_path`.
To replace all interaction with the filesystem (e.g. to serve
static content from a database), override `get_content`,
`get_content_size`, `get_modified_time`, `get_absolute_path`, and
`validate_absolute_path`.
"""
CACHE_MAX_AGE = 86400 * 365 * 10 # 10 years
@ -1809,12 +1814,18 @@ class StaticFileHandler(RequestHandler):
httputil._get_content_range(start, end, size))
else:
start = end = None
data = self.get_content(self.absolute_path, start, end)
content = self.get_content(self.absolute_path, start, end)
if isinstance(content, bytes_type):
content = [content]
content_length = 0
for chunk in content:
if include_body:
self.write(data)
self.write(chunk)
else:
content_length += len(chunk)
if not include_body:
assert self.request.method == "HEAD"
self.set_header("Content-Length", len(data))
self.set_header("Content-Length", content_length)
def compute_etag(self):
"""Sets the ``Etag`` header based on static url version.
@ -1922,15 +1933,31 @@ class StaticFileHandler(RequestHandler):
signature is different from other overridable class methods
(no ``settings`` argument); this is deliberate to ensure that
``abspath`` is able to stand on its own as a cache key.
This method should either return a byte string or an iterator
of byte strings. The latter is preferred for large files
as it helps reduce memory fragmentation.
"""
with open(abspath, "rb") as file:
if start is not None:
file.seek(start)
if end is not None:
remaining = end - (start or 0)
return file.read(remaining)
else:
return file.read()
remaining = None
while True:
chunk_size = 64 * 1024
if remaining is not None and remaining < chunk_size:
chunk_size = remaining
chunk = file.read(chunk_size)
if chunk:
if remaining is not None:
remaining -= len(chunk)
yield chunk
else:
if remaining is not None:
assert remaining == 0
return
@classmethod
def get_content_version(cls, abspath):
@ -1940,7 +1967,13 @@ class StaticFileHandler(RequestHandler):
default implementation is a hash of the file's contents.
"""
data = cls.get_content(abspath)
return hashlib.md5(data).hexdigest()
hasher = hashlib.md5()
if isinstance(data, bytes_type):
hasher.update(data)
else:
for chunk in data:
hasher.update(chunk)
return hasher.hexdigest()
def _stat(self):
if not hasattr(self, '_stat_result'):