From 42db3efd368d154f428ce834ebc99fc8535931d7 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sun, 4 Jan 2009 21:37:59 +0000 Subject: [PATCH] Merged revisions 68319 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r68319 | antoine.pitrou | 2009-01-04 22:29:23 +0100 (dim., 04 janv. 2009) | 3 lines Issue #4272: Add an optional argument to the GzipFile constructor to override the timestamp in the gzip stream. ........ --- Doc/library/gzip.rst | 11 +++++++- Lib/gzip.py | 21 ++++++++++++--- Lib/test/test_gzip.py | 63 +++++++++++++++++++++++++++++++++++++++++++ Misc/NEWS | 5 ++++ 4 files changed, 95 insertions(+), 5 deletions(-) diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index c6f9ef82bd3..fa73bba4584 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -24,7 +24,7 @@ For other archive formats, see the :mod:`bz2`, :mod:`zipfile`, and The module defines the following items: -.. class:: GzipFile([filename[, mode[, compresslevel[, fileobj]]]]) +.. class:: GzipFile([filename[, mode[, compresslevel[, fileobj[, mtime]]]]]) Constructor for the :class:`GzipFile` class, which simulates most of the methods of a file object, with the exception of the :meth:`readinto` and @@ -52,6 +52,15 @@ The module defines the following items: level of compression; ``1`` is fastest and produces the least compression, and ``9`` is slowest and produces the most compression. The default is ``9``. + The *mtime* argument is an optional numeric timestamp to be written to + the stream when compressing. All :program:`gzip` compressed streams are + required to contain a timestamp. If omitted or ``None``, the current + time is used. This module ignores the timestamp when decompressing; + however, some programs, such as :program:`gunzip`\ , make use of it. + The format of the timestamp is the same as that of the return value of + ``time.time()`` and of the ``st_mtime`` member of the object returned + by ``os.stat()``. 
+ Calling a :class:`GzipFile` object's :meth:`close` method does not close *fileobj*, since you might wish to append more material after the compressed data. This also allows you to pass a :class:`StringIO` object opened for diff --git a/Lib/gzip.py b/Lib/gzip.py index 11d557172fd..560a722bae9 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -54,7 +54,7 @@ class GzipFile: max_read_chunk = 10 * 1024 * 1024 # 10Mb def __init__(self, filename=None, mode=None, - compresslevel=9, fileobj=None): + compresslevel=9, fileobj=None, mtime=None): """Constructor for the GzipFile class. At least one of fileobj and filename must be given a @@ -81,6 +81,15 @@ def __init__(self, filename=None, mode=None, level of compression; 1 is fastest and produces the least compression, and 9 is slowest and produces the most compression. The default is 9. + The mtime argument is an optional numeric timestamp to be written + to the stream when compressing. All gzip compressed streams + are required to contain a timestamp. If omitted or None, the + current time is used. This module ignores the timestamp when + decompressing; however, some programs, such as gunzip, make use + of it. The format of the timestamp is the same as that of the + return value of time.time() and of the st_mtime member of the + object returned by os.stat(). 
+ """ # guarantee the file is opened in binary mode on platforms @@ -119,6 +128,7 @@ def __init__(self, filename=None, mode=None, self.fileobj = fileobj self.offset = 0 + self.mtime = mtime if self.mode == WRITE: self._write_gzip_header() @@ -157,7 +167,10 @@ def _write_gzip_header(self): if fname: flags = FNAME self.fileobj.write(chr(flags).encode('latin-1')) - write32u(self.fileobj, int(time.time())) + mtime = self.mtime + if mtime is None: + mtime = time.time() + write32u(self.fileobj, int(mtime)) self.fileobj.write(b'\002') self.fileobj.write(b'\377') if fname: @@ -175,10 +188,10 @@ def _read_gzip_header(self): if method != 8: raise IOError('Unknown compression method') flag = ord( self.fileobj.read(1) ) - # modtime = self.fileobj.read(4) + self.mtime = read32(self.fileobj) # extraflag = self.fileobj.read(1) # os = self.fileobj.read(1) - self.fileobj.read(6) + self.fileobj.read(2) if flag & FEXTRA: # Read & discard the extra field, if present diff --git a/Lib/test/test_gzip.py b/Lib/test/test_gzip.py index d28c024d07f..e758826fa73 100644 --- a/Lib/test/test_gzip.py +++ b/Lib/test/test_gzip.py @@ -6,6 +6,7 @@ from test import support import os import gzip +import struct data1 = b""" int length=DEFAULTALLOC, err = Z_OK; @@ -160,6 +161,68 @@ def test_1647484(self): self.assertEqual(f.name, self.filename) f.close() + def test_mtime(self): + mtime = 123456789 + fWrite = gzip.GzipFile(self.filename, 'w', mtime = mtime) + fWrite.write(data1) + fWrite.close() + + fRead = gzip.GzipFile(self.filename) + dataRead = fRead.read() + self.assertEqual(dataRead, data1) + self.assert_(hasattr(fRead, 'mtime')) + self.assertEqual(fRead.mtime, mtime) + fRead.close() + + def test_metadata(self): + mtime = 123456789 + + fWrite = gzip.GzipFile(self.filename, 'w', mtime = mtime) + fWrite.write(data1) + fWrite.close() + + fRead = open(self.filename, 'rb') + + # see RFC 1952: http://www.faqs.org/rfcs/rfc1952.html + + idBytes = fRead.read(2) + self.assertEqual(idBytes, b'\x1f\x8b') # gzip 
ID + + cmByte = fRead.read(1) + self.assertEqual(cmByte, b'\x08') # deflate + + flagsByte = fRead.read(1) + self.assertEqual(flagsByte, b'\x08') # only the FNAME flag is set + + mtimeBytes = fRead.read(4) + self.assertEqual(mtimeBytes, struct.pack('