From 4cd6a95dfeb06950a2566e23e7e2bc9eb8c59775 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Sun, 17 Aug 2008 20:23:46 +0000 Subject: [PATCH] Merged revisions 65659,65693,65700,65702,65706-65707,65761 via svnmerge from svn+ssh://pythondev@svn.python.org/python/trunk ........ r65659 | martin.v.loewis | 2008-08-12 15:45:21 -0500 (Tue, 12 Aug 2008) | 2 lines Add Hirokazu Yamamoto. ........ r65693 | georg.brandl | 2008-08-15 13:35:09 -0500 (Fri, 15 Aug 2008) | 2 lines #3558: Attribute reference binds more tightly than subscription and call. ........ r65700 | antoine.pitrou | 2008-08-15 16:03:21 -0500 (Fri, 15 Aug 2008) | 3 lines #2676: email/message.py [Message.get_content_type]: Trivial regex hangs on pathological input ........ r65702 | gregory.p.smith | 2008-08-15 18:14:00 -0500 (Fri, 15 Aug 2008) | 2 lines document that waitpid raises OSError ........ r65706 | benjamin.peterson | 2008-08-15 22:02:41 -0500 (Fri, 15 Aug 2008) | 1 line fix markup ........ r65707 | benjamin.peterson | 2008-08-15 22:13:07 -0500 (Fri, 15 Aug 2008) | 1 line note how os.utime should be used for emulating touch ........ r65761 | antoine.pitrou | 2008-08-17 08:06:29 -0500 (Sun, 17 Aug 2008) | 3 lines fix ZipFile.testzip() to work with very large embedded files ........ --- Doc/library/os.rst | 22 +++++++++++++--------- Doc/reference/expressions.rst | 6 +++--- Lib/email/message.py | 18 +++++++++++------- Lib/zipfile.py | 7 ++++++- Misc/NEWS | 4 ++++ Misc/developers.txt | 3 +++ 6 files changed, 40 insertions(+), 20 deletions(-) diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 86d9a896992..b95833c3e4a 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -1076,15 +1076,16 @@ Files and Directories .. function:: utime(path, times) - Set the access and modified times of the file specified by *path*. If *times* is - ``None``, then the file's access and modified times are set to the current time. 
- Otherwise, *times* must be a 2-tuple of numbers, of the form ``(atime, mtime)`` - which is used to set the access and modified times, respectively. Whether a - directory can be given for *path* depends on whether the operating system - implements directories as files (for example, Windows does not). Note that the - exact times you set here may not be returned by a subsequent :func:`stat` call, - depending on the resolution with which your operating system records access and - modification times; see :func:`stat`. + Set the access and modified times of the file specified by *path*. If *times* + is ``None``, then the file's access and modified times are set to the current + time. (The effect is similar to running the Unix program :program:`touch` on + the path.) Otherwise, *times* must be a 2-tuple of numbers, of the form + ``(atime, mtime)`` which is used to set the access and modified times, + respectively. Whether a directory can be given for *path* depends on whether + the operating system implements directories as files (for example, Windows + does not). Note that the exact times you set here may not be returned by a + subsequent :func:`stat` call, depending on the resolution with which your + operating system records access and modification times; see :func:`stat`. Availability: Macintosh, Unix, Windows. @@ -1596,6 +1597,9 @@ written in Python, such as a mail server's external command delivery program. ``-1``, status is requested for any process in the process group ``-pid`` (the absolute value of *pid*). + An :exc:`OSError` is raised with the value of errno when the syscall + returns -1. + On Windows: Wait for completion of a process given by process handle *pid*, and return a tuple containing *pid*, and its exit status shifted left by 8 bits (shifting makes cross-platform use of the function easier). 
A *pid* less than or diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index dab97e4856c..837eb89ee7c 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -1266,13 +1266,13 @@ groups from right to left). +----------------------------------------------+-------------------------------------+ | ``**`` | Exponentiation | +----------------------------------------------+-------------------------------------+ -| ``x.attribute`` | Attribute reference | -+----------------------------------------------+-------------------------------------+ | ``x[index]`` | Subscription | +----------------------------------------------+-------------------------------------+ | ``x[index:index]`` | Slicing | +----------------------------------------------+-------------------------------------+ -| ``f(arguments...)`` | Function call | +| ``x(arguments...)`` | Call | ++----------------------------------------------+-------------------------------------+ +| ``x.attribute`` | Attribute reference | +----------------------------------------------+-------------------------------------+ | ``(expressions...)`` | Binding, tuple display, generator | | | expressions | diff --git a/Lib/email/message.py b/Lib/email/message.py index ff262c7c8f5..471d46be141 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -20,18 +20,22 @@ SEMISPACE = '; ' -# Regular expression used to split header parameters. BAW: this may be too -# simple. It isn't strictly RFC 2045 (section 5.1) compliant, but it catches -# most headers found in the wild. We may eventually need a full fledged -# parser eventually. -paramre = re.compile(r'\s*;\s*') # Regular expression that matches `special' characters in parameters, the # existance of which force quoting of the parameter value. tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]') - # Helper functions +def _splitparam(param): + # Split header parameters. BAW: this may be too simple. 
It isn't + # strictly RFC 2045 (section 5.1) compliant, but it catches most headers + # found in the wild. We may need a full-fledged parser + # eventually. + a, sep, b = param.partition(';') + if not sep: + return a.strip(), None + return a.strip(), b.strip() + def _formatparam(param, value=None, quote=True): """Convenience function to format and return a key=value pair. @@ -443,7 +447,7 @@ def get_content_type(self): if value is missing: # This should have no parameters return self.get_default_type() - ctype = paramre.split(value)[0].lower().strip() + ctype = _splitparam(value)[0].lower() # RFC 2045, section 5.2 says if its invalid, use text/plain if ctype.count('/') != 1: return 'text/plain' diff --git a/Lib/zipfile.py b/Lib/zipfile.py index e4bc019d1b2..9e96e934ec1 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -813,9 +813,14 @@ def printdir(self, file=None): def testzip(self): """Read all the files and check the CRC.""" + chunk_size = 2 ** 20 for zinfo in self.filelist: try: - self.read(zinfo.filename) # Check CRC-32 + # Read by chunks, to avoid an OverflowError or a + # MemoryError with very large embedded files. + f = self.open(zinfo.filename, "r") + while f.read(chunk_size): # Check CRC-32 + pass except BadZipfile: return zinfo.filename diff --git a/Misc/NEWS b/Misc/NEWS index ae95abf2a04..afbe9890b6b 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -30,6 +30,10 @@ Core and Builtins Library ------- +- Issue #2676: in the email package, content-type parsing was hanging on + pathological input because of quadratic or exponential behaviour of a + regular expression. + - Issue #3476: binary buffered reading through the new "io" library is now thread-safe. diff --git a/Misc/developers.txt b/Misc/developers.txt index f5b157cb13f..883246360ae 100644 --- a/Misc/developers.txt +++ b/Misc/developers.txt @@ -17,6 +17,9 @@ the format to accommodate documentation needs as they arise. 
Permissions History ------------------- +- Hirokazu Yamamoto was given SVN access on August 12 2008 by MvL, + for contributions to the Windows build. + - Antoine Pitrou was given SVN access on July 16 2008, by recommendation from GvR, for general contributions to Python.