From 06e522521c06671b4559eecf9e2a185c2d62c141 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 30 Mar 2017 19:09:08 +0300 Subject: [PATCH] bpo-10030: Sped up reading encrypted ZIP files by 2 times. (#550) --- Lib/zipfile.py | 102 ++++++++++++++++++++++++------------------------- Misc/NEWS | 2 + 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 8a19ca246b6..6fdf2c398dd 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -509,65 +509,63 @@ def is_dir(self): return self.filename[-1] == '/' -class _ZipDecrypter: - """Class to handle decryption of files stored within a ZIP archive. +# ZIP encryption uses the CRC32 one-byte primitive for scrambling some +# internal keys. We noticed that a direct implementation is faster than +# relying on binascii.crc32(). - ZIP supports a password-based form of encryption. Even though known - plaintext attacks have been found against it, it is still useful - to be able to get data out of such a file. +_crctable = None +def _gen_crc(crc): + for j in range(8): + if crc & 1: + crc = (crc >> 1) ^ 0xEDB88320 + else: + crc >>= 1 + return crc - Usage: - zd = _ZipDecrypter(mypwd) - plain_char = zd(cypher_char) - plain_text = map(zd, cypher_text) - """ +# ZIP supports a password-based form of encryption. Even though known +# plaintext attacks have been found against it, it is still useful +# to be able to get data out of such a file. +# +# Usage: +# zd = _ZipDecrypter(mypwd) +# plain_bytes = zd(cypher_bytes) - def _GenerateCRCTable(): - """Generate a CRC-32 table. +def _ZipDecrypter(pwd): + key0 = 305419896 + key1 = 591751049 + key2 = 878082192 - ZIP encryption uses the CRC32 one-byte primitive for scrambling some - internal keys. We noticed that a direct implementation is faster than - relying on binascii.crc32(). - """ - poly = 0xedb88320 - table = [0] * 256 - for i in range(256): - crc = i - for j in range(8): - if crc & 1: - crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly - else: - crc = ((crc >> 1) & 0x7FFFFFFF) - table[i] = crc - return table - crctable = None + global _crctable + if _crctable is None: + _crctable = list(map(_gen_crc, range(256))) + crctable = _crctable - def _crc32(self, ch, crc): + def crc32(ch, crc): """Compute the CRC32 primitive on one byte.""" - return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff] + return (crc >> 8) ^ crctable[(crc ^ ch) & 0xFF] - def __init__(self, pwd): - if _ZipDecrypter.crctable is None: - _ZipDecrypter.crctable = _ZipDecrypter._GenerateCRCTable() - self.key0 = 305419896 - self.key1 = 591751049 - self.key2 = 878082192 - for p in pwd: - self._UpdateKeys(p) + def update_keys(c): + nonlocal key0, key1, key2 + key0 = crc32(c, key0) + key1 = (key1 + (key0 & 0xFF)) & 0xFFFFFFFF + key1 = (key1 * 134775813 + 1) & 0xFFFFFFFF + key2 = crc32(key1 >> 24, key2) - def _UpdateKeys(self, c): - self.key0 = self._crc32(c, self.key0) - self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 - self.key1 = (self.key1 * 134775813 + 1) & 4294967295 - self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2) + for p in pwd: + update_keys(p) - def __call__(self, c): - """Decrypt a single character.""" - assert isinstance(c, int) - k = self.key2 | 2 - c = c ^ (((k * (k^1)) >> 8) & 255) - self._UpdateKeys(c) - return c + def decrypter(data): + """Decrypt a bytes object.""" + result = bytearray() + append = result.append + for c in data: + k = key2 | 2 + c ^= ((k * (k^1)) >> 8) & 0xFF + update_keys(c) + append(c) + return bytes(result) + + return decrypter class LZMACompressor: @@ -953,7 +951,7 @@ def _read2(self, n): raise EOFError if self._decrypter is not None: - data = bytes(map(self._decrypter, data)) + data = self._decrypter(data) return data def close(self): @@ -1411,7 +1409,7 @@ def open(self, name, mode="r", pwd=None, *, force_zip64=False): # or the MSB of the file time depending on the header type # and is used to check the correctness of the password. header = zef_file.read(12) - h = list(map(zd, header[0:12])) + h = zd(header[0:12]) if zinfo.flag_bits & 0x8: # compare against the file type from extended local headers check_byte = (zinfo._raw_time >> 8) & 0xff diff --git a/Misc/NEWS b/Misc/NEWS index 9c434a7735e..8646b8ce452 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -301,6 +301,8 @@ Extension Modules Library ------- +- bpo-10030: Sped up reading encrypted ZIP files by 2 times. + - bpo-29204: Element.getiterator() and the html parameter of XMLParser() were deprecated only in the documentation (since Python 3.2 and 3.4 correspondintly). Now using them emits a deprecation warning.