From 58a7e130375776b192a99b013bc563205a639edc Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 20 Mar 2022 23:34:45 -0700 Subject: [PATCH] bpo-38256: Fix binascii.crc32 large input. (GH-32000) (GH-32013) (GH-32015) Inputs >= 4GiB to `binascii.crc32(...)` when compiled to use the zlib crc32 implementation (the norm on POSIX) no longer return the wrong result. (cherry picked from commit 4c989e19c84ec224655bbbde9422e16d4a838a80) --- Lib/test/test_binascii.py | 10 ++++++++++ .../2022-03-19-15-54-41.bpo-38256.FoMbjE.rst | 5 +++++ Modules/binascii.c | 20 +++++++++++-------- 3 files changed, 27 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py index 45327953a77..745329102f7 100644 --- a/Lib/test/test_binascii.py +++ b/Lib/test/test_binascii.py @@ -5,6 +5,8 @@ import array import re from test import support +from test.support import bigmemtest, _1G, _4G, warnings_helper + # Note: "*_hex" functions are aliases for "(un)hexlify" b2a_functions = ['b2a_base64', 'b2a_hex', 'b2a_hqx', 'b2a_qp', 'b2a_uu', @@ -448,6 +450,14 @@ class BytearrayBinASCIITest(BinASCIITest): class MemoryviewBinASCIITest(BinASCIITest): type2test = memoryview +class ChecksumBigBufferTestCase(unittest.TestCase): + """bpo-38256 - check that inputs >=4 GiB are handled correctly.""" + + @bigmemtest(size=_4G + 4, memuse=1, dry_run=False) + def test_big_buffer(self, size): + data = b"nyan" * (_1G + 1) + self.assertEqual(binascii.crc32(data), 1044521549) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst b/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst new file mode 100644 index 00000000000..d9b57513b06 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-03-19-15-54-41.bpo-38256.FoMbjE.rst @@ -0,0 +1,5 @@ +Fix :func:`binascii.crc32` when it is compiled to use zlib's crc32 to +work 
properly on inputs 4+GiB in length instead of returning the wrong +result. The workaround prior to this was to always feed the function +data in increments smaller than 4GiB or to just call the zlib module +function. diff --git a/Modules/binascii.c b/Modules/binascii.c index 1f3248b6049..3777580a79f 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -1120,16 +1120,20 @@ binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc) /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/ #ifdef USE_ZLIB_CRC32 -/* This was taken from zlibmodule.c PyZlib_crc32 (but is PY_SSIZE_T_CLEAN) */ +/* The same core as zlibmodule.c zlib_crc32_impl. */ { - const Byte *buf; - Py_ssize_t len; - int signed_val; + unsigned char *buf = data->buf; + Py_ssize_t len = data->len; - buf = (Byte*)data->buf; - len = data->len; - signed_val = crc32(crc, buf, len); - return (unsigned int)signed_val & 0xffffffffU; + /* Avoid truncation of length for very large buffers. crc32() takes + length as an unsigned int, which may be narrower than Py_ssize_t. */ + while ((size_t)len > UINT_MAX) { + crc = crc32(crc, buf, UINT_MAX); + buf += (size_t) UINT_MAX; + len -= (size_t) UINT_MAX; + } + crc = crc32(crc, buf, (unsigned int)len); + return crc & 0xffffffff; } #else /* USE_ZLIB_CRC32 */ { /* By Jim Ahlstrom; All rights transferred to CNRI */