From 3a9b2aae615165a40614db9aaa8b90c55ff0c7f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 30 Jul 2024 10:50:30 +0200 Subject: [PATCH] gh-122400: Handle ValueError in filecmp (GH-122401) --- Lib/filecmp.py | 10 +++--- Lib/test/test_filecmp.py | 33 +++++++++++++++++++ ...-07-29-16-47-08.gh-issue-122400.fM0YSv.rst | 3 ++ 3 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst diff --git a/Lib/filecmp.py b/Lib/filecmp.py index 020ea694ca6..c5b8d854d77 100644 --- a/Lib/filecmp.py +++ b/Lib/filecmp.py @@ -164,12 +164,14 @@ def phase2(self): # Distinguish files, directories, funnies ok = True try: a_stat = os.stat(a_path) - except OSError: + except (OSError, ValueError): + # See https://github.com/python/cpython/issues/122400 + # for the rationale for protecting against ValueError. # print('Can\'t stat', a_path, ':', why.args[1]) ok = False try: b_stat = os.stat(b_path) - except OSError: + except (OSError, ValueError): # print('Can\'t stat', b_path, ':', why.args[1]) ok = False @@ -285,12 +287,12 @@ def cmpfiles(a, b, common, shallow=True): # Return: # 0 for equal # 1 for different -# 2 for funny cases (can't stat, etc.) +# 2 for funny cases (can't stat, NUL bytes, etc.) # def _cmp(a, b, sh, abs=abs, cmp=cmp): try: return not abs(cmp(a, b, sh)) - except OSError: + except (OSError, ValueError): return 2 diff --git a/Lib/test/test_filecmp.py b/Lib/test/test_filecmp.py index 1fb47163719..2c83667b22f 100644 --- a/Lib/test/test_filecmp.py +++ b/Lib/test/test_filecmp.py @@ -156,6 +156,39 @@ def test_cmpfiles(self): (['file'], ['file2'], []), "Comparing mismatched directories fails") + def test_cmpfiles_invalid_names(self): + # See https://github.com/python/cpython/issues/122400. + for file, desc in [ + ('\x00', 'NUL bytes filename'), + (__file__ + '\x00', 'filename with embedded NUL bytes'), + ("\uD834\uDD1E.py", 'surrogate codes (MUSICAL SYMBOL G CLEF)'), + ('a' * 1_000_000, 'very long filename'), + ]: + for other_dir in [self.dir, self.dir_same, self.dir_diff]: + with self.subTest(f'cmpfiles: {desc}', other_dir=other_dir): + res = filecmp.cmpfiles(self.dir, other_dir, [file]) + self.assertTupleEqual(res, ([], [], [file])) + + def test_dircmp_invalid_names(self): + for bad_dir, desc in [ + ('\x00', 'NUL bytes dirname'), + (f'Top{os.sep}Mid\x00', 'dirname with embedded NUL bytes'), + ("\uD834\uDD1E", 'surrogate codes (MUSICAL SYMBOL G CLEF)'), + ('a' * 1_000_000, 'very long dirname'), + ]: + d1 = filecmp.dircmp(self.dir, bad_dir) + d2 = filecmp.dircmp(bad_dir, self.dir) + for target in [ + # attributes where os.listdir() raises OSError or ValueError + 'left_list', 'right_list', + 'left_only', 'right_only', 'common', + ]: + with self.subTest(f'dircmp(ok, bad): {desc}', target=target): + with self.assertRaises((OSError, ValueError)): + getattr(d1, target) + with self.subTest(f'dircmp(bad, ok): {desc}', target=target): + with self.assertRaises((OSError, ValueError)): + getattr(d2, target) def _assert_lists(self, actual, expected): """Assert that two lists are equal, up to ordering.""" diff --git a/Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst b/Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst new file mode 100644 index 00000000000..8c47e94f78d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-29-16-47-08.gh-issue-122400.fM0YSv.rst @@ -0,0 +1,3 @@ +Handle :exc:`ValueError`\s raised by :func:`os.stat` in +:class:`filecmp.dircmp` and :func:`filecmp.cmpfiles`. +Patch by Bénédikt Tran.