From bc8de020a2ee8e380ff0ff9130bd12e93430b819 Mon Sep 17 00:00:00 2001 From: Jack O'Connor Date: Fri, 10 Feb 2023 14:20:28 -0800 Subject: [PATCH] check for tarballs containing sketchy symlinks It's acceptable for a tarball to have a symlink at `a/b/c/foo.txt` that points to `../../../foo.txt` (see `legal_symlink_dots.tar`), because that symlink "stays within" the archive. However, it should be illegal for the same symlink to point to `../../../../foo.txt` (see `illegal_symlink_dots.tar`), because that symlink "reaches outside" the archive. Similarly, it should always be illegal for a tarball to hold a symlink pointing to an absolute path. Add validation and tests cases for these behaviors. --- peru/resources/plugins/curl/curl_plugin.py | 37 +++++++++++++++---- tests/resources/illegal_symlink_absolute.tar | Bin 0 -> 10240 bytes tests/resources/illegal_symlink_dots.tar | Bin 0 -> 10240 bytes tests/resources/legal_symlink_dots.tar | Bin 0 -> 10240 bytes tests/test_curl_plugin.py | 15 ++++++++ 5 files changed, 45 insertions(+), 7 deletions(-) create mode 100644 tests/resources/illegal_symlink_absolute.tar create mode 100644 tests/resources/illegal_symlink_dots.tar create mode 100644 tests/resources/legal_symlink_dots.tar diff --git a/peru/resources/plugins/curl/curl_plugin.py b/peru/resources/plugins/curl/curl_plugin.py index 65ea3cd..668428d 100755 --- a/peru/resources/plugins/curl/curl_plugin.py +++ b/peru/resources/plugins/curl/curl_plugin.py @@ -126,13 +126,17 @@ def plugin_sync(url, sha1): def extract_tar(archive_path, dest): with tarfile.open(archive_path) as t: - validate_filenames(info.path for info in t.getmembers()) + for info in t.getmembers(): + validate_filename(info.path) + if info.issym(): + validate_symlink(info.path, info.linkname) t.extractall(dest) def extract_zip(archive_path, dest): with zipfile.ZipFile(archive_path) as z: - validate_filenames(z.namelist()) + for name in z.namelist(): + validate_filename(name) z.extractall(dest) # Set file permissions. Tar does this by default, but with zip we need # to do it ourselves. @@ -153,11 +157,30 @@ def extract_zip(archive_path, dest): os.chmod(os.path.join(dest, info.filename), 0o755) -def validate_filenames(names): - for name in names: - path = pathlib.PurePosixPath(name) - if path.is_absolute() or '..' in path.parts: - raise EvilArchiveError('Illegal path in archive: ' + name) +def validate_filename(name): + path = pathlib.PurePosixPath(name) + if path.is_absolute() or ".." in path.parts: + raise EvilArchiveError("Illegal path in archive: " + name) + + +def validate_symlink(name, target): + # We might do this twice but that's fine. + validate_filename(name) + + allowed_parent_parts = len(pathlib.PurePosixPath(name).parts) - 1 + + target_path = pathlib.PurePosixPath(target) + if target_path.is_absolute(): + raise EvilArchiveError("Illegal symlink target in archive: " + target) + leading_parent_parts = 0 + for part in target_path.parts: + if part != "..": + break + leading_parent_parts += 1 + if leading_parent_parts > allowed_parent_parts: + raise EvilArchiveError("Illegal symlink target in archive: " + target) + if ".." in target_path.parts[leading_parent_parts:]: + raise EvilArchiveError("Illegal symlink target in archive: " + target) class EvilArchiveError(RuntimeError): diff --git a/tests/resources/illegal_symlink_absolute.tar b/tests/resources/illegal_symlink_absolute.tar new file mode 100644 index 0000000000000000000000000000000000000000..244169570ebc94291a9b8d8c77cc5749ae1a94f9 GIT binary patch literal 10240 zcmeIuI|_g>5QO19N=~5JJlOLD!9t6GM)3G*XQhpH|1?t!3(Ggw+O@sOpYdnROwLlv z{@&MgMKndzoHT?eUW271F7)I7-)(Cvi*quSVXhAfZ~Mb369NbzfB*srAbI@ literal 0 HcmV?d00001 diff --git a/tests/resources/illegal_symlink_dots.tar b/tests/resources/illegal_symlink_dots.tar new file mode 100644 index 0000000000000000000000000000000000000000..679545b96a34a2cef34d27f891b3ccf04ef6252c GIT binary patch literal 10240 zcmeIzK?;K~5QX6!B`46NCYkf3LKp2qXf5>kby_N}EG|Y0{Rbh7Atrn;#@agb3R6kW zZW^NGupG6e$Pr`44DIYrg;A`KQu} z^M|Hs=f`S`e}4ZPMb2vxKmY**5I_I{1Q0*~0R#|0009ILKmY**5I_I{1Q0*~0R#|0 HU`K%`!!%BU literal 0 HcmV?d00001 diff --git a/tests/resources/legal_symlink_dots.tar b/tests/resources/legal_symlink_dots.tar new file mode 100644 index 0000000000000000000000000000000000000000..7b8813935d829dc0035edcd2b73b612929dca831 GIT binary patch literal 10240 zcmeIyK@Ng25QX76N>2bSOzn9pi3?pcQA|9(EpA+)EQXl)Pe{6$K%4Ik&AphHua#ny zx`Y({``TKqBaI8-D7qy!y+n+#k&+par}! ze?3I<-(zlC{*&a4w$v@Rxzq$XH{8fh_Zs@xH^%$P- z&+mVu$oVV+2q1s}0tg_000IagfB*srAb?rUA<5x}T literal 0 HcmV?d00001 diff --git a/tests/test_curl_plugin.py b/tests/test_curl_plugin.py index 8ab476f..bad894f 100644 --- a/tests/test_curl_plugin.py +++ b/tests/test_curl_plugin.py @@ -104,6 +104,21 @@ class CurlPluginTest(shared.PeruTest): with self.assertRaises(curl_plugin.EvilArchiveError): curl_plugin.extract_tar(str(tar_archive), dest) + def test_evil_symlink_archives(self): + """Even worse than archives containing bad paths, an archive could + contain a *symlink* pointing to a bad path. Then a subsequent entry in + the *same* archive could write through the symlink.""" + dest = shared.create_dir() + for case in ["illegal_symlink_dots", "illegal_symlink_absolute"]: + tar_archive = shared.test_resources / (case + ".tar") + with self.assertRaises(curl_plugin.EvilArchiveError): + curl_plugin.extract_tar(str(tar_archive), dest) + # But leading dots should be allowed in symlinks, as long as they don't + # escape the root of the archive. + for case in ["legal_symlink_dots"]: + tar_archive = shared.test_resources / (case + ".tar") + curl_plugin.extract_tar(str(tar_archive), dest) + def test_request_has_user_agent_header(self): actual = curl_plugin.build_request("http://example.test") self.assertTrue(actual.has_header("User-agent"))