[wuffs] Skip dupe files when building seed corpus (#3763)

Updates google/oss-fuzz#22035
This commit is contained in:
Nigel Tao 2020-05-06 13:23:14 +10:00 committed by GitHub
parent b16fcfc903
commit e731fac3db
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 17 additions and 1 deletions

View File

@ -32,8 +32,24 @@ for f in fuzz/c/std/*_fuzzer.c; do
# Make the optional "gzip_fuzzer_seed_corpus.zip" archive. This means
# extracting the "foo/bar/*.gz" out of the matching "gzip: foo/bar/*.gz"
# lines in fuzz/c/std/seed_corpora.txt.
#
# The seed_corpora.txt lines can contain multiple entries, combining
# independent corpora. A naive "zip --junk-paths" of all those files can fail
# if there are duplicate file names, which can easily happen if the file name
# is a hash of its contents and the contents are a (trivial) minimal
# reproducer. We use a de-duplication step of copying all of those files into
# a single directory. Doing that in a single "cp" or "mv" call can fail with
# "will not overwrite just-created 'foo/etc' with 'bar/etc'", so we make
# multiple calls, each copying one file at a time. Later duplicates overwrite
# earlier duplicates. It's OK if the contents aren't identical. The result is
# still a valid uber-corpus of seed files.
# Pull the seed entries listed for fuzzer "$b" (the text after "$b:") out of
# seed_corpora.txt. The result is empty when the fuzzer has no entry.
seeds=$(sed -n -e "/^$b:/s/^$b: *//p" fuzz/c/std/seed_corpora.txt)
if [ -n "$seeds" ]; then
  # Stage the seeds in one flat directory rather than zipping $seeds directly:
  # "zip --junk-paths" rejects repeated base names, whereas copying one file
  # at a time lets a later duplicate simply overwrite an earlier one.
  corpus_dir="${b}_fuzzer_seed_corpus"

  # Remove any leftover from an interrupted earlier run so that mkdir cannot
  # fail and stale files do not end up in the archive.
  rm -rf -- "$corpus_dir"
  mkdir -- "$corpus_dir"

  # $seeds is intentionally unquoted: it holds several whitespace-separated
  # entries such as "foo/bar/*.gz" that must be word-split and glob-expanded.
  for s in $seeds; do
    cp -- "$s" "$corpus_dir"
  done

  zip --junk-paths --recurse-paths "$OUT/${b}_fuzzer_seed_corpus.zip" "$corpus_dir"
  rm -rf -- "$corpus_dir"
fi
done