From e731fac3db61c3eb18b9d66e64cec4e622f561eb Mon Sep 17 00:00:00 2001
From: Nigel Tao <nigeltao@golang.org>
Date: Wed, 6 May 2020 13:23:14 +1000
Subject: [PATCH] [wuffs] Skip dupe files when building seed corpus (#3763)

Updates google/oss-fuzz#22035
---
 projects/wuffs/build.sh | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/projects/wuffs/build.sh b/projects/wuffs/build.sh
index 6177a3b75..d51af38cf 100755
--- a/projects/wuffs/build.sh
+++ b/projects/wuffs/build.sh
@@ -32,8 +32,24 @@ for f in fuzz/c/std/*_fuzzer.c; do
   # Make the optional "gzip_fuzzer_seed_corpus.zip" archive. This means
   # extracting the "foo/bar/*.gz" out of the matching "gzip: foo/bar/*.gz"
   # lines in fuzz/c/std/seed_corpora.txt.
+  #
+  # The seed_corpora.txt lines can contain multiple entries, combining
+  # independent corpora. A naive "zip --junk-paths" of all those files can fail
+  # if there are duplicate file names, which can easily happen if the file name
+  # is a hash of its contents and the contents are a (trivial) minimal
+  # reproducer. We use a de-duplication step of copying all of those files into
+  # a single directory. Doing that in a single "cp" or "mv" call can fail with
+  # "will not overwrite just-created 'foo/etc' with 'bar/etc'", so we make
+  # multiple calls, each copying one file at a time. Later duplicates overwrite
+  # earlier duplicates. It's OK if the contents aren't identical. The result is
+  # still a valid uber-corpus of seed files.
   seeds=$(sed -n -e "/^$b:/s/^$b: *//p" fuzz/c/std/seed_corpora.txt)
   if [ -n "$seeds" ]; then
-    zip --junk-paths $OUT/${b}_fuzzer_seed_corpus.zip $seeds
+    mkdir "${b}_fuzzer_seed_corpus"
+    for s in $seeds; do  # deliberately unquoted: split entries, expand globs
+      cp -- "$s" "${b}_fuzzer_seed_corpus/"
+    done
+    zip --junk-paths --recurse-paths "$OUT/${b}_fuzzer_seed_corpus.zip" "${b}_fuzzer_seed_corpus"
+    rm -rf -- "${b}_fuzzer_seed_corpus"
   fi
 done