oss-fuzz/infra/base-images/base-runner/coverage

#!/bin/bash -u
# Copyright 2018 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
# Work from $OUT: fuzz targets are discovered below (and referenced later)
# relative to the current directory. Stop right away if $OUT cannot be
# entered; otherwise `find .` would scan whatever directory the script
# happened to be started from.
cd "$OUT" || exit 1

if (( $# > 0 )); then
  # Fuzz targets were named on the command line. They are stored as one
  # space-separated string because $FUZZ_TARGETS is consumed by word splitting.
  FUZZ_TARGETS="$*"
else
  # No targets given: every executable regular file directly inside $OUT is a
  # candidate, except for the helper binaries excluded by exact name below.
  FUZZ_TARGETS="$(find . -maxdepth 1 -type f -executable -printf '%P\n' | \
      grep -v -x -F \
      -e 'llvm-symbolizer' \
      -e 'jazzer_agent_deploy.jar' \
      -e 'jazzer_driver' \
      -e 'jazzer_driver_with_sanitizer')"
fi

# Root directory for all coverage artifacts; falls back to $OUT when the caller
# did not provide (or provided an empty) $COVERAGE_OUTPUT_DIR.
COVERAGE_OUTPUT_DIR=${COVERAGE_OUTPUT_DIR:-$OUT}

DUMPS_DIR="$COVERAGE_OUTPUT_DIR/dumps"
FUZZER_STATS_DIR="$COVERAGE_OUTPUT_DIR/fuzzer_stats"
LOGS_DIR="$COVERAGE_OUTPUT_DIR/logs"
REPORT_ROOT_DIR="$COVERAGE_OUTPUT_DIR/report"
REPORT_PLATFORM_DIR="$COVERAGE_OUTPUT_DIR/report/linux"

# Start from a clean slate: remove whatever is left in each output directory
# and recreate it empty. The paths are quoted so that an output location
# containing whitespace or glob characters cannot make `rm -rf` act on
# unintended paths. $REPORT_ROOT_DIR is processed before $REPORT_PLATFORM_DIR,
# which is located inside it.
for directory in "$DUMPS_DIR" "$FUZZER_STATS_DIR" "$LOGS_DIR" \
                 "$REPORT_ROOT_DIR" "$REPORT_PLATFORM_DIR"; do
  rm -rf -- "$directory"
  mkdir -p -- "$directory"
done

# Merged profile of all fuzz targets and the overall summary derived from it.
PROFILE_FILE="$DUMPS_DIR/merged.profdata"
SUMMARY_FILE="$REPORT_PLATFORM_DIR/summary.json"

# Use path mapping, as $SRC directory from the builder is copied into $OUT/$SRC.
PATH_EQUIVALENCE_ARGS="-path-equivalence=/,$OUT"

# It's important to use $COVERAGE_EXTRA_ARGS as the last argument, because it
# can contain paths to source files / directories which are positional args.
# The variable is optional: default it to an empty string so that leaving it
# unset does not abort the script, which runs with `-u`.
# The resulting string is meant to be expanded unquoted (word-split) by users.
LLVM_COV_COMMON_ARGS="$PATH_EQUIVALENCE_ARGS \
    -ignore-filename-regex=.*src/libfuzzer/.* ${COVERAGE_EXTRA_ARGS:-}"

# Timeout for running a single fuzz target.
TIMEOUT=1h

# This will be used by llvm-cov command to generate the actual report.
objects=""

# Number of CPUs available, this is needed for running tests in parallel.
NPROC="$(nproc)"

# Location of the per-target corpus directories; overridable by the caller.
CORPUS_DIR=${CORPUS_DIR:-"/corpus"}

# Runs one libFuzzer-style fuzz target over its corpus and converts the raw
# profile dumps it leaves behind into a per-target .profdata file and a JSON
# coverage summary.
#
# Arguments:
#   $1 - name of the fuzz target; the binary is run as $OUT/<name> and passed
#        to llvm-cov as a path relative to the current directory.
# Globals read:
#   OUT, CORPUS_DIR, DUMPS_DIR, LOGS_DIR, FUZZER_STATS_DIR, TIMEOUT,
#   LLVM_COV_COMMON_ARGS, FULL_SUMMARY_PER_TARGET (optional).
# Side effects:
#   Exports LLVM_PROFILE_FILE. Writes $LOGS_DIR/<name>.log,
#   $DUMPS_DIR/<name>.profdata and $FUZZER_STATS_DIR/<name>.json (plus
#   $LOGS_DIR/<name>.json when FULL_SUMMARY_PER_TARGET is non-empty).
# Returns:
#   0 without producing any summary when the target left no profile data.
function run_fuzz_target {
  local target=$1

  # '%1m' will produce separate dump files for every object. For example, if a
  # fuzz target loads a shared library, we will have dumps for both of them.
  local profraw_file="$DUMPS_DIR/$target.%1m.profraw"
  local profdata_file="$DUMPS_DIR/$target.profdata"
  local corpus_real="$CORPUS_DIR/${target}"
  local log_file="$LOGS_DIR/$target.log"

  # -merge=1 requires an output directory, create a new, empty dir for that.
  local corpus_dummy="$OUT/dummy_corpus_dir_for_${target}"
  rm -rf -- "$corpus_dummy" && mkdir -p -- "$corpus_dummy"

  # Use -merge=1 instead of -runs=0 because merge is crash resistant and lets
  # us collect coverage from all corpus files even if some of them are crash
  # inputs. Merge should not introduce any significant overhead compared to
  # -runs=0, because (A) corpora are already minimized; (B) we do not use
  # sancov, and so libFuzzer always finishes merge with an empty output dir.
  # Use 100s timeout instead of 25s as code coverage builds can be very slow.
  export LLVM_PROFILE_FILE=$profraw_file
  if ! timeout "$TIMEOUT" "$OUT/$target" -merge=1 -timeout=100 \
      "$corpus_dummy" "$corpus_real" &> "$log_file"; then
    # A failing target is reported but not fatal: it may still have dumped
    # usable profile data.
    echo "Error occured while running $target:"
    cat "$log_file"
  fi

  rm -rf -- "$corpus_dummy"

  # Expand the dump file glob once, quoting the directory part so that only the
  # '*' is treated as a pattern. When nothing matches, the literal pattern is
  # kept, `du` reports a total of 0 and the target is skipped below.
  local profraw_files=("$DUMPS_DIR/$target."*.profraw)
  if (( $(du -c "${profraw_files[@]}" | tail -n 1 | cut -f 1) == 0 )); then
    # Skip fuzz targets that failed to produce profile dumps.
    return 0
  fi

  llvm-profdata merge -j=1 -sparse "${profraw_files[@]}" -o "$profdata_file"

  # Delete unnecessary and (potentially) large .profraw files.
  rm "${profraw_files[@]}"

  # Keep the helper output local so it cannot clobber a variable of the same
  # name in the caller.
  local shared_libraries
  shared_libraries=$(coverage_helper shared_libs -build-dir="$OUT" \
      -object="$target")

  # $shared_libraries and $LLVM_COV_COMMON_ARGS are deliberately unquoted: each
  # holds several command line arguments that must be word-split.
  llvm-cov export -summary-only -instr-profile="$profdata_file" \
      -object="$target" $shared_libraries $LLVM_COV_COMMON_ARGS \
      > "$FUZZER_STATS_DIR/$target.json"

  if [ -n "${FULL_SUMMARY_PER_TARGET-}" ]; then
    # This is needed for dataflow strategy analysis, can be removed later. See
    # - https://github.com/google/oss-fuzz/pull/3306
    # - https://github.com/google/oss-fuzz/issues/1632
    # Intentionally writing these to the logs dir in order to hide the dumps
    # from the ClusterFuzz cron job.
    llvm-cov export -instr-profile="$profdata_file" -object="$target" \
      $shared_libraries $LLVM_COV_COMMON_ARGS > "$LOGS_DIR/$target.json"
  fi
}

# Runs one Go fuzz target with coverage profiling enabled and writes its
# per-target coverage summary.
#
# Arguments:
#   $1 - name of the Go fuzz target; the binary is run as $OUT/<name>, and
#        $OUT/<name>.gocovpath supplies one sed expression per line.
# Globals read:
#   OUT, CORPUS_DIR, DUMPS_DIR, LOGS_DIR, FUZZER_STATS_DIR, SYSGOPATH.
# Side effects:
#   Exports FUZZ_CORPUS_DIR and FUZZ_PROFILE_NAME. Writes
#   $LOGS_DIR/<name>.log, $DUMPS_DIR/<name>.profdata and
#   $FUZZER_STATS_DIR/<name>.json.
function run_go_fuzz_target {
  local target=$1
  local profdata_file="$DUMPS_DIR/$target.profdata"
  local sed_expr

  echo "Running go target $target"
  export FUZZ_CORPUS_DIR="$CORPUS_DIR/${target}/"
  export FUZZ_PROFILE_NAME="$DUMPS_DIR/$target.perf"
  "$OUT/$target" -test.coverprofile "$profdata_file" &> "$LOGS_DIR/$target.log"

  # Translate from golangish paths to current absolute paths. Each line of the
  # .gocovpath file is applied as a single sed expression: `read -r` keeps
  # backslashes intact and the quotes keep an expression containing spaces in
  # one piece. The `|| [[ -n ... ]]` part handles a missing final newline.
  while read -r sed_expr || [[ -n $sed_expr ]]; do
    sed -i "$sed_expr" "$profdata_file"
  done < "$OUT/$target.gocovpath"

  # cf PATH_EQUIVALENCE_ARGS: prefix the first '/' of every line with $OUT.
  sed -i "s=/=$OUT/=" "$profdata_file"
  "$SYSGOPATH/bin/gocovsum" "$profdata_file" > "$FUZZER_STATS_DIR/$target.json"
}

# Runs one JVM fuzz target over its corpus with the JaCoCo agent attached and
# converts the resulting .exec file into a per-target JSON coverage summary.
#
# Arguments:
#   $1 - name of the fuzz target; the launcher is run as $OUT/<name>.
# Globals read:
#   OUT, CORPUS_DIR, DUMPS_DIR, LOGS_DIR, FUZZER_STATS_DIR, TIMEOUT.
# Side effects:
#   Creates $DUMPS_DIR/<name>_classes/. Writes $LOGS_DIR/<name>.log,
#   $DUMPS_DIR/<name>.exec, $DUMPS_DIR/<name>.xml and
#   $FUZZER_STATS_DIR/<name>.json.
# Returns:
#   0 without producing any summary when the target left no .exec data.
function run_java_fuzz_target {
  local target=$1

  local exec_file="$DUMPS_DIR/$target.exec"
  local class_dump_dir="$DUMPS_DIR/${target}_classes/"
  mkdir "$class_dump_dir"
  local corpus_real="$CORPUS_DIR/${target}"
  local log_file="$LOGS_DIR/$target.log"

  # -merge=1 requires an output directory, create a new, empty dir for that.
  local corpus_dummy="$OUT/dummy_corpus_dir_for_${target}"
  rm -rf -- "$corpus_dummy" && mkdir -p -- "$corpus_dummy"

  # Use 100s timeout instead of 25s as code coverage builds can be very slow.
  # The agent option is passed as one quoted word so that the '.*' in the
  # excludes pattern can never be expanded as a file name glob.
  local jacoco_args="destfile=$exec_file,classdumpdir=$class_dump_dir,excludes=com.code_intelligence.jazzer.*"
  if ! timeout "$TIMEOUT" "$OUT/$target" -merge=1 -timeout=100 --nohooks \
      "--additional_jvm_args=-javaagent:/opt/jacoco-agent.jar=$jacoco_args" \
      "$corpus_dummy" "$corpus_real" &> "$log_file"; then
    # A failing target is reported but not fatal: it may still have written
    # usable coverage data.
    echo "Error occured while running $target:"
    cat "$log_file"
  fi

  # The dummy corpus dir is only needed while the target runs; remove it so it
  # does not pile up in $OUT (same as run_fuzz_target does).
  rm -rf -- "$corpus_dummy"

  if (( $(du -c "$exec_file" | tail -n 1 | cut -f 1) == 0 )); then
    # Skip fuzz targets that failed to produce .exec files.
    return 0
  fi

  # Generate XML report only as input to jacoco_report_converter.
  # Source files are not needed for the summary.
  local xml_report="$DUMPS_DIR/${target}.xml"
  local summary_file="$FUZZER_STATS_DIR/$target.json"
  java -jar /opt/jacoco-cli.jar report "$exec_file" \
      --xml "$xml_report" \
      --classfiles "$class_dump_dir"

  # Write llvm-cov summary file.
  jacoco_report_converter.py "$xml_report" "$summary_file"
}

# Remember the incoming $GOPATH as $SYSGOPATH, then re-root $GOPATH under $OUT.
export SYSGOPATH=$GOPATH
export GOPATH=$OUT/$GOPATH

# Launch one background coverage run per fuzz target. Which runner is used
# depends on $FUZZING_LANGUAGE. Unless $FUZZING_ENGINE is "none", a file only
# counts as a fuzz target when it contains the language specific marker string;
# anything else is silently skipped.
for fuzz_target in $FUZZ_TARGETS; do
  case $FUZZING_LANGUAGE in
    go)
      if [[ $FUZZING_ENGINE != "none" ]] \
          && ! grep "FUZZ_CORPUS_DIR" $fuzz_target > /dev/null 2>&1; then
        continue
      fi
      run_go_fuzz_target $fuzz_target &
      ;;
    jvm)
      if [[ $FUZZING_ENGINE != "none" ]] \
          && ! grep "LLVMFuzzerTestOneInput" $fuzz_target > /dev/null 2>&1; then
        continue
      fi
      echo "Running $fuzz_target"
      run_java_fuzz_target $fuzz_target &
      ;;
    *)
      if [[ $FUZZING_ENGINE != "none" ]] \
          && ! grep "LLVMFuzzerTestOneInput" $fuzz_target > /dev/null 2>&1; then
        continue
      fi
      echo "Running $fuzz_target"
      run_fuzz_target $fuzz_target &

      # Collect the binaries for the final llvm-cov invocation. The very first
      # one is stored bare, every following one gets an "-object=" prefix.
      objects="${objects:+$objects -object=}$fuzz_target"
      ;;
  esac

  # Throttle: while as many jobs are running as there are CPUs, poll every
  # four seconds before starting the next target.
  n_child_proc=$(jobs -rp | wc -l)
  while [ "$n_child_proc" -eq "$NPROC" ]; do
    sleep 4
    n_child_proc=$(jobs -rp | wc -l)
  done
done

# Block until every background run has finished.
wait

# Combine the per-target results into the final report. The steps depend on
# $FUZZING_LANGUAGE: Go and JVM have dedicated tool chains, everything else is
# handled with llvm-profdata / llvm-cov.
if [[ $FUZZING_LANGUAGE == "go" ]]; then
  # fuzz.cov and merged.data are created in the current directory.
  "$SYSGOPATH/bin/gocovmerge" "$DUMPS_DIR"/*.profdata > fuzz.cov
  go tool cover -html=fuzz.cov -o "$REPORT_ROOT_DIR/index.html"
  "$SYSGOPATH/bin/gocovsum" fuzz.cov > "$SUMMARY_FILE"
  cp "$REPORT_ROOT_DIR/index.html" "$REPORT_PLATFORM_DIR/index.html"
  "$SYSGOPATH/bin/pprof-merge" "$DUMPS_DIR"/*.perf.cpu.prof
  mv merged.data "$REPORT_ROOT_DIR/cpu.prof"
  "$SYSGOPATH/bin/pprof-merge" "$DUMPS_DIR"/*.perf.heap.prof
  mv merged.data "$REPORT_ROOT_DIR/heap.prof"
  #TODO some proxy for go tool pprof -http=127.0.0.1:8001 $DUMPS_DIR/cpu.prof
  echo "Finished generating code coverage report for Go fuzz targets."
elif [[ $FUZZING_LANGUAGE == "jvm" ]]; then

  # From this point on the script does not tolerate any errors.
  set -e

  # Merge .exec files from the individual targets.
  jacoco_merged_exec="$DUMPS_DIR/jacoco.merged.exec"
  java -jar /opt/jacoco-cli.jar merge "$DUMPS_DIR"/*.exec \
      --destfile "$jacoco_merged_exec"

  # Merge .class files from the individual targets.
  classes_dir="$DUMPS_DIR/classes"
  mkdir "$classes_dir"
  for fuzz_target in $FUZZ_TARGETS; do
    target_classes_dir="$DUMPS_DIR/${fuzz_target}_classes"
    # $FUZZ_TARGETS may list files that were never run as fuzz targets, and a
    # target may have dumped no classes at all. Neither case is an error, so
    # skip missing directories and copy with "dir/." which, unlike "dir/*",
    # also succeeds for an empty directory.
    if [[ -d $target_classes_dir ]]; then
      cp -r "$target_classes_dir/." "$classes_dir/"
    fi
  done

  # Heuristically determine source directories based on Maven structure.
  # Always include the $SRC root as it likely contains the fuzzer sources.
  sourcefiles_args=(--sourcefiles "$OUT/$SRC")
  source_dirs=$(find "$OUT/$SRC" -type d -name 'java')
  # Read the result line by line so that directory names containing spaces or
  # glob characters stay intact.
  while IFS= read -r source_dir; do
    [[ -n $source_dir ]] || continue
    sourcefiles_args+=(--sourcefiles "$source_dir")
  done <<< "$source_dirs"

  # Generate HTML and XML reports.
  xml_report="$REPORT_PLATFORM_DIR/index.xml"
  java -jar /opt/jacoco-cli.jar report "$jacoco_merged_exec" \
      --html "$REPORT_PLATFORM_DIR" \
      --xml "$xml_report" \
      --classfiles "$classes_dir" \
      "${sourcefiles_args[@]}"

  # Write llvm-cov summary file.
  jacoco_report_converter.py "$xml_report" "$SUMMARY_FILE"

  set +e
else

  # From this point on the script does not tolerate any errors.
  set -e

  # Merge all dumps from the individual targets.
  rm -f -- "$PROFILE_FILE"
  llvm-profdata merge -sparse "$DUMPS_DIR"/*.profdata -o "$PROFILE_FILE"

  # TODO(mmoroz): add script from Chromium for rendering directory view reports.
  # The first path in $objects does not have -object= prefix (llvm-cov format).
  # $objects is deliberately unquoted here: together with the "-object=" in
  # front of it, it word-splits into one -object=... argument per binary.
  shared_libraries=$(coverage_helper shared_libs -build-dir="$OUT" \
      -object=$objects)
  objects="$objects $shared_libraries"

  # It's important to use $LLVM_COV_COMMON_ARGS as the last argument due to
  # positional arguments (SOURCES) that can be passed via $COVERAGE_EXTRA_ARGS.
  # $LLVM_COV_ARGS holds many arguments and is expanded unquoted on purpose.
  LLVM_COV_ARGS="-instr-profile=$PROFILE_FILE $objects $LLVM_COV_COMMON_ARGS"

  # Generate HTML report.
  llvm-cov show -format=html -output-dir="$REPORT_ROOT_DIR" \
      -Xdemangler rcfilt $LLVM_COV_ARGS

  # Export coverage summary in JSON format.
  llvm-cov export -summary-only $LLVM_COV_ARGS > "$SUMMARY_FILE"

  # Post process HTML report.
  coverage_helper -v post_process -src-root-dir=/ \
      -summary-file="$SUMMARY_FILE" -output-dir="$REPORT_ROOT_DIR" \
      $PATH_EQUIVALENCE_ARGS

fi

# Make sure report is readable: add read permission to everything below the
# report root and execute (traverse) permission to every directory in it.
chmod -R +r "$REPORT_ROOT_DIR"
find "$REPORT_ROOT_DIR" -type d -exec chmod +x {} +

# Serving is optional. $HTTP_PORT is given an empty default so that leaving it
# unset skips this step instead of aborting the script, which runs with `-u`.
if [[ -n ${HTTP_PORT:-} ]]; then
  # Serve the report locally.
  echo "Serving the report on http://127.0.0.1:$HTTP_PORT/linux/index.html"
  # Do not start the server from some other directory if the report root
  # cannot be entered.
  cd "$REPORT_ROOT_DIR" || exit 1
  python3 -m http.server "$HTTP_PORT"
fi