From c8d2319aa8cb3ae8f3eaa6a7e6ddf95e364d5fe5 Mon Sep 17 00:00:00 2001 From: jonathanmetzman <31354670+jonathanmetzman@users.noreply.github.com> Date: Thu, 3 Dec 2020 12:52:34 -0800 Subject: [PATCH] [test_all] Rewrite in Python (#4769) Rewrite test_all in python. Bash is quite annoying to write and test. One issue with bash is it is even worse than Python for parallelism (which may be causing #4707). Rewrite test_all in python and optimize base-runner/Dockerfile for fast development. Also, combine some docker layers. --- infra/base-images/base-runner/Dockerfile | 49 ++--- infra/base-images/base-runner/README.md | 2 +- infra/base-images/base-runner/test_all | 147 --------------- infra/base-images/base-runner/test_all.py | 206 ++++++++++++++++++++++ infra/cifuzz/cifuzz.py | 2 +- infra/helper.py | 11 +- 6 files changed, 241 insertions(+), 176 deletions(-) delete mode 100755 infra/base-images/base-runner/test_all create mode 100755 infra/base-images/base-runner/test_all.py diff --git a/infra/base-images/base-runner/Dockerfile b/infra/base-images/base-runner/Dockerfile index 7fb0bb4db..f0a264fa7 100644 --- a/infra/base-images/base-runner/Dockerfile +++ b/infra/base-images/base-runner/Dockerfile @@ -20,12 +20,14 @@ FROM gcr.io/oss-fuzz-base/base-clang AS base-clang FROM gcr.io/oss-fuzz-base/base-image # Copy the binaries needed for code coverage and crash symbolization. -COPY --from=base-clang /usr/local/bin/llvm-cov /usr/local/bin/ -COPY --from=base-clang /usr/local/bin/llvm-profdata /usr/local/bin/ -COPY --from=base-clang /usr/local/bin/llvm-symbolizer /usr/local/bin/ +COPY --from=base-clang /usr/local/bin/llvm-cov \ + /usr/local/bin/llvm-profdata \ + /usr/local/bin/llvm-symbolizer \ + /usr/local/bin/ -RUN apt-get update -RUN apt-get install -y \ +# TODO(metzman): Install libc6-i386 lib32gcc1 instead of libc6-dev-i386 for +# consistency with ClusterFuzz image and to reduce size. 
+RUN apt-get update && apt-get install -y \ binutils \ file \ fonts-dejavu \ @@ -37,24 +39,8 @@ RUN apt-get install -y \ wget \ zip -RUN git clone https://chromium.googlesource.com/chromium/src/tools/code_coverage /opt/code_coverage -RUN pip3 install -r /opt/code_coverage/requirements.txt - -COPY bad_build_check \ - collect_dft \ - coverage \ - coverage_helper \ - dataflow_tracer.py \ - download_corpus \ - minijail0 \ - reproduce \ - run_fuzzer \ - run_minijail \ - parse_options.py \ - targets_list \ - test_all \ - test_one \ - /usr/local/bin/ +RUN git clone https://chromium.googlesource.com/chromium/src/tools/code_coverage /opt/code_coverage && \ + pip3 install -r /opt/code_coverage/requirements.txt # Default environment options for various sanitizers. # Note that these match the settings used in ClusterFuzz and @@ -81,3 +67,20 @@ ENV PATH $PATH:/root/.go/bin:$GOPATH/bin # Set up Golang coverage modules. RUN go get github.com/google/oss-fuzz/infra/go/coverage/... + +# Do this last to make developing these files easier/faster due to caching. +COPY bad_build_check \ + collect_dft \ + coverage \ + coverage_helper \ + dataflow_tracer.py \ + download_corpus \ + minijail0 \ + reproduce \ + run_fuzzer \ + run_minijail \ + parse_options.py \ + targets_list \ + test_all.py \ + test_one \ + /usr/local/bin/ diff --git a/infra/base-images/base-runner/README.md b/infra/base-images/base-runner/README.md index 94f793677..734d5877e 100644 --- a/infra/base-images/base-runner/README.md +++ b/infra/base-images/base-runner/README.md @@ -11,7 +11,7 @@ docker run -ti gcr.io/oss-fuzz-base/base-runner |---------|-------------| | `reproduce ` | build all fuzz targets and run specified one with testcase `/testcase` and given options. | `run_fuzzer ` | runs specified fuzzer combining options with `.options` file | -| `test_all` | runs every binary in `/out` as a fuzzer for a while to ensure it works. 
| +| `test_all.py` | runs every binary in `/out` as a fuzzer for a while to ensure it works. | # Examples diff --git a/infra/base-images/base-runner/test_all b/infra/base-images/base-runner/test_all deleted file mode 100755 index a4fb58cd2..000000000 --- a/infra/base-images/base-runner/test_all +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/bash -u -# Copyright 2016 Google Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -################################################################################ - -# Percentage threshold that needs to be reached for marking a build as failed. -ALLOWED_BROKEN_TARGETS_PERCENTAGE=${ALLOWED_BROKEN_TARGETS_PERCENTAGE:-10} - -# Test all fuzz targets in the $OUT/ dir. -TOTAL_TARGETS_COUNT=0 - -# Number of CPUs available, this is needed for running tests in parallel. -NPROC=$(nproc) - -# Directories where bad build check results will be written to. -VALID_TARGETS_DIR="/tmp/valid_fuzz_targets" -BROKEN_TARGETS_DIR="/tmp/broken_fuzz_targets" -rm -rf $VALID_TARGETS_DIR -rm -rf $BROKEN_TARGETS_DIR -mkdir $VALID_TARGETS_DIR -mkdir $BROKEN_TARGETS_DIR - -# Move the directory the fuzzer is located in to somewhere that doesn't exist -# on the builder to make it more likely that hardcoding /out fails here (since -# it will fail on ClusterFuzz). -TMP_FUZZER_DIR=/tmp/not-out -rm -rf $TMP_FUZZER_DIR -mkdir $TMP_FUZZER_DIR -# Move contents of $OUT/ into $TMP_FUZZER_DIR. We can't move the directory -# itself because it is a mount. 
-mv $OUT/* $TMP_FUZZER_DIR -INITIAL_OUT=$OUT -export OUT=$TMP_FUZZER_DIR - - -# Main loop that iterates through all fuzz targets and runs the check. -for FUZZER_BINARY in $(find $TMP_FUZZER_DIR -maxdepth 1 -executable -type f); do - if [ "$FUZZING_LANGUAGE" != "python" ]; then - if file "$FUZZER_BINARY" | grep -v ELF > /dev/null 2>&1; then - continue - fi - fi - - # Continue if not a fuzz target. - if [[ $FUZZING_ENGINE != "none" ]]; then - grep "LLVMFuzzerTestOneInput" $FUZZER_BINARY > /dev/null 2>&1 || continue - fi - - FUZZER=$(basename $FUZZER_BINARY) - if [[ "$FUZZER" == afl-* ]]; then - continue - fi - - echo "INFO: performing bad build checks for $FUZZER_BINARY." - - LOG_PATH_FOR_BROKEN_TARGET="${BROKEN_TARGETS_DIR}/${FUZZER}" - - # Launch bad build check process in the background. Ignore the exit codes, as - # we check the percentage of broken fuzz targets after running all the checks. - bad_build_check $FUZZER_BINARY &> $LOG_PATH_FOR_BROKEN_TARGET & - - # Count total number of fuzz targets being tested. - TOTAL_TARGETS_COUNT=$[$TOTAL_TARGETS_COUNT+1] - - # Do not spawn more processes than the number of CPUs available. - n_child_proc=$(jobs -rp | wc -l) - while [ "$n_child_proc" -eq "$NPROC" ]; do - sleep 4 - n_child_proc=$(jobs -rp | wc -l) - done -done - -# Wait for background processes to finish. -wait - -# Restore OUT -export OUT=$INITIAL_OUT -mv $TMP_FUZZER_DIR/* $OUT - -# Sanity check in case there are no fuzz targets in the $OUT/ dir. -if [ "$TOTAL_TARGETS_COUNT" -eq "0" ]; then - echo "ERROR: no fuzzers found in $OUT/" - ls -al $OUT - exit 1 -fi - -# An empty log file indicated that corresponding fuzz target is not broken. -find $BROKEN_TARGETS_DIR -empty -exec mv {} $VALID_TARGETS_DIR \; - -# Calculate number of valid and broken fuzz targets. -VALID_TARGETS_COUNT=$(ls $VALID_TARGETS_DIR | wc -l) -BROKEN_TARGETS_COUNT=$(ls $BROKEN_TARGETS_DIR | wc -l) - -# Sanity check to make sure that bad build check doesn't skip any fuzz target. 
-if [ "$TOTAL_TARGETS_COUNT" -ne "$[$VALID_TARGETS_COUNT+$BROKEN_TARGETS_COUNT]" ]; then - echo "ERROR: bad_build_check seems to have a bug, total number of fuzz" \ - "does not match number of fuzz targets tested." - echo "Total fuzz targets ($TOTAL_TARGETS_COUNT):" - ls -al $OUT - echo "Valid fuzz targets ($VALID_TARGETS_COUNT):" - ls -al $VALID_TARGETS_DIR - echo "Total fuzz targets ($BROKEN_TARGETS_COUNT):" - ls -al $BROKEN_TARGETS_DIR - exit 1 -fi - -# Build info about all broken fuzz targets (if any). -if [ "$BROKEN_TARGETS_COUNT" -gt "0" ]; then - echo "Broken fuzz targets ($BROKEN_TARGETS_COUNT):" - for target in $(ls $BROKEN_TARGETS_DIR); do - echo "${target}:" - cat ${BROKEN_TARGETS_DIR}/${target} - done -fi - -# Calculate the percentage of broken fuzz targets and make the finel decision. -BROKEN_TARGETS_PERCENTAGE=$[$BROKEN_TARGETS_COUNT*100/$TOTAL_TARGETS_COUNT] - - -if [ "$BROKEN_TARGETS_PERCENTAGE" -gt "$ALLOWED_BROKEN_TARGETS_PERCENTAGE" ]; then - echo "ERROR: $BROKEN_TARGETS_PERCENTAGE% of fuzz targets seem to be broken." \ - "See the list above for a detailed information." - - # TODO: figure out how to not fail the "special" cases handled below. Those - # are from "example" and "c-ares" projects and are too small targets to pass. - if [ "$(ls $OUT/do_stuff_fuzzer $OUT/ares_*_fuzzer $OUT/checksum_fuzzer $OUT/fuzz_dump $OUT/fuzz_keyring $OUT/xmltest $OUT/fuzz_compression_sas_rle 2>/dev/null | wc -l)" -gt "0" ]; then - exit 0 - fi - - exit 1 -else - echo "$TOTAL_TARGETS_COUNT fuzzers total, $BROKEN_TARGETS_COUNT seem to be" \ - "broken ($BROKEN_TARGETS_PERCENTAGE%)." 
- exit 0 -fi diff --git a/infra/base-images/base-runner/test_all.py b/infra/base-images/base-runner/test_all.py new file mode 100755 index 000000000..19f4918b8 --- /dev/null +++ b/infra/base-images/base-runner/test_all.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ +"""Does bad_build_check on all fuzz targets in $OUT.""" + +import contextlib +import multiprocessing +import os +import re +import shutil +import subprocess +import stat +import sys + +TMP_FUZZER_DIR = '/tmp/not-out' + +EXECUTABLE = stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH + +IGNORED_TARGETS = [ + r'do_stuff_fuzzer', r'checksum_fuzzer', r'fuzz_dump', r'fuzz_keyring', + r'xmltest', r'fuzz_compression_sas_rle', r'ares_*_fuzzer' +] + +IGNORED_TARGETS_RE = re.compile('^' + r'$|^'.join(IGNORED_TARGETS) + '$') + + +def recreate_directory(directory): + """Creates |directory|. 
If it already exists then deletes it first before + creating.""" + if os.path.exists(directory): + shutil.rmtree(directory) + os.mkdir(directory) + + +def move_directory_contents(src_directory, dst_directory): + """Moves contents of |src_directory| to |dst_directory|.""" + src_files = os.listdir(src_directory) + for filename in src_files: + src_path = os.path.join(src_directory, filename) + shutil.move(src_path, dst_directory) + + +def is_elf(filepath): + """Returns True if |filepath| is an ELF file.""" + result = subprocess.run(['file', filepath], + stdout=subprocess.PIPE, + check=False) + return b'ELF' in result.stdout + + +def find_fuzz_targets(directory, fuzzing_language): + """Returns paths to fuzz targets in |directory|.""" + # TODO(https://github.com/google/oss-fuzz/issues/4585): Use libClusterFuzz for + # this. + fuzz_targets = [] + for filename in os.listdir(directory): + path = os.path.join(directory, filename) + if filename.startswith('afl-'): + continue + if not os.path.isfile(path): + continue + if not os.stat(path).st_mode & EXECUTABLE: + continue + with open(path, 'rb') as file_handle: + binary_contents = file_handle.read() + if b'LLVMFuzzerTestOneInput' not in binary_contents: + continue + if fuzzing_language != 'python' and not is_elf(path): + continue + fuzz_targets.append(path) + return fuzz_targets + + +def do_bad_build_check(fuzz_target): + """Runs bad_build_check on |fuzz_target|. 
Returns a + subprocess.CompletedProcess.""" + print('INFO: performing bad build checks for', fuzz_target) + command = ['bad_build_check', fuzz_target] + return subprocess.run(command, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + check=False) + + +def get_broken_fuzz_targets(bad_build_results, fuzz_targets): + """Returns a list of broken fuzz targets and their process results in + |fuzz_targets| where each item in |bad_build_results| is the result of + bad_build_check on the corresponding element in |fuzz_targets|.""" + broken = [] + for result, fuzz_target in zip(bad_build_results, fuzz_targets): + if result.returncode != 0: + broken.append((fuzz_target, result)) + return broken + + +def has_ignored_targets(out_dir): + """Returns True if |out_dir| has any fuzz targets we are supposed to ignore + bad build checks of.""" + out_files = set(os.listdir(out_dir)) + for filename in out_files: + if re.match(IGNORED_TARGETS_RE, filename): + return True + return False + + +@contextlib.contextmanager +def use_different_out_dir(): + """Context manager that moves OUT to TMP_FUZZER_DIR. This is useful for + catching hardcoding. Note that this sets the environment variable OUT and + therefore must be run before multiprocessing.Pool is created. Resets OUT at + the end.""" + # Use a fake OUT directory to catch path hardcoding that breaks on + # ClusterFuzz. + out = os.getenv('OUT') + initial_out = out + recreate_directory(TMP_FUZZER_DIR) + out = TMP_FUZZER_DIR + # Set this so that run_fuzzer which is called by bad_build_check works + # properly. + os.environ['OUT'] = out + # We move the contents of the directory because we can't move the + # directory itself because it is a mount. 
+ move_directory_contents(initial_out, out) + try: + yield out + finally: + move_directory_contents(out, initial_out) + shutil.rmtree(out) + os.environ['OUT'] = initial_out + + +def test_all_outside_out(fuzzing_language, allowed_broken_targets_percentage): + """Wrapper around test_all that changes OUT and returns the result.""" + with use_different_out_dir() as out: + return test_all(out, fuzzing_language, allowed_broken_targets_percentage) + + +def test_all(out, fuzzing_language, allowed_broken_targets_percentage): + """Do bad_build_check on all fuzz targets.""" + # TODO(metzman): Refactor so that we can convert test_one to python. + fuzz_targets = find_fuzz_targets(out, fuzzing_language) + pool = multiprocessing.Pool() + bad_build_results = pool.map(do_bad_build_check, fuzz_targets) + broken_targets = get_broken_fuzz_targets(bad_build_results, fuzz_targets) + broken_targets_count = len(broken_targets) + if not broken_targets_count: + return True + + print('Broken fuzz targets', broken_targets_count) + total_targets_count = len(fuzz_targets) + broken_targets_percentage = 100 * broken_targets_count / total_targets_count + for broken_target, result in broken_targets: + print(broken_target) + # Use write because we can't print binary strings. + sys.stdout.buffer.write(result.stdout + result.stderr + b'\n') + + if broken_targets_percentage > allowed_broken_targets_percentage: + print('ERROR: {broken_targets_percentage}% of fuzz targets seem to be ' + 'broken. 
See the list above for a detailed information.'.format( + broken_targets_percentage=broken_targets_percentage)) + if has_ignored_targets(out): + print('Build check automatically passing because of ignored targets.') + return True + return False + print('{total_targets_count} fuzzers total, {broken_targets_count} ' + 'seem to be broken ({broken_targets_percentage}%).'.format( + total_targets_count=total_targets_count, + broken_targets_count=broken_targets_count, + broken_targets_percentage=broken_targets_percentage)) + return True + + +def get_allowed_broken_targets_percentage(): + """Returns the value of the environment value + 'ALLOWED_BROKEN_TARGETS_PERCENTAGE' as an int or returns a reasonable + default.""" + return int(os.getenv('ALLOWED_BROKEN_TARGETS_PERCENTAGE', '10')) + + +def main(): + """Does bad_build_check on all fuzz targets in parallel. Returns 0 on success. + Returns 1 on failure.""" + # Set these environment variables here so that stdout + fuzzing_language = os.getenv('FUZZING_LANGUAGE') + allowed_broken_targets_percentage = get_allowed_broken_targets_percentage() + if not test_all_outside_out(fuzzing_language, + allowed_broken_targets_percentage): + return 1 + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/infra/cifuzz/cifuzz.py b/infra/cifuzz/cifuzz.py index fdb6207b5..479a4e07c 100644 --- a/infra/cifuzz/cifuzz.py +++ b/infra/cifuzz/cifuzz.py @@ -314,7 +314,7 @@ def check_fuzzer_build(out_dir, sanitizer='address'): command += ['-e', 'OUT=' + out_dir, '--volumes-from', container] else: command += ['-v', '%s:/out' % out_dir] - command.extend(['-t', 'gcr.io/oss-fuzz-base/base-runner', 'test_all']) + command.extend(['-t', 'gcr.io/oss-fuzz-base/base-runner', 'test_all.py']) exit_code = helper.docker_run(command) if exit_code: logging.error('Check fuzzer build failed.') diff --git a/infra/helper.py b/infra/helper.py index 6c02032c7..e0dee6f9e 100755 --- a/infra/helper.py +++ b/infra/helper.py @@ -277,7 +277,8 @@ def 
get_dockerfile_path(project_name): def _get_corpus_dir(project_name=''): - """Creates and returns path to /corpus directory for the given project (if specified).""" + """Creates and returns path to /corpus directory for the given project (if + specified).""" directory = os.path.join(BUILD_DIR, 'corpus', project_name) os.makedirs(directory, exist_ok=True) @@ -285,7 +286,8 @@ def _get_corpus_dir(project_name=''): def _get_output_dir(project_name=''): - """Creates and returns path to /out directory for the given project (if specified).""" + """Creates and returns path to /out directory for the given project (if + specified).""" directory = os.path.join(BUILD_DIR, 'out', project_name) os.makedirs(directory, exist_ok=True) @@ -293,7 +295,8 @@ def _get_output_dir(project_name=''): def _get_work_dir(project_name=''): - """Creates and returns path to /work directory for the given project (if specified).""" + """Creates and returns path to /work directory for the given project (if + specified).""" directory = os.path.join(BUILD_DIR, 'work', project_name) os.makedirs(directory, exist_ok=True) @@ -627,7 +630,7 @@ def check_build(args): if args.fuzzer_name: run_args += ['test_one', os.path.join('/out', args.fuzzer_name)] else: - run_args.append('test_all') + run_args.append('test_all.py') exit_code = docker_run(run_args) if exit_code == 0: