From 2c05fb229fe9d7c915947ea2802f2b8bc111117d Mon Sep 17 00:00:00 2001 From: Leo Neat Date: Mon, 16 Dec 2019 16:48:49 -0800 Subject: [PATCH] [infra] Add support for Bisecting Git Commits Introducing Crashes(#3119) --- infra/bisector.py | 153 ++++++++++++++++++++++ infra/build_specified_commit.py | 41 +++--- infra/build_specified_commit_test.py | 65 +++------ infra/helper.py | 30 +++-- infra/{RepoManager.py => repo_manager.py} | 7 +- infra/repo_manager_test.py | 46 +++---- 6 files changed, 242 insertions(+), 100 deletions(-) create mode 100644 infra/bisector.py mode change 100644 => 100755 infra/build_specified_commit.py rename infra/{RepoManager.py => repo_manager.py} (96%) diff --git a/infra/bisector.py b/infra/bisector.py new file mode 100644 index 000000000..877110577 --- /dev/null +++ b/infra/bisector.py @@ -0,0 +1,153 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Uses bisection to determine which commit a bug was introduced and fixed. +This module takes a high and a low commit SHA, a repo name, and a bug. +The module bisects the high and low commit SHA searching for the location +where the bug was introduced. It also looks for where the bug was fixed. 
+This is done with the following steps: + + + NOTE: NEEDS TO BE RUN FROM THE OSS-Fuzz HOME directory + + Typical usage example: + python3 infra/bisector.py + --commit_old 1e403e9259a1abedf108ab86f711ba52c907226d + --commit_new f79be4f2330f4b89ea2f42e1c44ca998c59a0c0f + --fuzz_target rules_fuzzer + --project_name yara + --testcase infra/yara_testcase + --sanitizer address +""" + +import argparse +from dataclasses import dataclass +import os +import tempfile + +import build_specified_commit +import helper +import repo_manager + + +@dataclass +class BuildData(): + """List of data required for bisection of errors in OSS-Fuzz projects. + + Attributes: + project_name: The name of the OSS-Fuzz project that is being checked + engine: The fuzzing engine to be used + sanitizer: The fuzzing sanitizer to be used + architecture: The system architecture being fuzzed + """ + project_name: str + engine: str + sanitizer: str + architecture: str + + +def main(): + """Finds the commit SHA where an error was initially introduced.""" + parser = argparse.ArgumentParser( + description='git bisection for finding introduction of bugs') + + parser.add_argument( + '--project_name', + help='The name of the project where the bug occured', + required=True) + parser.add_argument( + '--commit_new', + help='The newest commit SHA to be bisected', + required=True) + parser.add_argument( + '--commit_old', + help='The oldest commit SHA to be bisected', + required=True) + parser.add_argument( + '--fuzz_target', help='the name of the fuzzer to be built', required=True) + parser.add_argument( + '--testcase', help='the testcase to be reproduced', required=True) + parser.add_argument('--engine', default='libfuzzer') + parser.add_argument( + '--sanitizer', + default='address', + help='the default is "address"; "dataflow" for "dataflow" engine') + parser.add_argument('--architecture', default='x86_64') + args = parser.parse_args() + build_data = BuildData(args.project_name, args.engine, args.sanitizer, + 
args.architecture) + if os.getcwd() != os.path.dirname( + os.path.dirname(os.path.realpath(__file__))): + print("Error: bisector.py needs to be run from the OSS-Fuzz home directory") + return 1 + error_sha = bisect(args.commit_old, args.commit_new, args.testcase, + args.fuzz_target, build_data) + if not error_sha: + print('No error was found in commit range %s:%s' % + (args.commit_old, args.commit_new)) + return 1 + print('Error was introduced at commit %s' % error_sha) + return 0 + + +def bisect(commit_old, commit_new, testcase, fuzz_target, build_data): + """From a commit range, this function calculates which commit introduced a + specific error from a fuzz testcase. + + Args: + commit_old: The oldest commit in the error regression range + commit_new: The newest commit in the error regression range + testcase: The file path of the test case that triggers the error + fuzz_target: The name of the fuzzer to be tested + build_data: a class holding all of the input parameters for bisection + + Returns: + The commit SHA that introduced the error or None + """ + local_store_path = tempfile.mkdtemp() + repo_url = build_specified_commit.infer_main_repo(build_data.project_name, + local_store_path, + commit_old) + bisect_repo_manager = repo_manager.RepoManager(repo_url, local_store_path) + commit_list = bisect_repo_manager.get_commit_list(commit_old, commit_new) + build_specified_commit.build_fuzzer_from_commit( + build_data.project_name, commit_list[0], bisect_repo_manager.repo_dir, + build_data.engine, build_data.sanitizer, build_data.architecture, + bisect_repo_manager) + error_code = helper.reproduce_impl(build_data.project_name, fuzz_target, + False, [], [], testcase) + old_idx = len(commit_list) - 1 + new_idx = 0 + if len(commit_list) == 1: + if not error_code: + return None + return commit_list[0] + + while old_idx - new_idx != 1: + curr_idx = (old_idx + new_idx) // 2 + build_specified_commit.build_fuzzer_from_commit( + build_data.project_name, commit_list[curr_idx], + 
bisect_repo_manager.repo_dir, build_data.engine, build_data.sanitizer, + build_data.architecture, bisect_repo_manager) + error_exists = ( + helper.reproduce_impl(build_data.project_name, fuzz_target, False, [], + [], testcase) == error_code) + if error_exists: + new_idx = curr_idx + else: + old_idx = curr_idx + return commit_list[new_idx] + + +if __name__ == '__main__': + raise SystemExit(main()) diff --git a/infra/build_specified_commit.py b/infra/build_specified_commit.py old mode 100644 new mode 100755 index 49e6831e4..c7a4c633f --- a/infra/build_specified_commit.py +++ b/infra/build_specified_commit.py @@ -19,10 +19,8 @@ like continuious integration fuzzing and bisection to find errors """ import re -from helper import build_fuzzers_impl -from helper import check_project_exists -from helper import get_dockerfile_path -from RepoManager import RepoManager +import helper +import repo_manager def build_fuzzer_from_commit(project_name, @@ -30,11 +28,12 @@ def build_fuzzer_from_commit(project_name, local_store_path, engine='libfuzzer', sanitizer='address', - architecture='x86_64'): - """Builds a ossfuzz fuzzer at a specific commit SHA. + architecture='x86_64', + old_repo_manager=None): + """Builds a OSS-Fuzz fuzzer at a specific commit SHA. 
Args: - project_name: The oss fuzz project name + project_name: The OSS-Fuzz project name commit: The commit SHA to build the fuzzers at local_store_path: The full file path of a place where a temp git repo is stored engine: The fuzzing engine to be used @@ -44,11 +43,18 @@ def build_fuzzer_from_commit(project_name, Returns: 0 on successful build 1 on failure """ - guessed_url = infer_main_repo(project_name, local_store_path, commit) - repo_man = RepoManager(guessed_url, local_store_path) - repo_man.checkout_commit(commit) - return build_fuzzers_impl(project_name, True, engine, sanitizer, architecture, - None, repo_man.repo_dir) + if not old_repo_manager: + inferred_url = infer_main_repo(project_name, local_store_path, commit) + old_repo_manager = repo_manager.RepoManager(inferred_url, local_store_path) + old_repo_manager.checkout_commit(commit) + return helper.build_fuzzers_impl( + project_name=project_name, + clean=True, + engine=engine, + sanitizer=sanitizer, + architecture=architecture, + env_to_add=None, + source_path=old_repo_manager.repo_dir) def infer_main_repo(project_name, local_store_path, example_commit=None): @@ -56,14 +62,14 @@ def infer_main_repo(project_name, local_store_path, example_commit=None): NOTE: This is a fragile implementation and only works for git Args: - project_name: The oss fuzz project that you are checking the repo of + project_name: The OSS-Fuzz project that you are checking the repo of example_commit: A commit that is in the main repos tree Returns: The guessed repo url path or None on failue """ - if not check_project_exists(project_name): + if not helper.check_project_exists(project_name): return None - docker_path = get_dockerfile_path(project_name) + docker_path = helper.get_dockerfile_path(project_name) with open(docker_path, 'r') as file_path: lines = file_path.read() # Use generic git format and project name to guess main repo @@ -80,8 +86,9 @@ def infer_main_repo(project_name, local_store_path, example_commit=None): 
clone_command).group(0) print(repo_url) try: - repo_manager = RepoManager(repo_url.rstrip(), local_store_path) - if repo_manager.commit_exists(example_commit): + test_repo_manager = repo_manager.RepoManager(repo_url.rstrip(), + local_store_path) + if test_repo_manager.commit_exists(example_commit): return repo_url except: pass diff --git a/infra/build_specified_commit_test.py b/infra/build_specified_commit_test.py index 1f4337c07..77a0698fe 100644 --- a/infra/build_specified_commit_test.py +++ b/infra/build_specified_commit_test.py @@ -16,12 +16,10 @@ NOTE: THIS TEST NEEDS TO BE RUN FROM THE OSS-FUZZ BASE DIR The will consist of the following functional tests 1. The inferance of the main repo for a specific project """ -import argparse import unittest -from build_specified_commit import infer_main_repo -from build_specified_commit import build_fuzzer_from_commit -from helper import reproduce +import build_specified_commit +import helper class BuildImageUnitTests(unittest.TestCase): @@ -29,20 +27,21 @@ class BuildImageUnitTests(unittest.TestCase): def test_infer_main_repo(self): """Tests that the main repo can be infered based on an example commit.""" - infered_repo = infer_main_repo('curl', 'tmp', - 'bc5d22c3dede2f04870c37aec9a50474c4b888ad') + infered_repo = build_specified_commit.infer_main_repo( + 'curl', 'tmp', 'bc5d22c3dede2f04870c37aec9a50474c4b888ad') self.assertEqual(infered_repo, 'https://github.com/curl/curl.git') - infered_repo = infer_main_repo('curl', 'tmp') + infered_repo = build_specified_commit.infer_main_repo('curl', 'tmp') self.assertEqual(infered_repo, 'https://github.com/curl/curl.git') - infered_repo = infer_main_repo('usrsctp', 'tmp') + infered_repo = build_specified_commit.infer_main_repo('usrsctp', 'tmp') self.assertEqual(infered_repo, 'https://github.com/weinrank/usrsctp') - infered_repo = infer_main_repo('usrsctp', 'tmp', - '4886aaa49fb90e479226fcfc3241d74208908232') + infered_repo = build_specified_commit.infer_main_repo( + 'usrsctp', 
'tmp', '4886aaa49fb90e479226fcfc3241d74208908232') self.assertEqual(infered_repo, 'https://github.com/weinrank/usrsctp', '4886aaa49fb90e479226fcfc3241d74208908232') - infered_repo = infer_main_repo('not_a_project', 'tmp') + infered_repo = build_specified_commit.infer_main_repo( + 'not_a_project', 'tmp') self.assertEqual(infered_repo, None) @@ -61,44 +60,16 @@ class BuildImageIntegrationTests(unittest.TestCase): new_commit = 'f50a39051ea8c7f10d6d8db9656658b49601caef' fuzzer = 'rules_fuzzer' test_data = 'infra/yara_test_data' - build_fuzzer_from_commit( - project_name, - old_commit, - '/usr/local/google/home/lneat/Documents/oss-fuzz/infra/tmp', - sanitizer='address') - old_error_code = self.reproduce_error(project_name, test_data, fuzzer) - build_fuzzer_from_commit( - project_name, - new_commit, - '/usr/local/google/home/lneat/Documents/oss-fuzz/infra/tmp', - sanitizer='address') - new_error_code = self.reproduce_error(project_name, test_data, fuzzer) + build_specified_commit.build_fuzzer_from_commit( + project_name, old_commit, 'tmp', sanitizer='address') + old_error_code = helper.reproduce_impl(project_name, fuzzer, False, [], [], + test_data) + build_specified_commit.build_fuzzer_from_commit( + project_name, new_commit, 'tmp', sanitizer='address') + new_error_code = helper.reproduce_impl(project_name, fuzzer, False, [], [], + test_data) self.assertNotEqual(new_error_code, old_error_code) - def reproduce_error(self, project_name, test_case, fuzzer_name): - """Checks to see if the error is repoduceable at a specific commit. 
- Args: - project_name: The name of the project you are testing - test_case: The path to the test_case you are passing in - fuzzer_name: The name of the fuzz target to be tested - Returns: - True if the error still exists - """ - parser = argparse.ArgumentParser() - parser.add_argument('project_name', help='name of the project') - parser.add_argument('fuzzer_name', help='name of the fuzzer') - parser.add_argument('testcase_path', help='path of local testcase') - parser.add_argument( - 'fuzzer_args', - help='arguments to pass to the fuzzer', - nargs=argparse.REMAINDER) - parser.add_argument( - '--valgrind', action='store_true', help='run with valgrind') - parser.add_argument( - '-e', action='append', help='set environment variable e.g. VAR=value') - args = parser.parse_args([project_name, fuzzer_name, test_case]) - return reproduce(args) - if __name__ == '__main__': unittest.main() diff --git a/infra/helper.py b/infra/helper.py index 8d3b952be..f09c0c1c9 100755 --- a/infra/helper.py +++ b/infra/helper.py @@ -417,9 +417,9 @@ def build_image(args): def build_fuzzers_impl(project_name, clean, engine, sanitizer, architecture, - env_to_add, source_path): + env_to_add, source_path, no_cache=False): """Build fuzzers.""" - if not build_image_impl(project_name): + if not build_image_impl(project_name, no_cache=no_cache): return 1 project_out_dir = _get_output_dir(project_name) @@ -432,9 +432,9 @@ def build_fuzzers_impl(project_name, clean, engine, sanitizer, architecture, '-t', 'gcr.io/oss-fuzz/%s' % project_name, '/bin/bash', '-c', 'rm -rf /out/*' ]) + else: print('Keeping existing build artifacts as-is (if any).') - env = [ 'FUZZING_ENGINE=' + engine, 'SANITIZER=' + sanitizer, @@ -737,35 +737,41 @@ def run_fuzzer(args): def reproduce(args): + """Reproduce a specific test case from a specific project.""" + return reproduce_impl(args.project_name, args.fuzzer_name, args.valgrind, args.e, + args.fuzzer_args, args.testcase_path) + + +def reproduce_impl(project_name, 
fuzzer_name, valgrind, env_to_add, fuzzer_args, testcase_path): """Reproduces a testcase in the container.""" - if not check_project_exists(args.project_name): + if not check_project_exists(project_name): return 1 - if not _check_fuzzer_exists(args.project_name, args.fuzzer_name): + if not _check_fuzzer_exists(project_name, fuzzer_name): return 1 debugger = '' env = [] image_name = 'base-runner' - if args.valgrind: + if valgrind: debugger = 'valgrind --tool=memcheck --track-origins=yes --leak-check=full' if debugger: image_name = 'base-runner-debug' env += ['DEBUGGER=' + debugger] - if args.e: - env += args.e + if env_to_add: + env += env_to_add run_args = _env_to_docker_args(env) + [ - '-v', '%s:/out' % _get_output_dir(args.project_name), - '-v', '%s:/testcase' % _get_absolute_path(args.testcase_path), + '-v', '%s:/out' % _get_output_dir(project_name), + '-v', '%s:/testcase' % _get_absolute_path(testcase_path), '-t', 'gcr.io/oss-fuzz-base/%s' % image_name, 'reproduce', - args.fuzzer_name, + fuzzer_name, '-runs=100', - ] + args.fuzzer_args + ] + fuzzer_args return docker_run(run_args) diff --git a/infra/RepoManager.py b/infra/repo_manager.py similarity index 96% rename from infra/RepoManager.py rename to infra/repo_manager.py index fe8028312..9a93df9a2 100644 --- a/infra/RepoManager.py +++ b/infra/repo_manager.py @@ -185,7 +185,12 @@ class RepoManager(object): self._run_command(['git', 'fetch', '--unshallow'], self.repo_dir, check_result=True) - self._run_command(['git', 'checkout', '-f', commit], self.repo_dir) + self._run_command(['git', 'checkout', '-f', commit], + self.repo_dir, + check_result=True) + self._run_command(['git', 'clean', '-fxd'], + self.repo_dir, + check_result=True) if self.get_current_commit() != commit: raise RepoManagerError('Error checking out commit %s' % commit) diff --git a/infra/repo_manager_test.py b/infra/repo_manager_test.py index b5e5b151a..c8627f66a 100644 --- a/infra/repo_manager_test.py +++ b/infra/repo_manager_test.py @@ -21,8 
+21,7 @@ The will consist of the following functional tests import os import unittest -from RepoManager import RepoManager -from RepoManager import RepoManagerError +import repo_manager class TestRepoManager(unittest.TestCase): @@ -32,29 +31,30 @@ class TestRepoManager(unittest.TestCase): def test_clone_correctly(self): """Tests the correct location of the git repo.""" - repo_manager = RepoManager(self.curl_repo, 'tmp') - git_path = os.path.join(repo_manager.base_dir, repo_manager.repo_name, - '.git') + test_repo_manager = repo_manager.RepoManager(self.curl_repo, 'tmp') + git_path = os.path.join(test_repo_manager.base_dir, + test_repo_manager.repo_name, '.git') self.assertTrue(os.path.isdir(git_path)) - repo_manager.remove_repo() - with self.assertRaises(RepoManagerError): - repo_manager = RepoManager(' ', 'tmp') + test_repo_manager.remove_repo() + with self.assertRaises(repo_manager.RepoManagerError): + test_repo_manager = repo_manager.RepoManager(' ', 'tmp') def test_checkout_commit(self): """Tests that the git checkout command works.""" - repo_manager = RepoManager(self.curl_repo, 'tmp') + test_repo_manager = repo_manager.RepoManager(self.curl_repo, 'tmp') commit_to_test = '036ebac0134de3b72052a46f734e4ca81bb96055' - repo_manager.checkout_commit(commit_to_test) - self.assertEqual(commit_to_test, repo_manager.get_current_commit()) + test_repo_manager.checkout_commit(commit_to_test) + self.assertEqual(commit_to_test, test_repo_manager.get_current_commit()) with self.assertRaises(ValueError): - repo_manager.checkout_commit(' ') - with self.assertRaises(RepoManagerError): - repo_manager.checkout_commit('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa') - repo_manager.remove_repo() + test_repo_manager.checkout_commit(' ') + with self.assertRaises(repo_manager.RepoManagerError): + test_repo_manager.checkout_commit( + 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa') + test_repo_manager.remove_repo() def test_get_commit_list(self): """Tests an accurate commit list can be 
retrived from the repo manager.""" - repo_manager = RepoManager(self.curl_repo, 'tmp') + test_repo_manager = repo_manager.RepoManager(self.curl_repo, 'tmp') old_commit = '7cf18b05e04bbb0f08c74d2567b0648f6c31a952' new_commit = '113db127ee2b2f874dfcce406103ffe666e11953' commit_list = [ @@ -63,15 +63,15 @@ class TestRepoManager(unittest.TestCase): '9a2cbf30b81a2b57149bb20e78e2e4cb5c2ff389', '7cf18b05e04bbb0f08c74d2567b0648f6c31a952' ] - result_list = repo_manager.get_commit_list(old_commit, new_commit) + result_list = test_repo_manager.get_commit_list(old_commit, new_commit) self.assertListEqual(commit_list, result_list) - with self.assertRaises(RepoManagerError): - repo_manager.get_commit_list('asafd', new_commit) - with self.assertRaises(RepoManagerError): - repo_manager.get_commit_list(new_commit, 'asdfasdf') - with self.assertRaises(RepoManagerError): + with self.assertRaises(repo_manager.RepoManagerError): + test_repo_manager.get_commit_list('asafd', new_commit) + with self.assertRaises(repo_manager.RepoManagerError): + test_repo_manager.get_commit_list(new_commit, 'asdfasdf') + with self.assertRaises(repo_manager.RepoManagerError): # Testing commits out of order - result_list = repo_manager.get_commit_list(new_commit, old_commit) + test_repo_manager.get_commit_list(new_commit, old_commit) if __name__ == '__main__':