[infra] Add support for Bisecting Git Commits Introducing Crashes(#3119)

2019-12-16 16:48:49 -08:00 · 2019-12-16 16:48:49 -08:00 · 2c05fb229f
parent a0b29b879f
commit 2c05fb229f
6 changed files with 242 additions and 100 deletions
--- a/infra/bisector.py
+++ b/infra/bisector.py
@ -0,0 +1,153 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
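+"""Uses bisection to determine the commit in which a bug was introduced.
+
+This module takes an old and a new commit SHA, an OSS-Fuzz project name, a
+fuzz target and a testcase that triggers the bug. It bisects the commits
+between the old and the new SHA, searching for the commit where the bug was
+introduced. This module does not currently locate the commit where the bug
+was fixed.
+This is done with the following steps:
+  1. Infer the project's main repo and list the commits in the range.
+  2. Build the fuzzers at the newest commit and reproduce the testcase.
+  3. Binary search the range, rebuilding and reproducing at each midpoint.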
+
+
+  NOTE: NEEDS TO BE RUN FROM THE OSS-Fuzz HOME directory
+
+  Typical usage example:
+        python3 infra/bisector.py
+          --commit_old 1e403e9259a1abedf108ab86f711ba52c907226d
+          --commit_new f79be4f2330f4b89ea2f42e1c44ca998c59a0c0f
+          --fuzz_target rules_fuzzer
+          --project_name yara
+          --testcase infra/yara_testcase
+          --sanitizer address
+"""
+
+import argparse
+from dataclasses import dataclass
+import os
+import tempfile
+
+import build_specified_commit
+import helper
+import repo_manager
+
+
+@dataclass
+class BuildData:
+  """Build configuration shared by every step of a bisection run.
+
+  Attributes:
+    project_name: Name of the OSS-Fuzz project being bisected
+    engine: Fuzzing engine the fuzzers are built with
+    sanitizer: Sanitizer the fuzzers are built with
+    architecture: System architecture the fuzzers are built for
+  """
+  project_name: str
+  engine: str
+  sanitizer: str
+  architecture: str
+
+
+def main():
+  """Parses the command line, runs the bisection and prints the result.
+
+  Returns:
+    0 when an introducing commit was found; 1 when the script was not started
+    from the OSS-Fuzz home directory or when bisection returned no commit.
+  """
+  arg_parser = argparse.ArgumentParser(
+      description='git bisection for finding introduction of bugs')
+
+  # Mandatory flags, registered in a fixed order.
+  required_flags = (
+      ('--project_name', 'The name of the project where the bug occured'),
+      ('--commit_new', 'The newest commit SHA to be bisected'),
+      ('--commit_old', 'The oldest commit SHA to be bisected'),
+      ('--fuzz_target', 'the name of the fuzzer to be built'),
+      ('--testcase', 'the testcase to be reproduced'),
+  )
+  for flag, help_text in required_flags:
+    arg_parser.add_argument(flag, help=help_text, required=True)
+
+  # Optional build configuration flags.
+  arg_parser.add_argument('--engine', default='libfuzzer')
+  arg_parser.add_argument(
+      '--sanitizer',
+      default='address',
+      help='the default is "address"; "dataflow" for "dataflow" engine')
+  arg_parser.add_argument('--architecture', default='x86_64')
+  options = arg_parser.parse_args()
+
+  # The expected working directory is the parent of the directory that
+  # contains this file.
+  oss_fuzz_home = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+  if os.getcwd() != oss_fuzz_home:
+    print("Error: bisector.py needs to be run from the OSS-Fuzz home directory")
+    return 1
+
+  build_data = BuildData(options.project_name, options.engine,
+                         options.sanitizer, options.architecture)
+  error_sha = bisect(options.commit_old, options.commit_new, options.testcase,
+                     options.fuzz_target, build_data)
+  if error_sha:
+    print('Error was introduced at commit %s' % error_sha)
+    return 0
+  print('No error was found in commit range %s:%s' %
+        (options.commit_old, options.commit_new))
+  return 1
+
+
+def _build_and_reproduce(commit, testcase, fuzz_target, build_data,
+                         bisect_repo_manager):
+  """Builds the project's fuzzers at a commit and runs the testcase on them.
+
+  Args:
+    commit: The commit SHA to check out and build
+    testcase: The file path of the test case that triggers the error
+    fuzz_target: The name of the fuzzer to be tested
+    build_data: a class holding all of the input parameters for bisection
+    bisect_repo_manager: The RepoManager of the repo being bisected
+
+  Returns:
+    The return value of helper.reproduce_impl for the freshly built fuzzer
+  """
+  # The return value of the build itself is not checked here.
+  build_specified_commit.build_fuzzer_from_commit(
+      build_data.project_name, commit, bisect_repo_manager.repo_dir,
+      build_data.engine, build_data.sanitizer, build_data.architecture,
+      bisect_repo_manager)
+  return helper.reproduce_impl(build_data.project_name, fuzz_target, False, [],
+                               [], testcase)
+
+
+def bisect(commit_old, commit_new, testcase, fuzz_target, build_data):
+  """From a commit range, this function calculates which commit introduced a
+  specific error from a fuzz testcase.
+
+  The newest commit is built first and its reproduce result is taken as the
+  signature of the error. A binary search over the commit list then looks for
+  the oldest commit that still produces that same result. In a range of more
+  than one commit, commit_old itself is never built; it is assumed not to
+  show the error.
+
+  Args:
+    commit_old: The oldest commit in the error regression range
+    commit_new: The newest commit in the error regression range
+    testcase: The file path of the test case that triggers the error
+    fuzz_target: The name of the fuzzer to be tested
+    build_data: a class holding all of the input parameters for bisection
+
+  Returns:
+    The commit SHA that introduced the error or None
+  """
+  local_store_path = tempfile.mkdtemp()
+  repo_url = build_specified_commit.infer_main_repo(build_data.project_name,
+                                                    local_store_path,
+                                                    commit_old)
+  bisect_repo_manager = repo_manager.RepoManager(repo_url, local_store_path)
+  # The list is used newest-first: index 0 is the newest commit and the last
+  # index is the oldest one.
+  commit_list = bisect_repo_manager.get_commit_list(commit_old, commit_new)
+  error_code = _build_and_reproduce(commit_list[0], testcase, fuzz_target,
+                                    build_data, bisect_repo_manager)
+  if len(commit_list) == 1:
+    if not error_code:
+      return None
+    return commit_list[0]
+
+  new_idx = 0
+  old_idx = len(commit_list) - 1
+  while old_idx - new_idx > 1:
+    curr_idx = (old_idx + new_idx) // 2
+    curr_code = _build_and_reproduce(commit_list[curr_idx], testcase,
+                                     fuzz_target, build_data,
+                                     bisect_repo_manager)
+    # Compare the reproduce results directly. Comparing the boolean outcome
+    # of that test against error_code again only works when error_code is 1.
+    if curr_code == error_code:
+      new_idx = curr_idx
+    else:
+      old_idx = curr_idx
+  return commit_list[new_idx]
+
+
+if __name__ == '__main__':
+  # main() returns 0 on success and 1 on failure; pass that on as the process
+  # exit status instead of discarding it.
+  raise SystemExit(main())
--- a/infra/build_specified_commit.py
+++ b/infra/build_specified_commit.py
@ -19,10 +19,8 @@ like continuious integration fuzzing and bisection to find errors
 """
 import re

-from helper import build_fuzzers_impl
-from helper import check_project_exists
-from helper import get_dockerfile_path
-from RepoManager import RepoManager
+import helper
+import repo_manager


 def build_fuzzer_from_commit(project_name,
@ -30,11 +28,12 @@ def build_fuzzer_from_commit(project_name,
                             local_store_path,
                             engine='libfuzzer',
                             sanitizer='address',
-                             architecture='x86_64'):
-  """Builds a ossfuzz fuzzer at a  specific commit SHA.
+                             architecture='x86_64',
+                             old_repo_manager=None):
+  """Builds an OSS-Fuzz fuzzer at a specific commit SHA.

  Args:
-    project_name: The oss fuzz project name
+    project_name: The OSS-Fuzz project name
    commit: The commit SHA to build the fuzzers at
    local_store_path: The full file path of a place where a temp git repo is stored
    engine: The fuzzing engine to be used
@ -44,11 +43,18 @@ def build_fuzzer_from_commit(project_name,
  Returns:
    0 on successful build 1 on failure
  """
-  guessed_url = infer_main_repo(project_name, local_store_path, commit)
-  repo_man = RepoManager(guessed_url, local_store_path)
-  repo_man.checkout_commit(commit)
-  return build_fuzzers_impl(project_name, True, engine, sanitizer, architecture,
-                            None, repo_man.repo_dir)
+  if not old_repo_manager:
+    inferred_url = infer_main_repo(project_name, local_store_path, commit)
+    old_repo_manager = repo_manager.RepoManager(inferred_url, local_store_path)
+  old_repo_manager.checkout_commit(commit)
+  return helper.build_fuzzers_impl(
+      project_name=project_name,
+      clean=True,
+      engine=engine,
+      sanitizer=sanitizer,
+      architecture=architecture,
+      env_to_add=None,
+      source_path=old_repo_manager.repo_dir)


 def infer_main_repo(project_name, local_store_path, example_commit=None):
@ -56,14 +62,14 @@ def infer_main_repo(project_name, local_store_path, example_commit=None):

  NOTE: This is a fragile implementation and only works for git
  Args:
-    project_name: The oss fuzz project that you are checking the repo of
+    project_name: The OSS-Fuzz project that you are checking the repo of
    example_commit: A commit that is in the main repos tree
  Returns:
    The guessed repo url path or None on failue
  """
-  if not check_project_exists(project_name):
+  if not helper.check_project_exists(project_name):
    return None
-  docker_path = get_dockerfile_path(project_name)
+  docker_path = helper.get_dockerfile_path(project_name)
  with open(docker_path, 'r') as file_path:
    lines = file_path.read()
    # Use generic git format and project name to guess main repo
@ -80,8 +86,9 @@ def infer_main_repo(project_name, local_store_path, example_commit=None):
                             clone_command).group(0)
        print(repo_url)
        try:
-          repo_manager = RepoManager(repo_url.rstrip(), local_store_path)
-          if repo_manager.commit_exists(example_commit):
+          test_repo_manager = repo_manager.RepoManager(repo_url.rstrip(),
+                                                       local_store_path)
+          if test_repo_manager.commit_exists(example_commit):
            return repo_url
        except:
          pass
--- a/infra/build_specified_commit_test.py
+++ b/infra/build_specified_commit_test.py
@ -16,12 +16,10 @@ NOTE: THIS TEST NEEDS TO BE RUN FROM THE OSS-FUZZ BASE DIR
 The will consist of the following functional tests
  1. The inferance of the main repo for a specific project
 """
-import argparse
 import unittest

-from build_specified_commit import infer_main_repo
-from build_specified_commit import build_fuzzer_from_commit
-from helper import reproduce
+import build_specified_commit
+import helper


 class BuildImageUnitTests(unittest.TestCase):
@ -29,20 +27,21 @@ class BuildImageUnitTests(unittest.TestCase):

  def test_infer_main_repo(self):
    """Tests that the main repo can be infered based on an example commit."""
-    infered_repo = infer_main_repo('curl', 'tmp',
-                                   'bc5d22c3dede2f04870c37aec9a50474c4b888ad')
+    infered_repo = build_specified_commit.infer_main_repo(
+        'curl', 'tmp', 'bc5d22c3dede2f04870c37aec9a50474c4b888ad')
    self.assertEqual(infered_repo, 'https://github.com/curl/curl.git')
-    infered_repo = infer_main_repo('curl', 'tmp')
+    infered_repo = build_specified_commit.infer_main_repo('curl', 'tmp')
    self.assertEqual(infered_repo, 'https://github.com/curl/curl.git')

-    infered_repo = infer_main_repo('usrsctp', 'tmp')
+    infered_repo = build_specified_commit.infer_main_repo('usrsctp', 'tmp')
    self.assertEqual(infered_repo, 'https://github.com/weinrank/usrsctp')
-    infered_repo = infer_main_repo('usrsctp', 'tmp',
-                                   '4886aaa49fb90e479226fcfc3241d74208908232')
+    infered_repo = build_specified_commit.infer_main_repo(
+        'usrsctp', 'tmp', '4886aaa49fb90e479226fcfc3241d74208908232')
    self.assertEqual(infered_repo, 'https://github.com/weinrank/usrsctp',
                     '4886aaa49fb90e479226fcfc3241d74208908232')

-    infered_repo = infer_main_repo('not_a_project', 'tmp')
+    infered_repo = build_specified_commit.infer_main_repo(
+        'not_a_project', 'tmp')
    self.assertEqual(infered_repo, None)


@ -61,44 +60,16 @@ class BuildImageIntegrationTests(unittest.TestCase):
    new_commit = 'f50a39051ea8c7f10d6d8db9656658b49601caef'
    fuzzer = 'rules_fuzzer'
    test_data = 'infra/yara_test_data'
-    build_fuzzer_from_commit(
-        project_name,
-        old_commit,
-        '/usr/local/google/home/lneat/Documents/oss-fuzz/infra/tmp',
-        sanitizer='address')
-    old_error_code = self.reproduce_error(project_name, test_data, fuzzer)
-    build_fuzzer_from_commit(
-        project_name,
-        new_commit,
-        '/usr/local/google/home/lneat/Documents/oss-fuzz/infra/tmp',
-        sanitizer='address')
-    new_error_code = self.reproduce_error(project_name, test_data, fuzzer)
+    build_specified_commit.build_fuzzer_from_commit(
+        project_name, old_commit, 'tmp', sanitizer='address')
+    old_error_code = helper.reproduce_impl(project_name, fuzzer, False, [], [],
+                                           test_data)
+    build_specified_commit.build_fuzzer_from_commit(
+        project_name, new_commit, 'tmp', sanitizer='address')
+    new_error_code = helper.reproduce_impl(project_name, fuzzer, False, [], [],
+                                           test_data)
    self.assertNotEqual(new_error_code, old_error_code)

-  def reproduce_error(self, project_name, test_case, fuzzer_name):
-    """Checks to see if the error is repoduceable at a specific commit.
-    Args:
-      project_name: The name of the project you are testing
-      test_case: The path to the test_case you are passing in
-      fuzzer_name: The name of the fuzz target to be tested
-    Returns:
-      True if the error still exists
-    """
-    parser = argparse.ArgumentParser()
-    parser.add_argument('project_name', help='name of the project')
-    parser.add_argument('fuzzer_name', help='name of the fuzzer')
-    parser.add_argument('testcase_path', help='path of local testcase')
-    parser.add_argument(
-        'fuzzer_args',
-        help='arguments to pass to the fuzzer',
-        nargs=argparse.REMAINDER)
-    parser.add_argument(
-        '--valgrind', action='store_true', help='run with valgrind')
-    parser.add_argument(
-        '-e', action='append', help='set environment variable e.g. VAR=value')
-    args = parser.parse_args([project_name, fuzzer_name, test_case])
-    return reproduce(args)
-

 if __name__ == '__main__':
  unittest.main()
--- a/infra/helper.py
+++ b/infra/helper.py
@ -417,9 +417,9 @@ def build_image(args):


 def build_fuzzers_impl(project_name, clean, engine, sanitizer, architecture,
-                       env_to_add, source_path):
+                       env_to_add, source_path, no_cache=False):
  """Build fuzzers."""
-  if not build_image_impl(project_name):
+  if not build_image_impl(project_name, no_cache=no_cache):
    return 1

  project_out_dir = _get_output_dir(project_name)
@ -432,9 +432,9 @@ def build_fuzzers_impl(project_name, clean, engine, sanitizer, architecture,
        '-t', 'gcr.io/oss-fuzz/%s' % project_name,
        '/bin/bash', '-c', 'rm -rf /out/*'
    ])
+     
  else:
    print('Keeping existing build artifacts as-is (if any).')
-
  env = [
      'FUZZING_ENGINE=' + engine,
      'SANITIZER=' + sanitizer,
@ -737,35 +737,41 @@ def run_fuzzer(args):


 def reproduce(args):
+  """Reproduces a testcase from the parsed command line arguments.
+
+  Args:
+    args: Parsed arguments providing project_name, fuzzer_name, valgrind, e
+      (extra environment variables), fuzzer_args and testcase_path
+
+  Returns:
+    The return value of reproduce_impl
+  """
+  return reproduce_impl(args.project_name, args.fuzzer_name, args.valgrind,
+                        args.e, args.fuzzer_args, args.testcase_path)
+
+
+def reproduce_impl(project_name, fuzzer_name, valgrind, env_to_add, fuzzer_args, testcase_path):
  """Reproduces a testcase in the container."""
-  if not check_project_exists(args.project_name):
+  if not check_project_exists(project_name):
    return 1

-  if not _check_fuzzer_exists(args.project_name, args.fuzzer_name):
+  if not _check_fuzzer_exists(project_name, fuzzer_name):
    return 1

  debugger = ''
  env = []
  image_name = 'base-runner'

-  if args.valgrind:
+  if valgrind:
    debugger = 'valgrind --tool=memcheck --track-origins=yes --leak-check=full'

  if debugger:
    image_name = 'base-runner-debug'
    env += ['DEBUGGER=' + debugger]

-  if args.e:
-    env += args.e
+  if env_to_add:
+    env += env_to_add

  run_args = _env_to_docker_args(env) + [
-      '-v', '%s:/out' % _get_output_dir(args.project_name),
-      '-v', '%s:/testcase' % _get_absolute_path(args.testcase_path),
+      '-v', '%s:/out' % _get_output_dir(project_name),
+      '-v', '%s:/testcase' % _get_absolute_path(testcase_path),
      '-t', 'gcr.io/oss-fuzz-base/%s' % image_name,
      'reproduce',
-      args.fuzzer_name,
+      fuzzer_name,
      '-runs=100',
-  ] + args.fuzzer_args
+  ] + fuzzer_args

  return docker_run(run_args)

--- a/infra/repo_manager.py
+++ b/infra/repo_manager.py
@ -185,7 +185,12 @@ class RepoManager(object):
      self._run_command(['git', 'fetch', '--unshallow'],
                        self.repo_dir,
                        check_result=True)
-    self._run_command(['git', 'checkout', '-f', commit], self.repo_dir)
+    self._run_command(['git', 'checkout', '-f', commit],
+                      self.repo_dir,
+                      check_result=True)
+    self._run_command(['git', 'clean', '-fxd'],
+                      self.repo_dir,
+                      check_result=True)
    if self.get_current_commit() != commit:
      raise RepoManagerError('Error checking out commit %s' % commit)

--- a/infra/repo_manager_test.py
+++ b/infra/repo_manager_test.py
@ -21,8 +21,7 @@ The will consist of the following functional tests
 import os
 import unittest

-from RepoManager import RepoManager
-from RepoManager import RepoManagerError
+import repo_manager


 class TestRepoManager(unittest.TestCase):
@ -32,29 +31,30 @@ class TestRepoManager(unittest.TestCase):

  def test_clone_correctly(self):
    """Tests the correct location of the git repo."""
-    repo_manager = RepoManager(self.curl_repo, 'tmp')
-    git_path = os.path.join(repo_manager.base_dir, repo_manager.repo_name,
-                            '.git')
+    test_repo_manager = repo_manager.RepoManager(self.curl_repo, 'tmp')
+    git_path = os.path.join(test_repo_manager.base_dir,
+                            test_repo_manager.repo_name, '.git')
    self.assertTrue(os.path.isdir(git_path))
-    repo_manager.remove_repo()
-    with self.assertRaises(RepoManagerError):
-      repo_manager = RepoManager(' ', 'tmp')
+    test_repo_manager.remove_repo()
+    with self.assertRaises(repo_manager.RepoManagerError):
+      test_repo_manager = repo_manager.RepoManager(' ', 'tmp')

  def test_checkout_commit(self):
    """Tests that the git checkout command works."""
-    repo_manager = RepoManager(self.curl_repo, 'tmp')
+    test_repo_manager = repo_manager.RepoManager(self.curl_repo, 'tmp')
    commit_to_test = '036ebac0134de3b72052a46f734e4ca81bb96055'
-    repo_manager.checkout_commit(commit_to_test)
-    self.assertEqual(commit_to_test, repo_manager.get_current_commit())
+    test_repo_manager.checkout_commit(commit_to_test)
+    self.assertEqual(commit_to_test, test_repo_manager.get_current_commit())
    with self.assertRaises(ValueError):
-      repo_manager.checkout_commit(' ')
-    with self.assertRaises(RepoManagerError):
-      repo_manager.checkout_commit('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
-    repo_manager.remove_repo()
+      test_repo_manager.checkout_commit(' ')
+    with self.assertRaises(repo_manager.RepoManagerError):
+      test_repo_manager.checkout_commit(
+          'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
+    test_repo_manager.remove_repo()

  def test_get_commit_list(self):
    """Tests an accurate commit list can be retrived from the repo manager."""
-    repo_manager = RepoManager(self.curl_repo, 'tmp')
+    test_repo_manager = repo_manager.RepoManager(self.curl_repo, 'tmp')
    old_commit = '7cf18b05e04bbb0f08c74d2567b0648f6c31a952'
    new_commit = '113db127ee2b2f874dfcce406103ffe666e11953'
    commit_list = [
@ -63,15 +63,15 @@ class TestRepoManager(unittest.TestCase):
        '9a2cbf30b81a2b57149bb20e78e2e4cb5c2ff389',
        '7cf18b05e04bbb0f08c74d2567b0648f6c31a952'
    ]
-    result_list = repo_manager.get_commit_list(old_commit, new_commit)
+    result_list = test_repo_manager.get_commit_list(old_commit, new_commit)
    self.assertListEqual(commit_list, result_list)
-    with self.assertRaises(RepoManagerError):
-      repo_manager.get_commit_list('asafd', new_commit)
-    with self.assertRaises(RepoManagerError):
-      repo_manager.get_commit_list(new_commit, 'asdfasdf')
-    with self.assertRaises(RepoManagerError):
+    with self.assertRaises(repo_manager.RepoManagerError):
+      test_repo_manager.get_commit_list('asafd', new_commit)
+    with self.assertRaises(repo_manager.RepoManagerError):
+      test_repo_manager.get_commit_list(new_commit, 'asdfasdf')
+    with self.assertRaises(repo_manager.RepoManagerError):
      # Testing commits out of order
-      result_list = repo_manager.get_commit_list(new_commit, old_commit)
+      test_repo_manager.get_commit_list(new_commit, old_commit)


 if __name__ == '__main__':