2019-12-17 00:48:49 +00:00
|
|
|
# Copyright 2019 Google LLC
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
|
|
|
"""Uses bisection to determine which commit a bug was introduced and fixed.
|
|
|
|
This module takes a high and a low commit SHA, a repo name, and a bug.
|
|
|
|
The module bisects the high and low commit SHA searching for the location
|
|
|
|
where the bug was introduced. It also looks for where the bug was fixed.
|
|
|
|
This is done with the following steps:
|
|
|
|
|
|
|
|
|
2020-01-15 21:30:57 +00:00
|
|
|
NOTE: Needs to be run from root of the OSS-Fuzz source checkout.
|
2019-12-17 00:48:49 +00:00
|
|
|
|
|
|
|
Typical usage example:

    python3 infra/bisector.py
      --type regressed
      --old_commit 1e403e9259a1abedf108ab86f711ba52c907226d
      --new_commit f79be4f2330f4b89ea2f42e1c44ca998c59a0c0f
      --fuzz_target rules_fuzzer
      --project_name yara
      --test_case_path infra/yara_testcase
      --sanitizer address
|
|
|
|
"""
|
|
|
|
|
|
|
|
import argparse
|
2020-04-20 22:05:45 +00:00
|
|
|
import collections
|
2020-01-29 22:10:04 +00:00
|
|
|
import logging
|
2020-04-14 01:38:23 +00:00
|
|
|
import os
|
2020-05-04 06:50:25 +00:00
|
|
|
import sys
|
2019-12-17 00:48:49 +00:00
|
|
|
import tempfile
|
|
|
|
|
|
|
|
import build_specified_commit
|
|
|
|
import helper
|
|
|
|
import repo_manager
|
2020-01-29 19:03:43 +00:00
|
|
|
import utils
|
2019-12-17 00:48:49 +00:00
|
|
|
|
2020-04-20 22:05:45 +00:00
|
|
|
# Outcome of a bisection: the URL of the bisected repository and the commit
# that was identified.
Result = collections.namedtuple('Result', ('repo_url', 'commit'))

# Substrings whose presence in the fuzzer output marks the start of a
# sanitizer report.
START_MARKERS = ['==ERROR', '==WARNING']

# Substrings whose presence in the fuzzer output marks the end of a sanitizer
# report.
END_MARKERS = ['SUMMARY:']

# Prefix of the output line that carries the crash deduplication token.
DEDUP_TOKEN_MARKER = 'DEDUP_TOKEN:'
|
|
|
|
|
2019-12-17 00:48:49 +00:00
|
|
|
|
2020-05-13 04:38:46 +00:00
|
|
|
class BisectError(Exception):
  """Bisection error.

  Attributes:
    repo_url: URL of the repository that was being bisected when the error
      occurred.
  """

  def __init__(self, message, repo_url):
    """Initializes the error.

    Args:
      message: Human-readable description of the failure.
      repo_url: URL of the repository that was being bisected.
    """
    super().__init__(message)
    self.repo_url = repo_url

  def __reduce__(self):
    # Only |message| is stored in self.args, so the default exception
    # reduction would rebuild the object as BisectError(message) and fail with
    # a TypeError for the missing |repo_url|. Supplying both constructor
    # arguments keeps the exception usable with pickle and copy.
    return (self.__class__, (*self.args, self.repo_url))
|
|
|
|
|
|
|
|
|
2019-12-17 00:48:49 +00:00
|
|
|
def main():
  """Finds the commit SHA where an error was initially introduced or fixed.

  Parses the command-line flags, runs the bisection over the requested commit
  range and prints the commit that was identified.

  Returns:
    0 if a commit was identified, 1 if no commit was found or the result is
    the old end of the range (bisection was not possible).
  """
  logging.getLogger().setLevel(logging.INFO)
  utils.chdir_to_root()
  parser = argparse.ArgumentParser(
      description='git bisection for finding introduction of bugs')

  parser.add_argument('--project_name',
                      help='The name of the project where the bug occurred.',
                      required=True)
  parser.add_argument('--new_commit',
                      help='The newest commit SHA to be bisected.',
                      required=True)
  parser.add_argument('--old_commit',
                      help='The oldest commit SHA to be bisected.',
                      required=True)
  parser.add_argument('--fuzz_target',
                      help='The name of the fuzzer to be built.',
                      required=True)
  parser.add_argument('--test_case_path',
                      help='The path to test case.',
                      required=True)
  parser.add_argument('--engine',
                      help='The default is "libfuzzer".',
                      default='libfuzzer')
  parser.add_argument('--sanitizer',
                      default='address',
                      help='The default is "address".')
  parser.add_argument('--type',
                      choices=['regressed', 'fixed'],
                      help='The bisection type.',
                      required=True)
  parser.add_argument('--architecture', default='x86_64')
  args = parser.parse_args()

  build_data = build_specified_commit.BuildData(project_name=args.project_name,
                                                engine=args.engine,
                                                sanitizer=args.sanitizer,
                                                architecture=args.architecture)

  result = bisect(args.type, args.old_commit, args.new_commit,
                  args.test_case_path, args.fuzz_target, build_data)
  if not result.commit:
    logging.error('No error was found in commit range %s:%s', args.old_commit,
                  args.new_commit)
    return 1
  if result.commit == args.old_commit:
    # The two literals are concatenated implicitly, so the first one must end
    # with a space to keep "in" and "the" as separate words.
    logging.error(
        'Bisection Error: Both the first and the last commits in '
        'the given range have the same behavior, bisection is not possible. ')
    return 1
  if args.type == 'regressed':
    print('Error was introduced at commit %s' % result.commit)
  elif args.type == 'fixed':
    print('Error was fixed at commit %s' % result.commit)
  return 0
|
|
|
|
|
|
|
|
|
2020-05-06 05:30:16 +00:00
|
|
|
def _get_dedup_token(output):
  """Extracts the dedup token from fuzzer output.

  Args:
    output: The text to scan.

  Returns:
    The whitespace-stripped text that follows the first DEDUP_TOKEN_MARKER on
    the first line containing it, or None if no line contains the marker.
  """
  for text in output.splitlines():
    _, marker, remainder = text.partition(DEDUP_TOKEN_MARKER)
    if marker:
      return remainder.strip()

  return None
|
|
|
|
|
|
|
|
|
2021-08-04 01:05:00 +00:00
|
|
|
def _check_for_crash(project_name, fuzz_target, testcase_path):
  """Reproduces the testcase and reports the dedup token of a detected crash.

  Args:
    project_name: Name of the project whose fuzz target is run.
    fuzz_target: Name of the fuzz target to run.
    testcase_path: Path to the testcase passed to the fuzz target.

  Returns:
    The dedup token found in the combined stdout/stderr when the output
    contains both a start marker and an end marker. None when the reproduce
    step reports no return code, when either marker is missing, or when no
    dedup token line is present.
  """

  def docker_run(args):
    """Runs `docker run` with |args|, adding -i when stdin is a terminal."""
    interactive = ['-i'] if sys.stdin.isatty() else []
    return utils.execute(['docker', 'run', '--rm', '--privileged'] +
                         interactive + args)

  logging.info('Checking for crash')
  out, err, return_code = helper.reproduce_impl(
      project=helper.Project(project_name),
      fuzzer_name=fuzz_target,
      valgrind=False,
      env_to_add=[],
      fuzzer_args=[],
      testcase_path=testcase_path,
      run_function=docker_run,
      err_result=(None, None, None))
  if return_code is None:
    # reproduce_impl handed back err_result, i.e. it could not run the target.
    return None

  logging.info('stdout =\n%s', out)
  logging.info('stderr =\n%s', err)

  # pylint: disable=unsupported-membership-test
  def output_has_any(markers):
    """Returns True if any of |markers| occurs in stdout or stderr."""
    return any(marker in out or marker in err for marker in markers)

  # Only output with both the beginning and the end of a report counts as a
  # crash.
  if not (output_has_any(START_MARKERS) and output_has_any(END_MARKERS)):
    return None

  return _get_dedup_token(out + err)
|
2020-05-04 06:50:25 +00:00
|
|
|
|
|
|
|
|
|
|
|
# pylint: disable=too-many-locals
|
|
|
|
# pylint: disable=too-many-arguments
|
2020-07-09 10:04:13 +00:00
|
|
|
# pylint: disable=too-many-statements
|
2021-08-04 01:05:00 +00:00
|
|
|
def _bisect(bisect_type, old_commit, new_commit, testcase_path, fuzz_target,
            build_data):
  """Perform the bisect.

  Builds and runs the fuzz target at new_commit to obtain the reference
  result, checks that old_commit (or, failing that, an older ancestor) behaves
  differently, then binary-searches the commit list for the oldest commit that
  still behaves like new_commit.

  Args:
    bisect_type: The type of the bisect ('regressed' or 'fixed').
    old_commit: The oldest commit in the range.
    new_commit: The newest commit in the range.
    testcase_path: The file path of the testcase to reproduce.
    fuzz_target: The name of the fuzz target to run.
    build_data: Build parameters; project_name is read from it and the whole
      object is passed on to the build step.

  Returns:
    A Result holding the repo URL and the identified commit.

  Raises:
    ValueError: when the main repo can not be determined.
    BisectError: when the commits are identical, a boundary commit fails to
      build, bisect_type is invalid, or old_commit behaves like new_commit.
  """
  # pylint: disable=too-many-branches
  base_builder_repo = build_specified_commit.load_base_builder_repo()

  with tempfile.TemporaryDirectory() as tmp_dir:
    repo_url, repo_path = build_specified_commit.detect_main_repo(
        build_data.project_name, commit=new_commit)
    if not repo_url or not repo_path:
      raise ValueError('Main git repo can not be determined.')

    if old_commit == new_commit:
      raise BisectError('old_commit is the same as new_commit', repo_url)

    # Copy /src from the built Docker container to ensure all dependencies
    # exist. This will be mounted when running them.
    host_src_dir = build_specified_commit.copy_src_from_docker(
        build_data.project_name, tmp_dir)

    bisect_repo_manager = repo_manager.RepoManager(
        os.path.join(host_src_dir, os.path.basename(repo_path)))
    bisect_repo_manager.fetch_all_remotes()

    def build_at(commit):
      """Builds the fuzzers at |commit|; returns the build step's result."""
      return build_specified_commit.build_fuzzers_from_commit(
          commit,
          bisect_repo_manager,
          host_src_dir,
          build_data,
          base_builder_repo=base_builder_repo)

    # Index 0 is treated as the new end of the range and the last index as
    # the old end.
    commit_list = bisect_repo_manager.get_commit_list(new_commit, old_commit)

    old_idx = len(commit_list) - 1
    new_idx = 0
    logging.info('Testing against new_commit (%s)', commit_list[new_idx])
    if not build_at(commit_list[new_idx]):
      raise BisectError('Failed to build new_commit', repo_url)

    if bisect_type == 'fixed':
      should_crash = False
    elif bisect_type == 'regressed':
      should_crash = True
    else:
      raise BisectError('Invalid bisect type ' + bisect_type, repo_url)

    # The result at new_commit is the reference every other commit is
    # compared against.
    expected_error = _check_for_crash(build_data.project_name, fuzz_target,
                                      testcase_path)
    logging.info('new_commit result = %s', expected_error)

    if not should_crash and expected_error:
      logging.warning('new_commit crashed but shouldn\'t have. '
                      'Continuing to see if stack changes.')

    # The old end of the range must behave differently from new_commit. If it
    # does not, step back once to an older ancestor and check again.
    range_valid = False
    for _ in range(2):
      logging.info('Testing against old_commit (%s)', commit_list[old_idx])
      if not build_at(commit_list[old_idx]):
        raise BisectError('Failed to build old_commit', repo_url)

      if _check_for_crash(build_data.project_name, fuzz_target,
                          testcase_path) == expected_error:
        logging.warning('old_commit %s had same result as new_commit %s',
                        old_commit, new_commit)
        # Try again on a slightly older commit.
        old_commit = bisect_repo_manager.get_parent(old_commit, 64)
        if not old_commit:
          break

        commit_list = bisect_repo_manager.get_commit_list(
            new_commit, old_commit)
        old_idx = len(commit_list) - 1
        continue

      range_valid = True
      break

    if not range_valid:
      raise BisectError('old_commit had same result as new_commit', repo_url)

    # Invariant: commit_list[new_idx] gives the same result as new_commit,
    # while commit_list[old_idx] either differs or failed to build.
    while old_idx - new_idx > 1:
      curr_idx = (old_idx + new_idx) // 2
      logging.info('Testing against %s (idx=%d)', commit_list[curr_idx],
                   curr_idx)
      if not build_at(commit_list[curr_idx]):
        # Treat build failures as if we couldn't repro.
        # TODO(ochang): retry nearby commits?
        old_idx = curr_idx
        continue

      current_error = _check_for_crash(build_data.project_name, fuzz_target,
                                       testcase_path)
      logging.info('Current result = %s', current_error)
      if expected_error == current_error:
        new_idx = curr_idx
      else:
        old_idx = curr_idx
    return Result(repo_url, commit_list[new_idx])
|
2019-12-17 00:48:49 +00:00
|
|
|
|
|
|
|
|
2020-05-04 06:50:25 +00:00
|
|
|
# pylint: disable=too-many-locals
|
|
|
|
# pylint: disable=too-many-arguments
|
2021-08-04 01:05:00 +00:00
|
|
|
def bisect(bisect_type, old_commit, new_commit, testcase_path, fuzz_target,
           build_data):
  """From a commit range, this function calculates which commit introduced
  (or fixed) a specific error from a fuzz testcase_path.

  Args:
    bisect_type: The type of the bisect ('regressed' or 'fixed').
    old_commit: The oldest commit in the error regression range.
    new_commit: The newest commit in the error regression range.
    testcase_path: The file path of the test case that triggers the error.
    fuzz_target: The name of the fuzzer to be tested.
    build_data: a class holding all of the input parameters for bisection.

  Returns:
    A Result namedtuple with the repo URL and the identified commit SHA.

  Raises:
    ValueError: when a repo url can't be determined from the project.
    BisectError: when the bisection itself can not be carried out.
  """
  try:
    result = _bisect(bisect_type, old_commit, new_commit, testcase_path,
                     fuzz_target, build_data)
    return result
  finally:
    # Clean up projects/ as _bisect may have modified it.
    oss_fuzz_repo_manager = repo_manager.RepoManager(helper.OSS_FUZZ_DIR)
    cleanup_commands = (
        ['reset', 'projects'],
        ['checkout', 'projects'],
        ['clean', '-fxd', 'projects'],
    )
    for git_args in cleanup_commands:
      oss_fuzz_repo_manager.git(git_args)
|
2020-04-21 00:11:29 +00:00
|
|
|
|
|
|
|
|
2019-12-17 00:48:49 +00:00
|
|
|
if __name__ == '__main__':
  # main() returns 0 on success and 1 on failure; pass that on as the process
  # exit status so calling scripts can detect a failed bisection.
  sys.exit(main())
|