#!/usr/bin/env python3
# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
"""Checks code for common issues before submitting."""

import argparse
import os
import re
import subprocess
import sys
import unittest

import yaml

import constants

_SRC_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

VALID_PROJECT_REGEX_STR = '^[a-z0-9_-]+$'
VALID_PROJECT_REGEX = re.compile(VALID_PROJECT_REGEX_STR)


def _is_project_file(actual_path, expected_filename):
  """Returns True if |actual_path|'s name is |expected_filename| and it is a
  file that exists in projects/."""
  if os.path.basename(actual_path) != expected_filename:
    return False

  if os.path.basename(os.path.dirname(
      os.path.dirname(actual_path))) != 'projects':
    return False

  return os.path.exists(actual_path)


# TODO: Check for -fsanitize=fuzzer in files as well.
def _check_one_lib_fuzzing_engine(build_sh_file):
  """Returns False if |build_sh_file| contains -lFuzzingEngine.

  This is deprecated behavior. $LIB_FUZZING_ENGINE should be used instead so
  that -fsanitize=fuzzer is used."""
  if not _is_project_file(build_sh_file, 'build.sh'):
    return True

  with open(build_sh_file) as build_sh:
    build_sh_lines = build_sh.readlines()
  # Start counting at 1 so the reported line number matches what editors show.
  for line_num, line in enumerate(build_sh_lines, 1):
    uncommented_code = line.split('#')[0]
    if '-lFuzzingEngine' in uncommented_code:
      print('Error: build.sh contains deprecated "-lFuzzingEngine" on line: '
            f'{line_num}. Please use "$LIB_FUZZING_ENGINE" instead.')
      return False
  return True


def check_lib_fuzzing_engine(paths):
  """Calls _check_one_lib_fuzzing_engine on each path in |paths|.
  Returns True if the result of every call is True."""
  return all([_check_one_lib_fuzzing_engine(path) for path in paths])

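# Illustrative example (not taken from this repo): a minimal project.yaml that
# the checker below accepts might look like
#
#   homepage: https://example.org
#   main_repo: https://github.com/example/project
#   language: c++
#   primary_contact: maintainer@example.org
#
# Only "main_repo" and "language" are actually required by these checks; the
# project name, URLs, and email above are hypothetical values for illustration,
# and 'c++' is assumed to be one of constants.LANGUAGES.
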
class ProjectYamlChecker:
  """Checks for a project.yaml file."""

  # Sections in a project.yaml and the constant values that they are allowed
  # to have.
  SECTIONS_AND_CONSTANTS = {
      'sanitizers': constants.SANITIZERS,
      'architectures': constants.ARCHITECTURES,
      'fuzzing_engines': constants.ENGINES,
  }

  # Note: this list must be updated when we allow new sections.
  VALID_SECTION_NAMES = [
      'architectures',
      'auto_ccs',
      'blackbox',
      'builds_per_day',
      'coverage_extra_args',
      'disabled',
      'fuzzing_engines',
      'help_url',
      'homepage',
      'language',
      'labels',  # For internal use only, hard to lint as it uses fuzzer names.
      'main_repo',
      'primary_contact',
      'run_tests',
      'sanitizers',
      'selective_unpack',
      'vendor_ccs',
      'view_restrictions',
      'file_github_issue',
  ]

  REQUIRED_SECTIONS = ['main_repo']

  def __init__(self, filename):
    self.filename = filename
    with open(filename) as file_handle:
      self.data = yaml.safe_load(file_handle)
    self.success = True

  def do_checks(self):
    """Does all project.yaml checks.

    Returns True if they pass."""
    if self.is_disabled():
      return True

    checks = [
        self.check_project_yaml_constants,
        self.check_required_sections,
        self.check_valid_section_names,
        self.check_valid_emails,
        self.check_valid_language,
        self.check_valid_project_name,
    ]
    for check_function in checks:
      check_function()
    return self.success

  def is_disabled(self):
    """Returns True if this project is disabled."""
    return self.data.get('disabled', False)

  def error(self, message):
    """Prints an error message and sets self.success to False."""
    self.success = False
    print(f'Error in {self.filename}: {message}')

  def check_valid_project_name(self):
    """Checks that the project has a valid name."""
    banned_names = ['google', 'g00gle']
    project_name = os.path.basename(os.path.dirname(self.filename))
    for banned_name in banned_names:
      if banned_name in project_name:
        self.error('Projects can\'t have \'google\' in the name.')

    if not VALID_PROJECT_REGEX.match(project_name):
      self.error(f'Projects must conform to regex {VALID_PROJECT_REGEX_STR}')

  def check_project_yaml_constants(self):
    """Checks that certain sections only contain allowed constant values."""
    for section, allowed_constants in self.SECTIONS_AND_CONSTANTS.items():
      if section not in self.data:
        continue
      actual_constants = self.data[section]
      allowed_constants_str = ', '.join(allowed_constants)
      for constant in actual_constants:
        if isinstance(constant, str):
          if constant not in allowed_constants:
            self.error(f'{constant} (in {section} section) is not a valid '
                       f'constant ({allowed_constants_str}).')
        elif isinstance(constant, dict):
          # The only alternative value allowed is the experimental flag, i.e.
          # `constant == {'memory': {'experimental': True}}`. Do not check the
          # experimental flag, but assert that the sanitizer is a valid one.
          if (len(constant.keys()) > 1 or
              list(constant.keys())[0] not in allowed_constants):
            self.error(f'Disallowed value in project.yaml: {constant}')
        else:
          self.error(f'Disallowed value in project.yaml: {constant}')

  def check_valid_section_names(self):
    """Checks that all section names are valid."""
    for name in self.data:
      if name not in self.VALID_SECTION_NAMES:
        self.error(
            f'{name} is not a valid section name ({self.VALID_SECTION_NAMES})')

  def check_required_sections(self):
    """Checks that all required sections are present in |self.data|."""
    for section in self.REQUIRED_SECTIONS:
      if section not in self.data:
        self.error(f'{section} section is missing.')

  def check_valid_emails(self):
    """Checks that email addresses look valid."""
    # Get email addresses.
    email_addresses = []
    primary_contact = self.data.get('primary_contact')
    if primary_contact:
      email_addresses.append(primary_contact)
    auto_ccs = self.data.get('auto_ccs')
    if auto_ccs:
      email_addresses.extend(auto_ccs)

    # Check that email addresses seem normal.
    for email_address in email_addresses:
      if '@' not in email_address or '.' not in email_address:
        self.error(f'{email_address} is an invalid email address.')

  def check_valid_language(self):
    """Checks that the language is specified and valid."""
    language = self.data.get('language')
    if not language:
      self.error('Missing "language" attribute in project.yaml.')
    elif language not in constants.LANGUAGES:
      self.error(
          f'"language: {language}" is not supported ({constants.LANGUAGES}).')

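# For illustration (hypothetical path):
# ProjectYamlChecker('projects/example/project.yaml').do_checks() returns True
# when every check passes. If, say, main_repo were missing, it would print
# 'Error in projects/example/project.yaml: main_repo section is missing.' and
# return False.
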
def _check_one_project_yaml(project_yaml_filename):
  """Does checks on the project.yaml file.

  Returns True on success."""
  if _is_project_file(project_yaml_filename, 'project.yml'):
    print(project_yaml_filename, 'must be named project.yaml.')
    return False

  if not _is_project_file(project_yaml_filename, 'project.yaml'):
    return True

  checker = ProjectYamlChecker(project_yaml_filename)
  return checker.do_checks()


def check_project_yaml(paths):
  """Calls _check_one_project_yaml on each path in |paths|.
  Returns True if the result of every call is True."""
  return all([_check_one_project_yaml(path) for path in paths])


def _check_one_seed_corpus(path):
  """Returns False and prints an error if |path| is a seed corpus."""
  if os.path.basename(os.path.dirname(os.path.dirname(path))) != 'projects':
    return True

  if os.path.splitext(path)[1] == '.zip':
    print('Don\'t commit seed corpora into the OSS-Fuzz repo, '
          'they bloat it forever.')
    return False
  return True


def check_seed_corpus(paths):
  """Calls _check_one_seed_corpus on each path in |paths|.
  Returns True if the result of every call is True."""
  return all([_check_one_seed_corpus(path) for path in paths])


def _check_one_apt_update(path):
  """Checks that a Dockerfile runs apt-get update before apt-get install."""
  if os.path.basename(os.path.dirname(os.path.dirname(path))) != 'projects':
    return True

  if os.path.basename(path) != 'Dockerfile':
    return True

  with open(path, 'r') as file:
    dockerfile = file.read()

  if 'RUN apt install' in dockerfile or 'RUN apt-get install' in dockerfile:
    print('Please add an "apt-get update" before "apt-get install". '
          'Otherwise, a cached and outdated RUN layer may lead to install '
          'failures.')
    return False
  return True


def check_apt_update(paths):
  """Checks that all Dockerfiles run apt-get update before apt-get install."""
  return all([_check_one_apt_update(path) for path in paths])


def do_checks(changed_files):
  """Runs all presubmit checks. Returns False if any fails."""
  checks = [
      check_license,
      yapf,
      check_project_yaml,
      check_lib_fuzzing_engine,
      check_seed_corpus,
      check_apt_update,
  ]
  # Use a list comprehension here and in other cases where we use all() so
  # that we don't quit early on failure. This is more user-friendly: the more
  # errors we report at once, the fewer check-fix-check cycles users need to
  # go through.
  return all([check(changed_files) for check in checks])


_CHECK_LICENSE_FILENAMES = ['Dockerfile']

_CHECK_LICENSE_EXTENSIONS = [
    '.bash',
    '.c',
    '.cc',
    '.cpp',
    '.css',
    '.Dockerfile',
    '.go',
    '.h',
    '.htm',
    '.html',
    '.java',
    '.js',
    '.proto',
    '.py',
    '.rs',
    '.sh',
    '.ts',
]
THIRD_PARTY_DIR_NAME = 'third_party'

_LICENSE_STRING = 'http://www.apache.org/licenses/LICENSE-2.0'


def check_license(paths):
  """Validates license headers."""
  if not paths:
    return True

  success = True
  for path in paths:
    path_parts = str(path).split(os.sep)
    if any(path_part == THIRD_PARTY_DIR_NAME for path_part in path_parts):
      continue
    filename = os.path.basename(path)
    extension = os.path.splitext(path)[1]
    if (filename not in _CHECK_LICENSE_FILENAMES and
        extension not in _CHECK_LICENSE_EXTENSIONS):
      continue

    with open(path) as file_handle:
      if _LICENSE_STRING not in file_handle.read():
        print('Missing license header in file %s.' % str(path))
        success = False

  return success

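# For example (illustrative paths): projects/example/Dockerfile and
# infra/presubmit.py would both be checked for the Apache license URL above,
# while files under a third_party/ directory or with unlisted extensions are
# skipped.
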
def bool_to_returncode(success):
  """Returns 0 if |success|. Otherwise returns 1."""
  if success:
    print('Success.')
    return 0

  print('Failed.')
  return 1


def is_nonfuzzer_python(path):
  """Returns True if |path| ends in .py and is not in projects/."""
  return os.path.splitext(path)[1] == '.py' and '/projects/' not in path


def lint(_=None):
  """Runs python's linter on infra.

  Returns False if it fails linting."""
  # Use --score no to make linting quieter.
  command = ['python3', '-m', 'pylint', '--score', 'no', '-j', '0', 'infra']
  returncode = subprocess.run(command, check=False).returncode
  return returncode == 0


def yapf(paths, validate=True):
  """Runs yapf on the Python files in |paths|. Only validates the format if
  |validate| is True. Otherwise, formats the files in place. Returns False if
  validation or formatting fails."""
  paths = [path for path in paths if is_nonfuzzer_python(path)]
  if not paths:
    return True

  validate_argument = '-d' if validate else '-i'
  command = ['yapf', validate_argument, '-p']
  command.extend(paths)

  returncode = subprocess.run(command, check=False).returncode
  return returncode == 0

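# Illustrative invocation: for a changed file like /abs/path/infra/presubmit.py
# (hypothetical path), validation builds a command such as
# ['yapf', '-d', '-p', '/abs/path/infra/presubmit.py'], while validate=False
# swaps '-d' for '-i' to rewrite the file in place. Anything under /projects/
# is filtered out beforehand by is_nonfuzzer_python.
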
def get_changed_files():
  """Returns a list of absolute paths of files changed in this git branch."""
  branch_commit_hash = subprocess.check_output(
      ['git', 'merge-base', 'HEAD', 'origin/HEAD']).strip().decode()

  diff_commands = [
      # Return list of modified files in the commits on this branch.
      ['git', 'diff', '--name-only', branch_commit_hash + '..'],
      # Return list of modified files from uncommitted changes.
      ['git', 'diff', '--name-only']
  ]

  changed_files = set()
  for command in diff_commands:
    file_paths = subprocess.check_output(command).decode().splitlines()
    for file_path in file_paths:
      if not os.path.isfile(file_path):
        continue
      changed_files.add(file_path)

  print(f'Changed files: {" ".join(changed_files)}')
  return [os.path.abspath(f) for f in changed_files]


def run_build_tests():
  """Runs build tests because they can't be run in parallel."""
  suite_list = [
      unittest.TestLoader().discover(os.path.join(_SRC_ROOT, 'infra', 'build'),
                                     pattern='*_test.py'),
  ]
  suite = unittest.TestSuite(suite_list)
  print('Running build tests.')
  result = unittest.TextTestRunner().run(suite)
  return not result.failures and not result.errors


def run_nonbuild_tests(parallel):
  """Runs all tests but build tests. Does them in parallel if |parallel|.

  Build tests are excluded because they use an emulator that prevents them
  from being run in parallel."""
  # We look for all project directories because otherwise pytest won't run
  # tests that are not in valid modules (e.g. "base-images").
  relevant_dirs = set()
  all_files = get_all_files()
  for file_path in all_files:
    directory = os.path.dirname(file_path)
    relevant_dirs.add(directory)

  # Use ignore-glob because ignore doesn't seem to work properly with the way
  # we pass directories to pytest.
  command = [
      'pytest',
      '--ignore-glob=infra/build/*',
      '--ignore-glob=projects/*',
  ]
  if parallel:
    command.extend(['-n', 'auto'])
  command += list(relevant_dirs)
  print('Running non-build tests.')

  # TODO(metzman): Get rid of this once config_utils stops using it.
  env = os.environ.copy()
  env['CIFUZZ_TEST'] = '1'
  return subprocess.run(command, check=False, env=env).returncode == 0


def run_tests(_=None, parallel=False, build_tests=True, nonbuild_tests=True):
  """Runs all unit tests."""
  build_success = True
  nonbuild_success = True
  if nonbuild_tests:
    nonbuild_success = run_nonbuild_tests(parallel)
  else:
    print('Skipping nonbuild tests as specified.')

  if build_tests:
    build_success = run_build_tests()
  else:
    print('Skipping build tests as specified.')

  return nonbuild_success and build_success


def run_systemsan_tests(_=None):
  """Runs SystemSan unit tests."""
  command = ['make', 'test']
  return subprocess.run(command,
                        cwd='infra/experimental/SystemSan',
                        check=False).returncode == 0


def get_all_files():
  """Returns a list of absolute paths of files in this repo."""
  get_all_files_command = ['git', 'ls-files']
  output = subprocess.check_output(get_all_files_command).decode().splitlines()
  return [os.path.abspath(path) for path in output if os.path.isfile(path)]


def main():
  """Checks changes on a branch for common issues before submitting."""
  # Get program arguments.
  parser = argparse.ArgumentParser(
      description='Presubmit script for oss-fuzz.')
  parser.add_argument('command',
                      choices=[
                          'format', 'lint', 'license', 'infra-tests',
                          'systemsan-tests'
                      ],
                      nargs='?')
  parser.add_argument('-a',
                      '--all-files',
                      action='store_true',
                      help='Run presubmit check(s) on all files',
                      default=False)
  parser.add_argument('-p',
                      '--parallel',
                      action='store_true',
                      help='Run tests in parallel.',
                      default=False)
  parser.add_argument('-s',
                      '--skip-build-tests',
                      action='store_true',
                      help='Skip build tests which are slow and must run '
                      'sequentially.',
                      default=False)
  parser.add_argument('-n',
                      '--skip-nonbuild-tests',
                      action='store_true',
                      help='Only do build tests.',
                      default=False)
  args = parser.parse_args()

  if args.all_files:
    relevant_files = get_all_files()
  else:
    relevant_files = get_changed_files()

  os.chdir(_SRC_ROOT)

  # Do one specific check if the user asked for it.
  if args.command == 'format':
    success = yapf(relevant_files, False)
    return bool_to_returncode(success)

  if args.command == 'lint':
    success = lint()
    return bool_to_returncode(success)

  if args.command == 'license':
    success = check_license(relevant_files)
    return bool_to_returncode(success)

  if args.command == 'infra-tests':
    success = run_tests(relevant_files,
                        parallel=args.parallel,
                        build_tests=(not args.skip_build_tests),
                        nonbuild_tests=(not args.skip_nonbuild_tests))
    return bool_to_returncode(success)

  if args.command == 'systemsan-tests':
    success = run_systemsan_tests(relevant_files)
    return bool_to_returncode(success)

  # Do all the checks (but no tests).
  success = do_checks(relevant_files)

  return bool_to_returncode(success)


if __name__ == '__main__':
  sys.exit(main())