[infra] Add presubmit script (#3196)

This commit is contained in:
jonathanmetzman 2020-01-10 10:19:42 -08:00 committed by GitHub
parent 40a6d7ffdb
commit 363d00ba5f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 914 additions and 0 deletions

588
.pylintrc Normal file
View File

@ -0,0 +1,588 @@
[MASTER]
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loaded into the active Python interpreter and may
# run arbitrary code.
extension-pkg-whitelist=
# Add files or directories to the blacklist. They should be base names, not
# paths.
ignore=CVS
# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.
ignore-patterns=
# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=
# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
# number of processors available to use.
jobs=1
# Control the amount of potential inferred values when inferring a single
# object. This can help the performance when dealing with large functions or
# complex, nested conditions.
limit-inference-results=100
# List of plugins (as comma separated values of python module names) to load,
# usually to register additional checkers.
load-plugins=
# Pickle collected data for later comparisons.
persistent=yes
# Specify a configuration file.
#rcfile=
# When enabled, pylint would attempt to guess common misconfiguration and emit
# user-friendly hints instead of false-positive error messages.
suggestion-mode=yes
# Allow loading of arbitrary C extensions. Extensions are imported into the
# active Python interpreter and may run arbitrary code.
unsafe-load-any-extension=no
[MESSAGES CONTROL]
# Only show warnings with the listed confidence levels. Leave empty to show
# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
confidence=
# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then reenable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=print-statement,
parameter-unpacking,
unpacking-in-except,
old-raise-syntax,
backtick,
long-suffix,
old-ne-operator,
old-octal-literal,
import-star-module-level,
non-ascii-bytes-literal,
raw-checker-failed,
bad-inline-option,
locally-disabled,
file-ignored,
suppressed-message,
useless-suppression,
deprecated-pragma,
use-symbolic-message-instead,
apply-builtin,
basestring-builtin,
buffer-builtin,
cmp-builtin,
coerce-builtin,
execfile-builtin,
file-builtin,
long-builtin,
raw_input-builtin,
reduce-builtin,
standarderror-builtin,
unicode-builtin,
xrange-builtin,
coerce-method,
delslice-method,
getslice-method,
setslice-method,
no-absolute-import,
old-division,
dict-iter-method,
dict-view-method,
next-method-called,
metaclass-assignment,
indexing-exception,
raising-string,
reload-builtin,
oct-method,
hex-method,
nonzero-method,
cmp-method,
input-builtin,
round-builtin,
intern-builtin,
unichr-builtin,
map-builtin-not-iterating,
zip-builtin-not-iterating,
range-builtin-not-iterating,
filter-builtin-not-iterating,
using-cmp-argument,
eq-without-hash,
div-method,
idiv-method,
rdiv-method,
exception-message-attribute,
invalid-str-codec,
sys-max-int,
bad-python3-import,
deprecated-string-function,
deprecated-str-translate-call,
deprecated-itertools-function,
deprecated-types-field,
next-method-defined,
dict-items-not-iterating,
dict-keys-not-iterating,
dict-values-not-iterating,
deprecated-operator-function,
deprecated-urllib-function,
xreadlines-attribute,
deprecated-sys-function,
exception-escape,
comprehension-escape,
fixme
# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifiers separated by comma (,) or put this option
# multiple times (only on the command line, not in the configuration file where
# it should appear only once). See also the "--disable" option for examples.
enable=c-extension-no-member
[REPORTS]
# Python expression which should return a score less than or equal to 10. You
# have access to the variables 'error', 'warning', 'refactor', and 'convention'
# which contain the number of messages in each category, as well as 'statement'
# which is the total number of statements analyzed. This score is used by the
# global evaluation report (RP0004).
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
# Template used to display messages. This is a python new-style format string
# used to format the message information. See doc for all details.
#msg-template=
# Set the output format. Available formats are text, parseable, colorized, json
# and msvs (visual studio). You can also give a reporter class, e.g.
# mypackage.mymodule.MyReporterClass.
output-format=text
# Tells whether to display a full report or only the messages.
reports=no
# Activate the evaluation score.
score=yes
[REFACTORING]
# Maximum number of nested blocks for function / method body
max-nested-blocks=5
# Complete names of functions that never return. When checking for
# inconsistent-return-statements, if a never-returning function is called then
# it will be considered as an explicit return statement and no message will be
# printed.
never-returning-functions=sys.exit
[BASIC]
# Naming style matching correct argument names.
argument-naming-style=snake_case
# Regular expression matching correct argument names. Overrides argument-
# naming-style.
#argument-rgx=
# Naming style matching correct attribute names.
attr-naming-style=snake_case
# Regular expression matching correct attribute names. Overrides attr-naming-
# style.
#attr-rgx=
# Bad variable names which should always be refused, separated by a comma.
bad-names=foo,
bar,
baz,
toto,
tutu,
tata
# Naming style matching correct class attribute names.
class-attribute-naming-style=any
# Regular expression matching correct class attribute names. Overrides class-
# attribute-naming-style.
#class-attribute-rgx=
# Naming style matching correct class names.
class-naming-style=PascalCase
# Regular expression matching correct class names. Overrides class-naming-
# style.
#class-rgx=
# Naming style matching correct constant names.
const-naming-style=UPPER_CASE
# Regular expression matching correct constant names. Overrides const-naming-
# style.
#const-rgx=
# Minimum line length for functions/classes that require docstrings, shorter
# ones are exempt.
docstring-min-length=-1
# Naming style matching correct function names.
function-naming-style=snake_case
# Regular expression matching correct function names. Overrides function-
# naming-style.
#function-rgx=
# Good variable names which should always be accepted, separated by a comma.
good-names=i,
j,
k,
ex,
Run,
_
# Include a hint for the correct naming format with invalid-name.
include-naming-hint=no
# Naming style matching correct inline iteration names.
inlinevar-naming-style=any
# Regular expression matching correct inline iteration names. Overrides
# inlinevar-naming-style.
#inlinevar-rgx=
# Naming style matching correct method names.
method-naming-style=snake_case
# Regular expression matching correct method names. Overrides method-naming-
# style.
#method-rgx=
# Naming style matching correct module names.
module-naming-style=snake_case
# Regular expression matching correct module names. Overrides module-naming-
# style.
#module-rgx=
# Colon-delimited sets of names that determine each other's naming style when
# the name regexes allow several styles.
name-group=
# Regular expression which should only match function or class names that do
# not require a docstring.
no-docstring-rgx=^_
# List of decorators that produce properties, such as abc.abstractproperty. Add
# to this list to register other decorators that produce valid properties.
# These decorators are taken in consideration only for invalid-name.
property-classes=abc.abstractproperty
# Naming style matching correct variable names.
variable-naming-style=snake_case
# Regular expression matching correct variable names. Overrides variable-
# naming-style.
#variable-rgx=
[SIMILARITIES]
# Ignore comments when computing similarities.
ignore-comments=yes
# Ignore docstrings when computing similarities.
ignore-docstrings=yes
# Ignore imports when computing similarities.
ignore-imports=no
# Minimum lines number of a similarity.
min-similarity-lines=4
[FORMAT]
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
expected-line-ending-format=
# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
# Number of spaces of indent required inside a hanging or continued line.
indent-after-paren=4
# String used as indentation unit. This is usually "    " (4 spaces) or "\t" (1
# tab).
indent-string=' '
# Maximum number of characters on a single line.
max-line-length=100
# Maximum number of lines in a module.
max-module-lines=1000
# List of optional constructs for which whitespace checking is disabled. `dict-
# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
# `trailing-comma` allows a space between comma and closing bracket: (a, ).
# `empty-line` allows space-only lines.
no-space-check=trailing-comma,
dict-separator
# Allow the body of a class to be on the same line as the declaration if body
# contains single statement.
single-line-class-stmt=no
# Allow the body of an if to be on the same line as the test if there is no
# else.
single-line-if-stmt=no
[MISCELLANEOUS]
# List of note tags to take in consideration, separated by a comma.
notes=FIXME,
XXX,
TODO
[TYPECHECK]
# List of decorators that produce context managers, such as
# contextlib.contextmanager. Add to this list to register other decorators that
# produce valid context managers.
contextmanager-decorators=contextlib.contextmanager
# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E1101 when accessed. Python regular
# expressions are accepted.
generated-members=
# Tells whether missing members accessed in mixin class should be ignored. A
# mixin class is detected if its name ends with "mixin" (case insensitive).
ignore-mixin-members=yes
# Tells whether to warn about missing members when the owner of the attribute
# is inferred to be None.
ignore-none=yes
# This flag controls whether pylint should warn about no-member and similar
# checks whenever an opaque object is returned when inferring. The inference
# can return multiple potential results while evaluating a Python object, but
# some branches might not be evaluated, which results in partial inference. In
# that case, it might be useful to still emit no-member and other checks for
# the rest of the inferred objects.
ignore-on-opaque-inference=yes
# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of
# qualified names.
ignored-classes=optparse.Values,thread._local,_thread._local
# List of module names for which member attributes should not be checked
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis). It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=
# Show a hint with possible names when a member name was not found. The aspect
# of finding the hint is based on edit distance.
missing-member-hint=yes
# The minimum edit distance a name should have in order to be considered a
# similar match for a missing member name.
missing-member-hint-distance=1
# The total number of similar names that should be taken in consideration when
# showing a hint for a missing member.
missing-member-max-choices=1
# List of decorators that change the signature of a decorated function.
signature-mutators=
[SPELLING]
# Limits count of emitted suggestions for spelling mistakes.
max-spelling-suggestions=4
# Spelling dictionary name. Available dictionaries: none. To make it work,
# install the python-enchant package.
spelling-dict=
# List of comma separated words that should not be checked.
spelling-ignore-words=
# A path to a file that contains the private dictionary; one word per line.
spelling-private-dict-file=
# Tells whether to store unknown words to the private dictionary (see the
# --spelling-private-dict-file option) instead of raising a message.
spelling-store-unknown-words=no
[VARIABLES]
# List of additional names supposed to be defined in builtins. Remember that
# you should avoid defining new builtins when possible.
additional-builtins=
# Tells whether unused global variables should be treated as a violation.
allow-global-unused-variables=yes
# List of strings which can identify a callback function by name. A callback
# name must start or end with one of those strings.
callbacks=cb_,
_cb
# A regular expression matching the name of dummy variables (i.e. expected to
# not be used).
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
# Argument names that match this expression will be ignored. Default to name
# with leading underscore.
ignored-argument-names=_.*|^ignored_|^unused_
# Tells whether we should check for unused import in __init__ files.
init-import=no
# List of qualified module names which can have objects that can redefine
# builtins.
redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
[LOGGING]
# Format style used to check logging format string. `old` means using %
# formatting, `new` is for `{}` formatting, and `fstr` is for f-strings.
logging-format-style=old
# Logging modules to check that the string format arguments are in logging
# function parameter format.
logging-modules=logging
[STRING]
# This flag controls whether the implicit-str-concat-in-sequence should
# generate a warning on implicit string concatenation in sequences defined over
# several lines.
check-str-concat-over-line-jumps=no
[IMPORTS]
# List of modules that can be imported at any level, not just the top level
# one.
allow-any-import-level=
# Allow wildcard imports from modules that define __all__.
allow-wildcard-with-all=no
# Analyse import fallback blocks. This can be used to support both Python 2 and
# 3 compatible code, which means that the block might have code that exists
# only in one or another interpreter, leading to false positives when analysed.
analyse-fallback-blocks=no
# Deprecated modules which should not be used, separated by a comma.
deprecated-modules=optparse,tkinter.tix
# Create a graph of external dependencies in the given file (report RP0402 must
# not be disabled).
ext-import-graph=
# Create a graph of every (i.e. internal and external) dependencies in the
# given file (report RP0402 must not be disabled).
import-graph=
# Create a graph of internal dependencies in the given file (report RP0402 must
# not be disabled).
int-import-graph=
# Force import order to recognize a module as part of the standard
# compatibility libraries.
known-standard-library=
# Force import order to recognize a module as part of a third party library.
known-third-party=enchant
# Couples of modules and preferred modules, separated by a comma.
preferred-modules=
[CLASSES]
# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,
__new__,
setUp,
__post_init__
# List of member names, which should be excluded from the protected access
# warning.
exclude-protected=_asdict,
_fields,
_replace,
_source,
_make
# List of valid names for the first argument in a class method.
valid-classmethod-first-arg=cls
# List of valid names for the first argument in a metaclass class method.
valid-metaclass-classmethod-first-arg=cls
[DESIGN]
# Maximum number of arguments for function / method.
max-args=5
# Maximum number of attributes for a class (see R0902).
max-attributes=7
# Maximum number of boolean expressions in an if statement (see R0916).
max-bool-expr=5
# Maximum number of branches for function / method body.
max-branches=12
# Maximum number of locals for function / method body.
max-locals=15
# Maximum number of parents for a class (see R0901).
max-parents=7
# Maximum number of public methods for a class (see R0904).
max-public-methods=20
# Maximum number of return / yield for function / method body.
max-returns=6
# Maximum number of statements in function / method body.
max-statements=50
# Minimum number of public methods for a class (see R0903).
min-public-methods=2
[EXCEPTIONS]
# Exceptions that will emit a warning when being caught. Defaults to
# "BaseException, Exception".
overgeneral-exceptions=BaseException,
Exception
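# Project overrides.
# NOTE(review): indent-string and max-line-length are also assigned under
# [FORMAT] above with different values. Confirm which assignment pylint
# applies and keep a single one in [FORMAT].
# String used as indentation unit. We differ from PEP8's normal 4 spaces and
# use 2, matching indent_width in .style.yapf.
indent-string='  '
# Maximum number of characters on a single line. Matches column_limit in
# .style.yapf.
max-line-length=80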

5
.style.yapf Normal file
View File

@ -0,0 +1,5 @@
[style]
based_on_style = google
column_limit = 80
indent_width = 2
split_before_named_assigns = true

View File

@ -12,6 +12,10 @@ install:
matrix:
include:
- name: "presubmit"
install:
- pip install -r infra/dev-requirements.txt
script: ./infra/presubmit.py
- name: "libfuzzer address x86_64"
env:
- TRAVIS_ENGINE=libfuzzer

View File

@ -0,0 +1,5 @@
# Requirements for submitting code changes to infra/ (needed by presubmit.py).
pylint==2.4.4
yapf==0.28.0
PyYAML==5.1

312
infra/presubmit.py Executable file
View File

@ -0,0 +1,312 @@
#!/usr/bin/env python3
# Copyright 2020 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
################################################################################
"""Check code for common issues before submitting."""
import argparse
import os
import subprocess
import sys
import yaml
# Directory two levels above this script's absolute path, i.e. the parent of
# the directory that contains it. main() changes into this directory before
# running the checks.
_SRC_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
def _is_project_file(actual_path, expected_filename):
"""Returns True if actual_path's name is |expected_filename| and is a file
that exists and is in in projects/."""
if os.path.basename(actual_path) != expected_filename:
return False
if os.path.basename(os.path.dirname(
os.path.dirname(actual_path))) != 'projects':
return False
return os.path.exists(actual_path)
# TODO: Check for -fsanitize=fuzzer in files as well.
def _check_one_lib_fuzzing_engine(build_sh_file):
  """Returns False if |build_sh_file| contains -lFuzzingEngine.

  This is deprecated behavior. $LIB_FUZZING_ENGINE should be used instead
  so that -fsanitize=fuzzer is used. Paths that _is_project_file() does not
  accept as a project's build.sh are not inspected and pass.

  Args:
    build_sh_file: Path of a file to inspect.

  Returns:
    False if "-lFuzzingEngine" appears before the first '#' on any line,
    True otherwise.
  """
  if not _is_project_file(build_sh_file, 'build.sh'):
    return True

  with open(build_sh_file) as build_sh:
    # Count lines from 1 so the reported number matches what editors and
    # other tools show; a 0-based index would point one line too early.
    for line_num, line in enumerate(build_sh, start=1):
      # Only the text before the first '#' is treated as code.
      uncommented_code = line.split('#')[0]
      if '-lFuzzingEngine' in uncommented_code:
        print(
            'Error: build.sh contains deprecated "-lFuzzingEngine" on line: '
            '{0}. Please use "$LIB_FUZZING_ENGINE" instead.'.format(line_num))
        return False
  return True
def check_lib_fuzzing_engine(paths):
  """Runs _check_one_lib_fuzzing_engine on every path in |paths|.

  Every path is checked even after a failure so that all problems are
  reported in one run. Returns True only if every individual check passed.
  """
  success = True
  for path in paths:
    if not _check_one_lib_fuzzing_engine(path):
      success = False
  return success
class ProjectYamlChecker:
  """Checks for a project.yaml file.

  The file is parsed once in the constructor. Each check_* method reports
  problems through print_error(), which also flips |self.success| to False.
  do_checks() runs all of them and returns the final value of |self.success|.
  """

  # Sections in a project.yaml and the constant values that they are allowed
  # to have.
  SECTIONS_AND_CONSTANTS = {
      'sanitizers': {'address', 'none', 'memory', 'undefined', 'dataflow'},
      'architectures': {'i386', 'x86_64'},
      'engines': {'afl', 'libfuzzer', 'honggfuzz', 'dataflow'}
  }

  # Note: this list must be updated when we allow new sections.
  # NOTE(review): 'engines' has its values validated above but is not listed
  # here, so a file using it is rejected by check_valid_section_names().
  # Confirm the intended section name and make the two agree.
  VALID_SECTION_NAMES = [
      'homepage', 'primary_contact', 'auto_ccs', 'sanitizers', 'architectures',
      'disabled', 'view_restrictions', 'coverage_extra_args', 'vendor_ccs'
  ]

  # Note that some projects like boost only have auto-ccs. However, forgetting
  # primary contact is probably a mistake.
  REQUIRED_SECTIONS = ['primary_contact']

  def __init__(self, filename):
    """Loads |filename| as YAML.

    Args:
      filename: Path of the project.yaml file to check.
    """
    self.filename = filename
    with open(filename) as file_handle:
      # An empty document loads as None. Fall back to an empty mapping so the
      # checks report the missing sections instead of raising AttributeError.
      self.data = yaml.safe_load(file_handle) or {}

    self.success = True

  def do_checks(self):
    """Do all project.yaml checks. Return True if they pass."""
    if self.is_disabled():
      return True

    checks = [
        self.check_project_yaml_constants, self.check_required_sections,
        self.check_valid_section_names, self.check_valid_emails
    ]
    # Run every check, even after a failure, so all errors are printed.
    for check_function in checks:
      check_function()
    return self.success

  def is_disabled(self):
    """Is this project disabled."""
    return self.data.get('disabled', False)

  def print_error(self, message, *args):
    """Print an error message and set self.success to False.

    Args:
      message: %-style format string.
      *args: Values interpolated into |message|.
    """
    self.success = False
    message = message % args
    print('Error in %s: %s' % (self.filename, message))

  def check_project_yaml_constants(self):
    """Check that certain sections only have certain constant values."""
    for section, allowed_constants in self.SECTIONS_AND_CONSTANTS.items():
      if section not in self.data:
        continue
      actual_constants = self.data[section]
      for constant in actual_constants:
        if constant not in allowed_constants:
          self.print_error('%s (in %s section) is not one of %s', constant,
                           section, allowed_constants)

  def check_valid_section_names(self):
    """Check that only valid sections are included."""
    for name in self.data:
      if name not in self.VALID_SECTION_NAMES:
        self.print_error('%s not a valid section name (%s)', name,
                         self.VALID_SECTION_NAMES)

  def check_required_sections(self):
    """Check that all required sections are present."""
    for section in self.REQUIRED_SECTIONS:
      if section not in self.data:
        self.print_error('No %s section.', section)

  def check_valid_emails(self):
    """Check that emails are valid looking.

    Addresses are read from the 'auto_ccs' and 'primary_contact' sections.
    Each section may hold a single string or a sequence of strings; missing
    or empty sections are skipped.
    """
    # Get email addresses.
    email_addresses = []
    for section in ['auto_ccs', 'primary_contact']:
      value = self.data.get(section)
      if not value:
        continue
      if isinstance(value, str):
        # A scalar value is one address. Passing it to extend() would add
        # every character as a separate "address".
        email_addresses.append(value)
      else:
        email_addresses.extend(value)

    # Sanity check them.
    for email_address in email_addresses:
      if (not isinstance(email_address, str) or '@' not in email_address or
          '.' not in email_address):
        self.print_error('%s is an invalid email address.', email_address)
def _check_one_project_yaml(project_yaml_filename):
  """Runs the project.yaml checks on |project_yaml_filename|.

  Paths that _is_project_file() does not accept as a project's project.yaml
  are skipped and count as passing. Otherwise returns the result of
  ProjectYamlChecker.do_checks() for the file.
  """
  if _is_project_file(project_yaml_filename, 'project.yaml'):
    return ProjectYamlChecker(project_yaml_filename).do_checks()
  return True
def check_project_yaml(paths):
  """Runs _check_one_project_yaml on every path in |paths|.

  Every path is checked even after a failure so that all problems are
  reported in one run. Returns True only if every individual check passed.
  """
  success = True
  for path in paths:
    if not _check_one_project_yaml(path):
      success = False
  return success
def do_checks(changed_files):
  """Runs all presubmit checks on |changed_files|.

  Returns False if any check fails, True otherwise.
  """
  checks = (check_license, yapf, lint, check_project_yaml,
            check_lib_fuzzing_engine)

  # Collect every result before combining them, so a failing check never
  # prevents the later ones from running. Reporting all errors at once means
  # fewer check-fix-check cycles for the user.
  results = []
  for check in checks:
    results.append(check(changed_files))
  return all(results)
# Base names that are license-checked regardless of their extension.
_CHECK_LICENSE_FILENAMES = ['Dockerfile']
# Extensions (including the leading dot) of files that are license-checked.
_CHECK_LICENSE_EXTENSIONS = [
    '.bash',
    '.c',
    '.cc',
    '.cpp',
    '.css',
    '.h',
    '.htm',
    '.html',
    '.js',
    '.proto',
    '.py',
    '.sh',
    '.yaml',
]
# A file counts as licensed when its contents include this string.
_LICENSE_STRING = 'http://www.apache.org/licenses/LICENSE-2.0'


def check_license(paths):
  """Validate license header.

  Only files whose base name is in _CHECK_LICENSE_FILENAMES or whose
  extension is in _CHECK_LICENSE_EXTENSIONS are inspected. All such files are
  checked, and each one lacking _LICENSE_STRING is reported.

  Args:
    paths: Paths of the files to check.

  Returns:
    True if every inspected file contains _LICENSE_STRING (or nothing needed
    inspecting), False otherwise.
  """
  success = True
  for path in paths:
    filename = os.path.basename(path)
    extension = os.path.splitext(path)[1]
    if (filename not in _CHECK_LICENSE_FILENAMES and
        extension not in _CHECK_LICENSE_EXTENSIONS):
      continue

    # Decode leniently with a fixed encoding. The license string is plain
    # ASCII, so undecodable bytes elsewhere in the file must not abort the
    # whole presubmit run with UnicodeDecodeError.
    with open(path, encoding='utf-8', errors='replace') as file_handle:
      if _LICENSE_STRING not in file_handle.read():
        print('Missing license header in file %s.' % str(path))
        success = False

  return success
def bool_to_returncode(success):
  """Converts |success| to a process exit code and prints the outcome.

  Prints 'Success.' and returns 0 when |success| is truthy; otherwise prints
  'Failed.' and returns 1.
  """
  print('Success.' if success else 'Failed.')
  return 0 if success else 1
def is_python(path):
  """Returns True if the extension of |path| is exactly '.py'."""
  _, extension = os.path.splitext(path)
  return extension == '.py'
def lint(paths):
  """Runs pylint on the Python files in |paths|.

  Non-Python paths are ignored. Returns True when there is nothing to lint
  or pylint exits with status 0, and False otherwise.
  """
  python_paths = list(filter(is_python, paths))
  if not python_paths:
    return True

  command = ['python3', '-m', 'pylint', '-j', '0'] + python_paths
  completed = subprocess.run(command, check=False)
  return completed.returncode == 0
def yapf(paths, validate=True):
  """Runs yapf on the Python files in |paths|.

  With |validate| set, yapf is invoked with -d and the files are left
  untouched; otherwise it is invoked with -i. Non-Python paths are ignored.
  Returns True when there is nothing to process or yapf exits with status 0,
  and False otherwise.
  """
  python_paths = list(filter(is_python, paths))
  if not python_paths:
    return True

  if validate:
    mode_flag = '-d'
  else:
    mode_flag = '-i'

  command = ['yapf', mode_flag, '-p'] + python_paths
  completed = subprocess.run(command, check=False)
  return completed.returncode == 0
def get_changed_files():
  """Return a list of absolute paths of files changed in this git branch.

  The list comes from `git diff --name-only FETCH_HEAD`. Names that no longer
  refer to a regular file (for example deleted files) are left out.

  Returns:
    A list of absolute, normalized paths.

  Raises:
    subprocess.CalledProcessError: If the git command exits with a non-zero
      status.
  """
  # FIXME: This doesn't work if branch is behind master.
  diff_command = ['git', 'diff', '--name-only', 'FETCH_HEAD']
  diff_output = subprocess.check_output(diff_command, cwd=_SRC_ROOT).decode()

  # git prints these names relative to the top of the working tree, not to
  # the current directory. Resolve them against _SRC_ROOT (the directory
  # main() uses as the source root) so the result does not depend on where
  # the script was started from.
  candidates = [
      os.path.normpath(os.path.join(_SRC_ROOT, name))
      for name in diff_output.splitlines()
  ]
  return [path for path in candidates if os.path.isfile(path)]
def main():
  """Check changes on a branch for common issues before submitting.

  An optional positional argument ('format', 'lint' or 'license') selects a
  single action; without it every presubmit check runs. Returns the process
  exit code: 0 on success, 1 on failure.
  """
  parser = argparse.ArgumentParser(description='Presubmit script for oss-fuzz.')
  parser.add_argument('command',
                      choices=['format', 'lint', 'license'],
                      nargs='?')
  args = parser.parse_args()

  # The changed files are collected first, then the working directory is
  # switched to the source root for the checks themselves.
  changed_files = get_changed_files()
  os.chdir(_SRC_ROOT)

  # Actions the user can request individually. 'format' rewrites files in
  # place instead of only validating them.
  single_actions = {
      'format': lambda files: yapf(files, False),
      'lint': lint,
      'license': check_license,
  }
  # With no command given, fall back to running all of the checks.
  selected_action = single_actions.get(args.command, do_checks)
  return bool_to_returncode(selected_action(changed_files))


if __name__ == '__main__':
  sys.exit(main())