gh-90110: Fix the c-analyzer Tool (gh-96731)

This includes:

* update the whitelists
* fixes so we can stop ignoring some of the files
* ensure Include/cpython/*.h get analyzed
This commit is contained in:
Eric Snow 2022-09-12 11:09:31 -06:00 committed by GitHub
parent 662782e95f
commit 1756ffd66a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 499 additions and 118 deletions

View File

@ -18,7 +18,6 @@ extern "C" {
#include "pycore_exceptions.h" // struct _Py_exc_state
#include "pycore_floatobject.h" // struct _Py_float_state
#include "pycore_genobject.h" // struct _Py_async_gen_state
#include "pycore_gil.h" // struct _gil_runtime_state
#include "pycore_gc.h" // struct _gc_runtime_state
#include "pycore_list.h" // struct _Py_list_state
#include "pycore_tuple.h" // struct _Py_tuple_state

View File

@ -104,6 +104,25 @@ def format_filename(filename, relroot=USE_CWD, *,
return filename
def match_path_tail(path1, path2):
    """Return True if the paths are equal or one is the tail of the other.

    Two distinct absolute paths never match.  When exactly one path is
    absolute, only the relative one is tried as a tail of the absolute
    one.  When both are relative, each is tried as a tail of the other.
    """
    if path1 == path2:
        return True
    if not os.path.isabs(path1):
        if os.path.isabs(path2):
            # Only the relative path can be the tail.
            return _match_tail(path2, path1)
        # Both are relative, so try both directions.
        return _match_tail(path1, path2) or _match_tail(path2, path1)
    # From here on path1 is absolute.
    if os.path.isabs(path2):
        return False
    return _match_tail(path1, path2)
def _match_tail(path, tail):
assert not os.path.isabs(tail), repr(tail)
return path.endswith(os.path.sep + tail)
##################################
# find files

View File

@ -22,8 +22,12 @@ def parse_files(filenames, *,
if get_file_preprocessor is None:
get_file_preprocessor = _get_preprocessor()
for filename in filenames:
yield from _parse_file(
filename, match_kind, get_file_preprocessor, file_maxsizes)
try:
yield from _parse_file(
filename, match_kind, get_file_preprocessor, file_maxsizes)
except Exception:
print(f'# requested file: <{filename}>')
raise # re-raise
def _parse_file(filename, match_kind, get_file_preprocessor, maxsizes):

View File

@ -35,9 +35,11 @@
def preprocess(source, *,
incldirs=None,
includes=None,
macros=None,
samefiles=None,
filename=None,
cwd=None,
tool=True,
):
"""...
@ -45,17 +47,27 @@ def preprocess(source, *,
CWD should be the project root and "source" should be relative.
"""
if tool:
logger.debug(f'CWD: {os.getcwd()!r}')
logger.debug(f'incldirs: {incldirs!r}')
logger.debug(f'macros: {macros!r}')
if not cwd:
cwd = os.getcwd()
logger.debug(f'CWD: {cwd!r}')
logger.debug(f'incldirs: {incldirs!r}')
logger.debug(f'includes: {includes!r}')
logger.debug(f'macros: {macros!r}')
logger.debug(f'samefiles: {samefiles!r}')
_preprocess = _get_preprocessor(tool)
with _good_file(source, filename) as source:
return _preprocess(source, incldirs, macros, samefiles) or ()
return _preprocess(
source,
incldirs,
includes,
macros,
samefiles,
cwd,
) or ()
else:
source, filename = _resolve_source(source, filename)
# We ignore "includes", "macros", etc.
return _pure.preprocess(source, filename)
return _pure.preprocess(source, filename, cwd)
# if _run() returns just the lines:
# text = _run(source)
@ -72,6 +84,7 @@ def preprocess(source, *,
def get_preprocessor(*,
file_macros=None,
file_includes=None,
file_incldirs=None,
file_same=None,
ignore_exc=False,
@ -80,10 +93,12 @@ def get_preprocessor(*,
_preprocess = preprocess
if file_macros:
file_macros = tuple(_parse_macros(file_macros))
if file_includes:
file_includes = tuple(_parse_includes(file_includes))
if file_incldirs:
file_incldirs = tuple(_parse_incldirs(file_incldirs))
if file_same:
file_same = tuple(file_same)
file_same = dict(file_same or ())
if not callable(ignore_exc):
ignore_exc = (lambda exc, _ig=ignore_exc: _ig)
@ -91,16 +106,26 @@ def get_file_preprocessor(filename):
filename = filename.strip()
if file_macros:
macros = list(_resolve_file_values(filename, file_macros))
if file_includes:
# There's a small chance we could need to filter out any
# includes that import "filename". It isn't clear that it's
# a problem any longer. If we do end up filtering then
# it may make sense to use c_common.fsutil.match_path_tail().
includes = [i for i, in _resolve_file_values(filename, file_includes)]
if file_incldirs:
incldirs = [v for v, in _resolve_file_values(filename, file_incldirs)]
if file_same:
samefiles = _resolve_samefiles(filename, file_same)
def preprocess(**kwargs):
if file_macros and 'macros' not in kwargs:
kwargs['macros'] = macros
if file_includes and 'includes' not in kwargs:
kwargs['includes'] = includes
if file_incldirs and 'incldirs' not in kwargs:
kwargs['incldirs'] = [v for v, in _resolve_file_values(filename, file_incldirs)]
if file_same and 'file_same' not in kwargs:
kwargs['samefiles'] = file_same
kwargs['incldirs'] = incldirs
if file_same and 'samefiles' not in kwargs:
kwargs['samefiles'] = samefiles
kwargs.setdefault('filename', filename)
with handling_errors(ignore_exc, log_err=log_err):
return _preprocess(filename, **kwargs)
@ -120,6 +145,11 @@ def _parse_macros(macros):
yield row
def _parse_includes(includes):
    """Yield each row parsed from the tab-separated "glob/include" table.

    The second item of every pair produced by _parse_table() is dropped.
    """
    parsed = _parse_table(includes, '\t', 'glob\tinclude', default=None)
    yield from (row for row, _srcfile in parsed)
def _parse_incldirs(incldirs):
for row, srcfile in _parse_table(incldirs, '\t', 'glob\tdirname', default=None):
glob, dirname = row
@ -130,6 +160,43 @@ def _parse_incldirs(incldirs):
yield row
def _resolve_samefiles(filename, file_same):
    """Return the list of "same file" paths that apply to "filename".

    "filename" must be normalized and contain no wildcard.  "file_same"
    is a mapping whose items are handed to _resolve_file_values(); each
    value it yields is a collection of patterns.  Every pattern is
    resolved with _resolve_samefile() and falsy results are dropped.
    """
    assert '*' not in filename, (filename,)
    assert os.path.normpath(filename) == filename, (filename,)
    suffix = os.path.splitext(filename)[1]
    resolved = []
    for patterns, in _resolve_file_values(filename, file_same.items()):
        candidates = (
            _resolve_samefile(filename, pattern, suffix)
            for pattern in patterns
        )
        resolved.extend(found for found in candidates if found)
    return resolved
def _resolve_samefile(filename, pattern, suffix):
if pattern == filename:
return None
if pattern.endswith(os.path.sep):
pattern += f'*{suffix}'
assert os.path.normpath(pattern) == pattern, (pattern,)
if '*' in os.path.dirname(pattern):
raise NotImplementedError((filename, pattern))
if '*' not in os.path.basename(pattern):
return pattern
common = os.path.commonpath([filename, pattern])
relpattern = pattern[len(common) + len(os.path.sep):]
relpatterndir = os.path.dirname(relpattern)
relfile = filename[len(common) + len(os.path.sep):]
if os.path.basename(pattern) == '*':
return os.path.join(common, relpatterndir, relfile)
elif os.path.basename(relpattern) == '*' + suffix:
return os.path.join(common, relpatterndir, relfile)
else:
raise NotImplementedError((filename, pattern))
@contextlib.contextmanager
def handling_errors(ignore_exc=None, *, log_err=None):
try:

View File

@ -44,7 +44,7 @@ def run_cmd(argv, *,
return proc.stdout
def preprocess(tool, filename, **kwargs):
def preprocess(tool, filename, cwd=None, **kwargs):
argv = _build_argv(tool, filename, **kwargs)
logger.debug(' '.join(shlex.quote(v) for v in argv))
@ -59,19 +59,24 @@ def preprocess(tool, filename, **kwargs):
# distutil compiler object's preprocess() method, since that
# one writes to stdout/stderr and it's simpler to do it directly
# through subprocess.
return run_cmd(argv)
return run_cmd(argv, cwd=cwd)
def _build_argv(
tool,
filename,
incldirs=None,
includes=None,
macros=None,
preargs=None,
postargs=None,
executable=None,
compiler=None,
):
if includes:
includes = tuple(f'-include{i}' for i in includes)
postargs = (includes + postargs) if postargs else includes
compiler = distutils.ccompiler.new_compiler(
compiler=compiler or tool,
)

View File

@ -7,7 +7,12 @@
TOOL = 'gcc'
# https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
LINE_MARKER_RE = re.compile(r'^# (\d+) "([^"]+)"(?: [1234])*$')
# flags:
# 1 start of a new file
# 2 returning to a file (after including another)
# 3 following text comes from a system header file
# 4 following text should be treated as wrapped in an implicit extern "C" block
LINE_MARKER_RE = re.compile(r'^# (\d+) "([^"]+)"((?: [1234])*)$')
PREPROC_DIRECTIVE_RE = re.compile(r'^\s*#\s*(\w+)\b.*')
COMPILER_DIRECTIVE_RE = re.compile(r'''
^
@ -40,32 +45,112 @@
)
def preprocess(filename, incldirs=None, macros=None, samefiles=None):
def preprocess(filename,
incldirs=None,
includes=None,
macros=None,
samefiles=None,
cwd=None,
):
if not cwd or not os.path.isabs(cwd):
cwd = os.path.abspath(cwd or '.')
filename = _normpath(filename, cwd)
text = _common.preprocess(
TOOL,
filename,
incldirs=incldirs,
includes=includes,
macros=macros,
#preargs=PRE_ARGS,
postargs=POST_ARGS,
executable=['gcc'],
compiler='unix',
cwd=cwd,
)
return _iter_lines(text, filename, samefiles)
return _iter_lines(text, filename, samefiles, cwd)
def _iter_lines(text, filename, samefiles, *, raw=False):
def _iter_lines(text, reqfile, samefiles, cwd, raw=False):
lines = iter(text.splitlines())
# Build the lines and filter out directives.
partial = 0 # depth
origfile = None
# The first line is special.
# The next two lines are consistent.
for expected in [
f'# 1 "{reqfile}"',
'# 1 "<built-in>"',
'# 1 "<command-line>"',
]:
line = next(lines)
if line != expected:
raise NotImplementedError((line, expected))
# Do all the CLI-provided includes.
filter_reqfile = (lambda f: _filter_reqfile(f, reqfile, samefiles))
make_info = (lambda lno: _common.FileInfo(reqfile, lno))
last = None
for line in lines:
m = LINE_MARKER_RE.match(line)
if m:
lno, origfile = m.groups()
lno = int(lno)
elif _filter_orig_file(origfile, filename, samefiles):
assert last != reqfile, (last,)
lno, included, flags = _parse_marker_line(line, reqfile)
if not included:
raise NotImplementedError((line,))
if included == reqfile:
# This will be the last one.
assert not flags, (line, flags)
else:
assert 1 in flags, (line, flags)
yield from _iter_top_include_lines(
lines,
_normpath(included, cwd),
cwd,
filter_reqfile,
make_info,
raw,
)
last = included
# The last one is always the requested file.
assert included == reqfile, (line,)
def _iter_top_include_lines(lines, topfile, cwd,
filter_reqfile, make_info,
raw):
partial = 0 # depth
files = [topfile]
# We start at 1 in case there are source lines (including blank ones)
# before the first marker line. Also, we already verified in
# _parse_marker_line() that the preprocessor reported lno as 1.
lno = 1
for line in lines:
if line == '# 1 "<command-line>" 2':
# We're done with this top-level include.
return
_lno, included, flags = _parse_marker_line(line)
if included:
lno = _lno
included = _normpath(included, cwd)
# We hit a marker line.
if 1 in flags:
# We're entering a file.
# XXX Cycles are unexpected?
#assert included not in files, (line, files)
files.append(included)
elif 2 in flags:
# We're returning to a file.
assert files and included in files, (line, files)
assert included != files[-1], (line, files)
while files[-1] != included:
files.pop()
# XXX How can a file return to line 1?
#assert lno > 1, (line, lno)
else:
# It's the next line from the file.
assert included == files[-1], (line, files)
assert lno > 1, (line, lno)
elif not files:
raise NotImplementedError((line,))
elif filter_reqfile(files[-1]):
assert lno is not None, (line, files[-1])
if (m := PREPROC_DIRECTIVE_RE.match(line)):
name, = m.groups()
if name != 'pragma':
@ -74,7 +159,7 @@ def _iter_lines(text, filename, samefiles, *, raw=False):
if not raw:
line, partial = _strip_directives(line, partial=partial)
yield _common.SourceLine(
_common.FileInfo(filename, lno),
make_info(lno),
'source',
line or '',
None,
@ -82,6 +167,34 @@ def _iter_lines(text, filename, samefiles, *, raw=False):
lno += 1
def _parse_marker_line(line, reqfile=None):
    """Split a preprocessor line marker into (lno, filename, flags).

    Returns (None, None, None) if "line" does not match LINE_MARKER_RE.
    Otherwise "lno" is the line number as an int, "filename" is the
    quoted file name and "flags" is a set of ints (or an empty tuple
    when the marker carries no flags).  Assertions check that the line
    number agrees with what the flags say about the marker.
    """
    match = LINE_MARKER_RE.match(line)
    if match is None:
        return None, None, None

    lno_text, origfile, flags_text = match.groups()
    lno = int(lno_text)
    assert lno > 0, (line, lno)
    assert origfile not in ('<built-in>', '<command-line>'), (line,)
    flags = {int(f) for f in flags_text.split()} if flags_text else ()

    if 1 in flags:
        # Entering a file: it starts at line 1 and cannot also be a return.
        assert lno == 1, (line, lno)
        assert 2 not in flags, (line,)
    elif 2 not in flags:
        if reqfile and origfile == reqfile:
            # The requested file itself starts here, without any flags.
            assert lno == 1, (line, lno)
            assert not flags, (line, flags)
        else:
            # Just the next line of the current file.
            assert lno > 1, (line, lno)
    # Otherwise we are returning to a file (flag 2); its line number
    # is deliberately left unchecked.
    return lno, origfile, flags
def _strip_directives(line, partial=0):
# We assume there are no string literals with parens in directive bodies.
while partial > 0:
@ -106,18 +219,16 @@ def _strip_directives(line, partial=0):
return line, partial
def _filter_orig_file(origfile, current, samefiles):
if origfile == current:
def _filter_reqfile(current, reqfile, samefiles):
if current == reqfile:
return True
if origfile == '<stdin>':
if current == '<stdin>':
return True
if current in samefiles:
return True
if os.path.isabs(origfile):
return False
for filename in samefiles or ():
if filename.endswith(os.path.sep):
filename += os.path.basename(current)
if origfile == filename:
return True
return False
def _normpath(filename, cwd):
assert cwd
return os.path.normpath(os.path.join(cwd, filename))

View File

@ -4,7 +4,7 @@
from . import common as _common
def preprocess(lines, filename=None):
def preprocess(lines, filename=None, cwd=None):
if isinstance(lines, str):
with _open_source(lines, filename) as (lines, filename):
yield from preprocess(lines, filename)

View File

@ -50,9 +50,6 @@ def clean_lines(text):
EXCLUDED = clean_lines('''
# @begin=conf@
# Rather than fixing for this one, we manually make sure it's okay.
Modules/_sha3/kcp/KeccakP-1600-opt64.c
# OSX
#Modules/_ctypes/darwin/*.c
#Modules/_ctypes/libffi_osx/*.c
@ -69,12 +66,11 @@ def clean_lines(text):
Python/dynload_aix.c # sys/ldr.h
Python/dynload_dl.c # dl.h
Python/dynload_hpux.c # dl.h
Python/thread_pthread.h
Python/emscripten_signal.c
Python/thread_pthread.h
Python/thread_pthread_stubs.h
# only huge constants (safe but parsing is slow)
Modules/_blake2/impl/blake2-kat.h
Modules/_ssl_data.h
Modules/_ssl_data_300.h
Modules/_ssl_data_111.h
@ -93,20 +89,9 @@ def clean_lines(text):
# XXX Fix the parser.
EXCLUDED += clean_lines('''
# The tool should be able to parse these...
Modules/hashlib.h
Objects/stringlib/codecs.h
Objects/stringlib/count.h
Objects/stringlib/ctype.h
Objects/stringlib/fastsearch.h
Objects/stringlib/find.h
Objects/stringlib/find_max_char.h
Objects/stringlib/partition.h
Objects/stringlib/replace.h
Objects/stringlib/split.h
Modules/_dbmmodule.c
Modules/cjkcodecs/_codecs_*.c
# The problem with xmlparse.c is that something
# has gone wrong where # we handle "maybe inline actual"
# in Tools/c-analyzer/c_parser/parser/_global.py.
Modules/expat/xmlparse.c
''')
@ -121,6 +106,44 @@ def clean_lines(text):
Modules/_tkinter.c /usr/include/tcl8.6
Modules/tkappinit.c /usr/include/tcl
Modules/_decimal/**/*.c Modules/_decimal/libmpdec
Objects/stringlib/*.h Objects
# @end=tsv@
''')[1:]
INCLUDES = clean_lines('''
# @begin=tsv@
glob include
**/*.h Python.h
Include/**/*.h object.h
# for Py_HAVE_CONDVAR
Include/internal/pycore_gil.h pycore_condvar.h
Python/thread_pthread.h pycore_condvar.h
# other
Objects/stringlib/join.h stringlib/stringdefs.h
Objects/stringlib/ctype.h stringlib/stringdefs.h
Objects/stringlib/transmogrify.h stringlib/stringdefs.h
#Objects/stringlib/fastsearch.h stringlib/stringdefs.h
#Objects/stringlib/count.h stringlib/stringdefs.h
#Objects/stringlib/find.h stringlib/stringdefs.h
#Objects/stringlib/partition.h stringlib/stringdefs.h
#Objects/stringlib/split.h stringlib/stringdefs.h
Objects/stringlib/fastsearch.h stringlib/ucs1lib.h
Objects/stringlib/count.h stringlib/ucs1lib.h
Objects/stringlib/find.h stringlib/ucs1lib.h
Objects/stringlib/partition.h stringlib/ucs1lib.h
Objects/stringlib/split.h stringlib/ucs1lib.h
Objects/stringlib/find_max_char.h Objects/stringlib/ucs1lib.h
Objects/stringlib/count.h Objects/stringlib/fastsearch.h
Objects/stringlib/find.h Objects/stringlib/fastsearch.h
Objects/stringlib/partition.h Objects/stringlib/fastsearch.h
Objects/stringlib/replace.h Objects/stringlib/fastsearch.h
Objects/stringlib/split.h Objects/stringlib/fastsearch.h
# @end=tsv@
''')[1:]
@ -132,9 +155,11 @@ def clean_lines(text):
Include/internal/*.h Py_BUILD_CORE 1
Python/**/*.c Py_BUILD_CORE 1
Python/**/*.h Py_BUILD_CORE 1
Parser/**/*.c Py_BUILD_CORE 1
Parser/**/*.h Py_BUILD_CORE 1
Objects/**/*.c Py_BUILD_CORE 1
Objects/**/*.h Py_BUILD_CORE 1
Modules/_asynciomodule.c Py_BUILD_CORE 1
Modules/_codecsmodule.c Py_BUILD_CORE 1
@ -170,11 +195,6 @@ def clean_lines(text):
Modules/symtablemodule.c Py_BUILD_CORE 1
Modules/timemodule.c Py_BUILD_CORE 1
Modules/unicodedata.c Py_BUILD_CORE 1
Objects/stringlib/codecs.h Py_BUILD_CORE 1
Objects/stringlib/unicode_format.h Py_BUILD_CORE 1
Parser/string_parser.h Py_BUILD_CORE 1
Parser/pegen.h Py_BUILD_CORE 1
Python/condvar.h Py_BUILD_CORE 1
Modules/_json.c Py_BUILD_CORE_BUILTIN 1
Modules/_pickle.c Py_BUILD_CORE_BUILTIN 1
@ -202,45 +222,22 @@ def clean_lines(text):
Include/cpython/traceback.h Py_CPYTHON_TRACEBACK_H 1
Include/cpython/tupleobject.h Py_CPYTHON_TUPLEOBJECT_H 1
Include/cpython/unicodeobject.h Py_CPYTHON_UNICODEOBJECT_H 1
Include/internal/pycore_code.h SIZEOF_VOID_P 8
Include/internal/pycore_frame.h SIZEOF_VOID_P 8
# implied include of pyport.h
Include/**/*.h PyAPI_DATA(RTYPE) extern RTYPE
Include/**/*.h PyAPI_FUNC(RTYPE) RTYPE
Include/**/*.h Py_DEPRECATED(VER) /* */
Include/**/*.h _Py_NO_RETURN /* */
Include/**/*.h PYLONG_BITS_IN_DIGIT 30
Modules/**/*.c PyMODINIT_FUNC PyObject*
Objects/unicodeobject.c PyMODINIT_FUNC PyObject*
Python/marshal.c PyMODINIT_FUNC PyObject*
Python/_warnings.c PyMODINIT_FUNC PyObject*
Python/Python-ast.c PyMODINIT_FUNC PyObject*
Python/import.c PyMODINIT_FUNC PyObject*
Modules/_testcapimodule.c PyAPI_FUNC(RTYPE) RTYPE
Python/getargs.c PyAPI_FUNC(RTYPE) RTYPE
Objects/stringlib/unicode_format.h Py_LOCAL_INLINE(type) static inline type
Include/pymath.h _Py__has_builtin(x) 0
# implied include of pymacro.h
*/clinic/*.c.h PyDoc_VAR(name) static const char name[]
*/clinic/*.c.h PyDoc_STR(str) str
*/clinic/*.c.h PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
# implied include of exports.h
#Modules/_io/bytesio.c Py_EXPORTED_SYMBOL /* */
# implied include of object.h
Include/**/*.h PyObject_HEAD PyObject ob_base;
Include/**/*.h PyObject_VAR_HEAD PyVarObject ob_base;
# implied include of pyconfig.h
Include/**/*.h SIZEOF_WCHAR_T 4
# implied include of <unistd.h>
Include/**/*.h _POSIX_THREADS 1
Include/**/*.h HAVE_PTHREAD_H 1
# from pyconfig.h
Include/cpython/pthread_stubs.h HAVE_PTHREAD_STUBS 1
Python/thread_pthread_stubs.h HAVE_PTHREAD_STUBS 1
# from Objects/bytesobject.c
Objects/stringlib/partition.h STRINGLIB_GET_EMPTY() bytes_get_empty()
Objects/stringlib/join.h STRINGLIB_MUTABLE 0
Objects/stringlib/partition.h STRINGLIB_MUTABLE 0
Objects/stringlib/split.h STRINGLIB_MUTABLE 0
Objects/stringlib/transmogrify.h STRINGLIB_MUTABLE 0
# from Makefile
Modules/getpath.c PYTHONPATH 1
Modules/getpath.c PREFIX ...
@ -248,13 +245,9 @@ def clean_lines(text):
Modules/getpath.c VERSION ...
Modules/getpath.c VPATH ...
Modules/getpath.c PLATLIBDIR ...
# from Modules/_sha3/sha3module.c
Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c PLATFORM_BYTE_ORDER 4321 # force big-endian
Modules/_sha3/kcp/*.c KeccakOpt 64
Modules/_sha3/kcp/*.c KeccakP200_excluded 1
Modules/_sha3/kcp/*.c KeccakP400_excluded 1
Modules/_sha3/kcp/*.c KeccakP800_excluded 1
#Modules/_dbmmodule.c USE_GDBM_COMPAT 1
Modules/_dbmmodule.c USE_NDBM 1
#Modules/_dbmmodule.c USE_BERKDB 1
# See: setup.py
Modules/_decimal/**/*.c CONFIG_64 1
@ -263,11 +256,17 @@ def clean_lines(text):
Modules/expat/xmlparse.c XML_POOR_ENTROPY 1
Modules/_dbmmodule.c HAVE_GDBM_DASH_NDBM_H 1
# from Modules/_sha3/sha3module.c
Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c PLATFORM_BYTE_ORDER 4321 # force big-endian
Modules/_sha3/kcp/*.c KeccakOpt 64
Modules/_sha3/kcp/*.c KeccakP200_excluded 1
Modules/_sha3/kcp/*.c KeccakP400_excluded 1
Modules/_sha3/kcp/*.c KeccakP800_excluded 1
# others
Modules/_sre/sre_lib.h LOCAL(type) static inline type
Modules/_sre/sre_lib.h SRE(F) sre_ucs2_##F
Objects/stringlib/codecs.h STRINGLIB_IS_UNICODE 1
Include/internal/pycore_bitutils.h _Py__has_builtin(B) 0
# @end=tsv@
''')[1:]
@ -284,16 +283,14 @@ def clean_lines(text):
# -Wno-missing-field-initializers
# -Werror=implicit-function-declaration
SAME = [
'./Include/cpython/',
]
SAME = {
_abs('Include/*.h'): [_abs('Include/cpython/')],
}
MAX_SIZES = {
# GLOB: (MAXTEXT, MAXLINES),
# default: (10_000, 200)
# First match wins.
_abs('Include/internal/pycore_global_strings.h'): (5_000, 1000),
_abs('Include/internal/pycore_runtime_init_generated.h'): (5_000, 1000),
_abs('Include/**/*.h'): (5_000, 500),
_abs('Modules/_ctypes/ctypes.h'): (5_000, 500),
_abs('Modules/_datetimemodule.c'): (20_000, 300),
_abs('Modules/posixmodule.c'): (20_000, 500),
@ -301,19 +298,37 @@ def clean_lines(text):
_abs('Modules/_testcapimodule.c'): (20_000, 400),
_abs('Modules/expat/expat.h'): (10_000, 400),
_abs('Objects/stringlib/unicode_format.h'): (10_000, 400),
_abs('Objects/typeobject.c'): (20_000, 200),
_abs('Objects/typeobject.c'): (35_000, 200),
_abs('Python/compile.c'): (20_000, 500),
_abs('Python/deepfreeze/*.c'): (20_000, 500),
_abs('Python/frozen_modules/*.h'): (20_000, 500),
_abs('Python/pylifecycle.c'): (500_000, 5000),
_abs('Python/pystate.c'): (500_000, 5000),
# Generated files:
_abs('Include/internal/pycore_opcode.h'): (10_000, 1000),
_abs('Include/internal/pycore_global_strings.h'): (5_000, 1000),
_abs('Include/internal/pycore_runtime_init_generated.h'): (5_000, 1000),
_abs('Python/deepfreeze/*.c'): (20_000, 500),
_abs('Python/frozen_modules/*.h'): (20_000, 500),
_abs('Python/opcode_targets.h'): (10_000, 500),
_abs('Python/stdlib_module_names.h'): (5_000, 500),
# These large files are currently ignored (see above).
_abs('Modules/_ssl_data.h'): (80_000, 10_000),
_abs('Modules/_ssl_data_300.h'): (80_000, 10_000),
_abs('Modules/_ssl_data_111.h'): (80_000, 10_000),
_abs('Modules/cjkcodecs/mappings_*.h'): (160_000, 2_000),
_abs('Modules/unicodedata_db.h'): (180_000, 3_000),
_abs('Modules/unicodename_db.h'): (1_200_000, 15_000),
_abs('Objects/unicodetype_db.h'): (240_000, 3_000),
# Catch-alls:
_abs('Include/**/*.h'): (5_000, 500),
}
def get_preprocessor(*,
file_macros=None,
file_includes=None,
file_incldirs=None,
file_same=None,
**kwargs
@ -321,13 +336,20 @@ def get_preprocessor(*,
macros = tuple(MACROS)
if file_macros:
macros += tuple(file_macros)
includes = tuple(INCLUDES)
if file_includes:
includes += tuple(file_includes)
incldirs = tuple(INCL_DIRS)
if file_incldirs:
incldirs += tuple(file_incldirs)
samefiles = dict(SAME)
if file_same:
samefiles.update(file_same)
return _get_preprocessor(
file_macros=macros,
file_includes=includes,
file_incldirs=incldirs,
file_same=file_same,
file_same=samefiles,
**kwargs
)

View File

@ -130,6 +130,8 @@ Python/symtable.c - PySTEntry_Type -
# private static builtin types
Objects/setobject.c - _PySetDummy_Type -
Objects/stringlib/unicode_format.h - PyFormatterIter_Type -
Objects/stringlib/unicode_format.h - PyFieldNameIter_Type -
Objects/unicodeobject.c - EncodingMapType -
#Objects/unicodeobject.c - PyFieldNameIter_Type -
#Objects/unicodeobject.c - PyFormatterIter_Type -
@ -296,13 +298,76 @@ Objects/setobject.c - _dummy_struct -
Objects/setobject.c - _PySet_Dummy -
Objects/sliceobject.c - _Py_EllipsisObject -
#-----------------------
# statically initialized
# argument clinic
Objects/clinic/odictobject.c.h OrderedDict_fromkeys _kwtuple -
Objects/clinic/odictobject.c.h OrderedDict_setdefault _kwtuple -
Objects/clinic/odictobject.c.h OrderedDict_pop _kwtuple -
Objects/clinic/odictobject.c.h OrderedDict_popitem _kwtuple -
Objects/clinic/odictobject.c.h OrderedDict_move_to_end _kwtuple -
Objects/clinic/funcobject.c.h func_new _kwtuple -
Objects/clinic/longobject.c.h long_new _kwtuple -
Objects/clinic/longobject.c.h int_to_bytes _kwtuple -
Objects/clinic/longobject.c.h int_from_bytes _kwtuple -
Objects/clinic/listobject.c.h list_sort _kwtuple -
Objects/clinic/bytearrayobject.c.h bytearray___init__ _kwtuple -
Objects/clinic/bytearrayobject.c.h bytearray_translate _kwtuple -
Objects/clinic/bytearrayobject.c.h bytearray_split _kwtuple -
Objects/clinic/bytearrayobject.c.h bytearray_rsplit _kwtuple -
Objects/clinic/bytearrayobject.c.h bytearray_decode _kwtuple -
Objects/clinic/bytearrayobject.c.h bytearray_splitlines _kwtuple -
Objects/clinic/bytearrayobject.c.h bytearray_hex _kwtuple -
Objects/clinic/memoryobject.c.h memoryview _kwtuple -
Objects/clinic/memoryobject.c.h memoryview_cast _kwtuple -
Objects/clinic/memoryobject.c.h memoryview_tobytes _kwtuple -
Objects/clinic/memoryobject.c.h memoryview_hex _kwtuple -
Objects/clinic/enumobject.c.h enum_new _kwtuple -
Objects/clinic/structseq.c.h structseq_new _kwtuple -
Objects/clinic/descrobject.c.h mappingproxy_new _kwtuple -
Objects/clinic/descrobject.c.h property_init _kwtuple -
Objects/clinic/codeobject.c.h code_replace _kwtuple -
Objects/clinic/codeobject.c.h code__varname_from_oparg _kwtuple -
Objects/clinic/moduleobject.c.h module___init__ _kwtuple -
Objects/clinic/bytesobject.c.h bytes_split _kwtuple -
Objects/clinic/bytesobject.c.h bytes_rsplit _kwtuple -
Objects/clinic/bytesobject.c.h bytes_translate _kwtuple -
Objects/clinic/bytesobject.c.h bytes_decode _kwtuple -
Objects/clinic/bytesobject.c.h bytes_splitlines _kwtuple -
Objects/clinic/bytesobject.c.h bytes_hex _kwtuple -
Objects/clinic/bytesobject.c.h bytes_new _kwtuple -
Objects/clinic/unicodeobject.c.h unicode_encode _kwtuple -
Objects/clinic/unicodeobject.c.h unicode_expandtabs _kwtuple -
Objects/clinic/unicodeobject.c.h unicode_split _kwtuple -
Objects/clinic/unicodeobject.c.h unicode_rsplit _kwtuple -
Objects/clinic/unicodeobject.c.h unicode_splitlines _kwtuple -
Objects/clinic/unicodeobject.c.h unicode_new _kwtuple -
Objects/clinic/complexobject.c.h complex_new _kwtuple -
Python/clinic/traceback.c.h tb_new _kwtuple -
Python/clinic/_warnings.c.h warnings_warn _kwtuple -
Python/clinic/_warnings.c.h warnings_warn_explicit _kwtuple -
Python/clinic/bltinmodule.c.h builtin___import__ _kwtuple -
Python/clinic/bltinmodule.c.h builtin_compile _kwtuple -
Python/clinic/bltinmodule.c.h builtin_exec _kwtuple -
Python/clinic/bltinmodule.c.h builtin_pow _kwtuple -
Python/clinic/bltinmodule.c.h builtin_print _kwtuple -
Python/clinic/bltinmodule.c.h builtin_round _kwtuple -
Python/clinic/bltinmodule.c.h builtin_sum _kwtuple -
Python/clinic/import.c.h _imp_find_frozen _kwtuple -
Python/clinic/import.c.h _imp_source_hash _kwtuple -
Python/clinic/Python-tokenize.c.h tokenizeriter_new _kwtuple -
Python/clinic/sysmodule.c.h sys_addaudithook _kwtuple -
Python/clinic/sysmodule.c.h sys_set_coroutine_origin_tracking_depth _kwtuple -
Python/clinic/sysmodule.c.h sys_set_int_max_str_digits _kwtuple -
#-----------------------
# cached - initialized once
# manually cached PyUnicodeObject
Python/ast_unparse.c - _str_replace_inf -
# holds strings
# holds statically-initialized strings
Objects/typeobject.c - slotdefs -
# other
@ -334,10 +399,10 @@ Python/import.c - extensions -
Modules/getbuildinfo.c Py_GetBuildInfo buildinfo -
# during init
Objects/typeobject.c - slotdefs_initialized -
Objects/unicodeobject.c - bloom_linebreak -
Python/bootstrap_hash.c - _Py_HashSecret_Initialized -
Python/bootstrap_hash.c py_getrandom getrandom_works -
Python/initconfig.c - _Py_global_config_int_max_str_digits -
Python/initconfig.c - Py_DebugFlag -
Python/initconfig.c - Py_UTF8Mode -
Python/initconfig.c - Py_DebugFlag -
@ -365,6 +430,8 @@ Python/sysmodule.c - _PySys_ImplName -
Python/sysmodule.c - _preinit_warnoptions -
Python/sysmodule.c - _preinit_xoptions -
Python/thread.c - initialized -
Python/thread_pthread.h - condattr_monotonic -
Python/thread_pthread.h init_condattr ca -
# set by embedders during init
Python/initconfig.c - _Py_StandardStreamEncoding -
@ -376,6 +443,7 @@ Objects/floatobject.c - float_format -
Objects/longobject.c PyLong_FromString log_base_BASE -
Objects/longobject.c PyLong_FromString convwidth_base -
Objects/longobject.c PyLong_FromString convmultmax_base -
Objects/perf_trampoline.c - perf_map_file -
Objects/unicodeobject.c - ucnhash_capi -
Parser/action_helpers.c _PyPegen_dummy_name cache -
Python/dtoa.c - p5s -
@ -451,6 +519,10 @@ Objects/dictobject.c - next_dict_keys_version -
Objects/funcobject.c - next_func_version -
Objects/moduleobject.c - max_module_number -
Objects/object.c - _Py_RefTotal -
Objects/perf_trampoline.c - perf_status -
Objects/perf_trampoline.c - extra_code_index -
Objects/perf_trampoline.c - code_arena -
Objects/perf_trampoline.c - trampoline_api -
Objects/typeobject.c - next_version_tag -
Objects/typeobject.c resolve_slotdups ptrs -
Parser/pegen.c - memo_statistics -
@ -463,6 +535,7 @@ Python/import.c - import_lock_thread -
Python/import.c import_find_and_load accumulated -
Python/import.c import_find_and_load import_level -
Python/modsupport.c - _Py_PackageContext -
Python/thread_pthread_stubs.h - py_tls_entries -
Python/pyfpe.c - PyFPE_counter -
Python/pylifecycle.c _Py_FatalErrorFormat reentrant -
Python/pylifecycle.c - _Py_UnhandledKeyboardInterrupt -
@ -612,6 +685,24 @@ Modules/_ctypes/_ctypes.c - Union_Type -
Modules/_ctypes/callbacks.c - PyCThunk_Type -
Modules/_ctypes/callproc.c - PyCArg_Type -
Modules/_ctypes/cfield.c - PyCField_Type -
Modules/_ctypes/ctypes.h - PyCArg_Type -
Modules/_ctypes/ctypes.h - PyCArrayType_Type -
Modules/_ctypes/ctypes.h - PyCArray_Type -
Modules/_ctypes/ctypes.h - PyCData_Type -
Modules/_ctypes/ctypes.h - PyCField_Type -
Modules/_ctypes/ctypes.h - PyCFuncPtrType_Type -
Modules/_ctypes/ctypes.h - PyCFuncPtr_Type -
Modules/_ctypes/ctypes.h - PyCPointerType_Type -
Modules/_ctypes/ctypes.h - PyCPointer_Type -
Modules/_ctypes/ctypes.h - PyCSimpleType_Type -
Modules/_ctypes/ctypes.h - PyCStgDict_Type -
Modules/_ctypes/ctypes.h - PyCStructType_Type -
Modules/_ctypes/ctypes.h - PyCThunk_Type -
Modules/_ctypes/ctypes.h - PyExc_ArgError -
Modules/_ctypes/ctypes.h - _ctypes_conversion_encoding -
Modules/_ctypes/ctypes.h - _ctypes_conversion_errors -
Modules/_ctypes/ctypes.h - _ctypes_ptrtype_cache -
Modules/_ctypes/ctypes.h - basespec_string -
Modules/_ctypes/stgdict.c - PyCStgDict_Type -
Modules/_cursesmodule.c - PyCursesWindow_Type -
Modules/_datetimemodule.c - PyDateTime_DateTimeType -
@ -849,6 +940,27 @@ Modules/_decimal/_decimal.c - _py_float_abs -
Modules/_decimal/_decimal.c - _py_long_bit_length -
Modules/_decimal/_decimal.c - _py_float_as_integer_ratio -
Modules/_elementtree.c - expat_capi -
Modules/cjkcodecs/_codecs_hk.c - big5_encmap -
Modules/cjkcodecs/_codecs_hk.c - big5_decmap -
Modules/cjkcodecs/_codecs_hk.c big5hkscs_codec_init initialized -
Modules/cjkcodecs/_codecs_iso2022.c - cp949_encmap -
Modules/cjkcodecs/_codecs_iso2022.c - ksx1001_decmap -
Modules/cjkcodecs/_codecs_iso2022.c - jisxcommon_encmap -
Modules/cjkcodecs/_codecs_iso2022.c - jisx0208_decmap -
Modules/cjkcodecs/_codecs_iso2022.c - jisx0212_decmap -
Modules/cjkcodecs/_codecs_iso2022.c - jisx0213_bmp_encmap -
Modules/cjkcodecs/_codecs_iso2022.c - jisx0213_1_bmp_decmap -
Modules/cjkcodecs/_codecs_iso2022.c - jisx0213_2_bmp_decmap -
Modules/cjkcodecs/_codecs_iso2022.c - jisx0213_emp_encmap -
Modules/cjkcodecs/_codecs_iso2022.c - jisx0213_1_emp_decmap -
Modules/cjkcodecs/_codecs_iso2022.c - jisx0213_2_emp_decmap -
Modules/cjkcodecs/_codecs_iso2022.c - gbcommon_encmap -
Modules/cjkcodecs/_codecs_iso2022.c - gb2312_decmap -
Modules/cjkcodecs/_codecs_iso2022.c ksx1001_init initialized -
Modules/cjkcodecs/_codecs_iso2022.c jisx0208_init initialized -
Modules/cjkcodecs/_codecs_iso2022.c jisx0212_init initialized -
Modules/cjkcodecs/_codecs_iso2022.c jisx0213_init initialized -
Modules/cjkcodecs/_codecs_iso2022.c gb2312_init initialized -
Modules/cjkcodecs/cjkcodecs.h - codec_list -
Modules/cjkcodecs/cjkcodecs.h - mapping_list -
Modules/readline.c - libedit_append_replace_history_offset -

Can't render this file because it has a wrong number of fields in line 4.

View File

@ -77,6 +77,8 @@ Objects/object.c - _Py_GenericAliasIterType -
Objects/object.c - _PyMemoryIter_Type -
Objects/object.c - _PyLineIterator -
Objects/object.c - _PyPositionsIterator -
Objects/perf_trampoline.c - _Py_trampoline_func_start -
Objects/perf_trampoline.c - _Py_trampoline_func_end -
Python/importdl.h - _PyImport_DynLoadFiletab -
Modules/expat/xmlrole.c - prolog0 -
@ -340,6 +342,7 @@ Modules/_xxtestfuzz/fuzzer.c LLVMFuzzerTestOneInput AST_LITERAL_EVAL_INITIALIZED
#-----------------------
# other vars that are actually constant
Include/internal/pycore_blocks_output_buffer.h - BUFFER_BLOCK_SIZE -
Modules/_csv.c - quote_styles -
Modules/_ctypes/cfield.c - ffi_type_double -
Modules/_ctypes/cfield.c - ffi_type_float -
@ -371,6 +374,7 @@ Modules/_elementtree.c - ExpatMemoryHandler -
Modules/_io/_iomodule.c - static_types -
Modules/_io/textio.c - encodefuncs -
Modules/_localemodule.c - langinfo_constants -
Modules/_pickle.c - READ_WHOLE_LINE -
Modules/_sqlite/module.c - error_codes -
Modules/_sre/sre.c pattern_repr flag_names -
# XXX I'm pretty sure this is actually constant:
@ -385,6 +389,39 @@ Modules/_zoneinfo.c - DAYS_BEFORE_MONTH -
Modules/_zoneinfo.c - DAYS_IN_MONTH -
Modules/arraymodule.c - descriptors -
Modules/arraymodule.c - emptybuf -
Modules/cjkcodecs/_codecs_cn.c - _mapping_list -
Modules/cjkcodecs/_codecs_cn.c - mapping_list -
Modules/cjkcodecs/_codecs_cn.c - _codec_list -
Modules/cjkcodecs/_codecs_cn.c - codec_list -
Modules/cjkcodecs/_codecs_hk.c - big5hkscs_pairenc_table -
Modules/cjkcodecs/_codecs_hk.c - _mapping_list -
Modules/cjkcodecs/_codecs_hk.c - mapping_list -
Modules/cjkcodecs/_codecs_hk.c - _codec_list -
Modules/cjkcodecs/_codecs_hk.c - codec_list -
Modules/cjkcodecs/_codecs_iso2022.c - iso2022_kr_config -
Modules/cjkcodecs/_codecs_iso2022.c - iso2022_jp_config -
Modules/cjkcodecs/_codecs_iso2022.c - iso2022_jp_1_config -
Modules/cjkcodecs/_codecs_iso2022.c - iso2022_jp_2_config -
Modules/cjkcodecs/_codecs_iso2022.c - iso2022_jp_2004_config -
Modules/cjkcodecs/_codecs_iso2022.c - iso2022_jp_3_config -
Modules/cjkcodecs/_codecs_iso2022.c - iso2022_jp_ext_config -
Modules/cjkcodecs/_codecs_iso2022.c - _mapping_list -
Modules/cjkcodecs/_codecs_iso2022.c - mapping_list -
Modules/cjkcodecs/_codecs_iso2022.c - _codec_list -
Modules/cjkcodecs/_codecs_iso2022.c - codec_list -
Modules/cjkcodecs/_codecs_jp.c - _mapping_list -
Modules/cjkcodecs/_codecs_jp.c - mapping_list -
Modules/cjkcodecs/_codecs_jp.c - _codec_list -
Modules/cjkcodecs/_codecs_jp.c - codec_list -
Modules/cjkcodecs/_codecs_kr.c - u2johabjamo -
Modules/cjkcodecs/_codecs_kr.c - _mapping_list -
Modules/cjkcodecs/_codecs_kr.c - mapping_list -
Modules/cjkcodecs/_codecs_kr.c - _codec_list -
Modules/cjkcodecs/_codecs_kr.c - codec_list -
Modules/cjkcodecs/_codecs_tw.c - _mapping_list -
Modules/cjkcodecs/_codecs_tw.c - mapping_list -
Modules/cjkcodecs/_codecs_tw.c - _codec_list -
Modules/cjkcodecs/_codecs_tw.c - codec_list -
Modules/cjkcodecs/cjkcodecs.h - __methods -
Modules/cmathmodule.c - acos_special_values -
Modules/cmathmodule.c - acosh_special_values -
@ -404,6 +441,8 @@ Modules/nismodule.c - TIMEOUT -
Modules/nismodule.c - aliases -
Modules/ossaudiodev.c - control_labels -
Modules/ossaudiodev.c - control_names -
Modules/posixmodule.c os_getxattr_impl buffer_sizes -
Modules/posixmodule.c os_listxattr_impl buffer_sizes -
Modules/posixmodule.c - posix_constants_confstr -
Modules/posixmodule.c - posix_constants_pathconf -
Modules/posixmodule.c - posix_constants_sysconf -
@ -426,6 +465,7 @@ Objects/obmalloc.c - _PyMem_Debug -
Objects/obmalloc.c - _PyMem_Raw -
Objects/obmalloc.c - _PyObject -
Objects/obmalloc.c - usedpools -
Objects/perf_trampoline.c - _Py_perfmap_callbacks -
Objects/typeobject.c - name_op -
Objects/unicodeobject.c - stripfuncnames -
Objects/unicodeobject.c - utf7_category -
@ -437,6 +477,7 @@ Parser/parser.c - reserved_keywords -
Parser/parser.c - soft_keywords -
Parser/tokenizer.c - type_comment_prefix -
Python/ast_opt.c fold_unaryop ops -
Python/ceval.c - binary_ops -
Python/codecs.c - Py_hexdigits -
Python/codecs.c - ucnhash_capi -
Python/codecs.c _PyCodecRegistry_Init methods -
@ -456,6 +497,7 @@ Python/pyhash.c - PyHash_Func -
Python/pylifecycle.c - _C_LOCALE_WARNING -
Python/pylifecycle.c - _PyOS_mystrnicmp_hack -
Python/pylifecycle.c - _TARGET_LOCALES -
Python/pystate.c - initial -
Python/specialize.c - adaptive_opcodes -
Python/specialize.c - cache_requirements -
Python/specialize.c - compare_masks -

Can't render this file because it has a wrong number of fields in line 4.