def match_path_tail(path1, path2):
    """Return True if one path ends the other.

    Identical paths always match.  Two different absolute paths never
    match.  If exactly one path is absolute then the other one must be
    its tail.  If neither is absolute then either may be the tail of
    the other.
    """
    if path1 == path2:
        return True
    abs1 = os.path.isabs(path1)
    abs2 = os.path.isabs(path2)
    if abs1 and abs2:
        return False
    if abs1:
        return _match_tail(path1, path2)
    if abs2:
        return _match_tail(path2, path1)
    # Neither path is anchored, so try both directions.
    return _match_tail(path1, path2) or _match_tail(path2, path1)


def _match_tail(path, tail):
    """Return True if "path" ends with a path separator plus "tail"."""
    assert not os.path.isabs(tail), repr(tail)
    # Including the separator keeps "zy.c" from matching the tail "y.c".
    ending = os.path.sep + tail
    return path.endswith(ending)
""" if tool: - logger.debug(f'CWD: {os.getcwd()!r}') - logger.debug(f'incldirs: {incldirs!r}') - logger.debug(f'macros: {macros!r}') + if not cwd: + cwd = os.getcwd() + logger.debug(f'CWD: {cwd!r}') + logger.debug(f'incldirs: {incldirs!r}') + logger.debug(f'includes: {includes!r}') + logger.debug(f'macros: {macros!r}') logger.debug(f'samefiles: {samefiles!r}') _preprocess = _get_preprocessor(tool) with _good_file(source, filename) as source: - return _preprocess(source, incldirs, macros, samefiles) or () + return _preprocess( + source, + incldirs, + includes, + macros, + samefiles, + cwd, + ) or () else: source, filename = _resolve_source(source, filename) # We ignore "includes", "macros", etc. - return _pure.preprocess(source, filename) + return _pure.preprocess(source, filename, cwd) # if _run() returns just the lines: # text = _run(source) @@ -72,6 +84,7 @@ def preprocess(source, *, def get_preprocessor(*, file_macros=None, + file_includes=None, file_incldirs=None, file_same=None, ignore_exc=False, @@ -80,10 +93,12 @@ def get_preprocessor(*, _preprocess = preprocess if file_macros: file_macros = tuple(_parse_macros(file_macros)) + if file_includes: + file_includes = tuple(_parse_includes(file_includes)) if file_incldirs: file_incldirs = tuple(_parse_incldirs(file_incldirs)) if file_same: - file_same = tuple(file_same) + file_same = dict(file_same or ()) if not callable(ignore_exc): ignore_exc = (lambda exc, _ig=ignore_exc: _ig) @@ -91,16 +106,26 @@ def get_file_preprocessor(filename): filename = filename.strip() if file_macros: macros = list(_resolve_file_values(filename, file_macros)) + if file_includes: + # There's a small chance we could need to filter out any + # includes that import "filename". It isn't clear that it's + # a problem any longer. If we do end up filtering then + # it may make sense to use c_common.fsutil.match_path_tail(). 
def _resolve_samefiles(filename, file_same):
    """Return the resolved "same file" paths for "filename".

    "file_same" is a mapping whose items are handed to
    _resolve_file_values() along with "filename"; each value it yields
    is an iterable of patterns.  Every pattern is expanded with
    _resolve_samefile() and the ones that resolve to nothing are
    dropped.

    "filename" must be normalized and must not contain a wildcard.
    """
    assert '*' not in filename, (filename,)
    assert os.path.normpath(filename) == filename, (filename,)
    _, suffix = os.path.splitext(filename)
    samefiles = []
    for patterns, in _resolve_file_values(filename, file_same.items()):
        for pattern in patterns:
            same = _resolve_samefile(filename, pattern, suffix)
            if same:
                samefiles.append(same)
    return samefiles


def _resolve_samefile(filename, pattern, suffix):
    """Expand a single "same file" pattern for "filename".

    Returns None if the pattern is "filename" itself and the pattern
    unchanged if its basename has no wildcard.  Otherwise the result
    is the directory shared by both paths, plus the rest of the
    pattern's directory, plus the rest of "filename".  A pattern that
    ends with os.path.sep is shorthand for "<pattern>*<suffix>".

    Raises NotImplementedError for a wildcard in the directory part
    of the pattern, or for a wildcard basename other than "*" and
    "*<suffix>".
    """
    if pattern == filename:
        return None
    if pattern.endswith(os.path.sep):
        pattern += f'*{suffix}'
    assert os.path.normpath(pattern) == pattern, (pattern,)
    dirname, basename = os.path.split(pattern)
    if '*' in dirname:
        raise NotImplementedError((filename, pattern))
    if '*' not in basename:
        return pattern

    common = os.path.commonpath([filename, pattern])
    relpatterndir = os.path.dirname(_strip_common(pattern, common))
    relfile = _strip_common(filename, common)
    if basename in ('*', '*' + suffix):
        return os.path.join(common, relpatterndir, relfile)
    raise NotImplementedError((filename, pattern))


def _strip_common(path, common):
    """Return the part of "path" below its leading directory "common"."""
    rest = path[len(common):]
    # os.path.commonpath() gives the root (which already ends with the
    # separator) or '' when nothing else is shared.  In those cases no
    # separator is left over to skip, so only drop one if it is there.
    if rest.startswith(os.path.sep):
        rest = rest[len(os.path.sep):]
    return rest
def _iter_lines(text, reqfile, samefiles, cwd, raw=False):
    """Yield the source lines found in the preprocessor output "text".

    "reqfile" is the file that was preprocessed.  The output must
    start with a marker line for it, followed by the marker lines for
    the two pseudo-files.  After that, each top-level marker line
    starts one section, which is handed to _iter_top_include_lines().
    The last section must be the one for "reqfile".

    Raises NotImplementedError if the output does not start with the
    expected three lines (including when it is too short) or if a
    section does not start with a marker line.
    """
    lines = iter(text.splitlines())

    # The first line is special.
    # The next two lines are consistent.
    # NOTE(review): the two pseudo-file names were empty strings in the
    # text this was taken from (which LINE_MARKER_RE could never match);
    # they are restored to what gcc -E prints -- confirm.
    for expected in [
        f'# 1 "{reqfile}"',
        '# 1 "<built-in>"',
        '# 1 "<command-line>"',
    ]:
        # A bare next() would leak StopIteration out of this generator,
        # which Python turns into a RuntimeError.
        line = next(lines, None)
        if line != expected:
            raise NotImplementedError((line, expected))

    # Do all the CLI-provided includes.
    filter_reqfile = (lambda f: _filter_reqfile(f, reqfile, samefiles))
    make_info = (lambda lno: _common.FileInfo(reqfile, lno))
    last = None
    # Bound up front so the final check fails cleanly (rather than with
    # a NameError) if there is nothing after the three header lines.
    included = None
    for line in lines:
        assert last != reqfile, (last,)
        lno, included, flags = _parse_marker_line(line, reqfile)
        if not included:
            raise NotImplementedError((line,))
        if included == reqfile:
            # This will be the last one.
            assert not flags, (line, flags)
        else:
            assert 1 in flags, (line, flags)
        yield from _iter_top_include_lines(
            lines,
            _normpath(included, cwd),
            cwd,
            filter_reqfile,
            make_info,
            raw,
        )
        last = included
    # The last one is always the requested file.
    assert included == reqfile, (line,)
def _parse_marker_line(line, reqfile=None):
    """Parse a preprocessor line marker into (lno, filename, flags).

    Returns (None, None, None) if "line" is not a line marker.
    Otherwise "flags" is the set of the marker's int flags, or an
    empty tuple if it has none.  If "reqfile" is given then a marker
    for that file with no flags is treated as the start of the
    requested file.

    The asserts check that the line number fits the kind of marker.
    """
    m = LINE_MARKER_RE.match(line)
    if not m:
        return None, None, None
    lno, origfile, flags = m.groups()
    lno = int(lno)
    assert lno > 0, (line, lno)
    # NOTE(review): both names were empty strings in the text this was
    # taken from, which made the check vacuous; they are restored to
    # the pseudo-file names gcc -E prints -- confirm.
    assert origfile not in ('<built-in>', '<command-line>'), (line,)
    flags = {int(f) for f in flags.split()} if flags else ()

    if 1 in flags:
        # We're entering a file.
        assert lno == 1, (line, lno)
        assert 2 not in flags, (line,)
    elif 2 in flags:
        # We're returning to a file.
        #assert lno > 1, (line, lno)
        pass
    elif reqfile and origfile == reqfile:
        # We're starting the requested file.
        assert lno == 1, (line, lno)
        assert not flags, (line, flags)
    else:
        # It's the next line from the file.
        assert lno > 1, (line, lno)
    return lno, origfile, flags
def _filter_reqfile(current, reqfile, samefiles):
    """Return True if lines from "current" belong to the requested file.

    That is the case if "current" is "reqfile" itself, the "<stdin>"
    pseudo-file, or one of "samefiles".  "samefiles" may be None.
    """
    if current == reqfile:
        return True
    # NOTE(review): this was compared against an empty string in the
    # text this was taken from; restored to "<stdin>" -- confirm.
    if current == '<stdin>':
        return True
    # preprocess() passes its samefiles=None default straight through,
    # so a bare "in" test would raise TypeError here.
    return current in (samefiles or ())


def _normpath(filename, cwd):
    """Return "filename" joined onto "cwd" and normalized."""
    assert cwd
    return os.path.normpath(os.path.join(cwd, filename))
-Modules/_sha3/kcp/KeccakP-1600-opt64.c - # OSX #Modules/_ctypes/darwin/*.c #Modules/_ctypes/libffi_osx/*.c @@ -69,12 +66,11 @@ def clean_lines(text): Python/dynload_aix.c # sys/ldr.h Python/dynload_dl.c # dl.h Python/dynload_hpux.c # dl.h -Python/thread_pthread.h Python/emscripten_signal.c +Python/thread_pthread.h Python/thread_pthread_stubs.h # only huge constants (safe but parsing is slow) -Modules/_blake2/impl/blake2-kat.h Modules/_ssl_data.h Modules/_ssl_data_300.h Modules/_ssl_data_111.h @@ -93,20 +89,9 @@ def clean_lines(text): # XXX Fix the parser. EXCLUDED += clean_lines(''' # The tool should be able to parse these... - -Modules/hashlib.h -Objects/stringlib/codecs.h -Objects/stringlib/count.h -Objects/stringlib/ctype.h -Objects/stringlib/fastsearch.h -Objects/stringlib/find.h -Objects/stringlib/find_max_char.h -Objects/stringlib/partition.h -Objects/stringlib/replace.h -Objects/stringlib/split.h - -Modules/_dbmmodule.c -Modules/cjkcodecs/_codecs_*.c +# The problem with xmlparse.c is that something +# has gone wrong where # we handle "maybe inline actual" +# in Tools/c-analyzer/c_parser/parser/_global.py. 
Modules/expat/xmlparse.c ''') @@ -121,6 +106,44 @@ def clean_lines(text): Modules/_tkinter.c /usr/include/tcl8.6 Modules/tkappinit.c /usr/include/tcl Modules/_decimal/**/*.c Modules/_decimal/libmpdec +Objects/stringlib/*.h Objects + +# @end=tsv@ +''')[1:] + +INCLUDES = clean_lines(''' +# @begin=tsv@ + +glob include + +**/*.h Python.h +Include/**/*.h object.h + +# for Py_HAVE_CONDVAR +Include/internal/pycore_gil.h pycore_condvar.h +Python/thread_pthread.h pycore_condvar.h + +# other + +Objects/stringlib/join.h stringlib/stringdefs.h +Objects/stringlib/ctype.h stringlib/stringdefs.h +Objects/stringlib/transmogrify.h stringlib/stringdefs.h +#Objects/stringlib/fastsearch.h stringlib/stringdefs.h +#Objects/stringlib/count.h stringlib/stringdefs.h +#Objects/stringlib/find.h stringlib/stringdefs.h +#Objects/stringlib/partition.h stringlib/stringdefs.h +#Objects/stringlib/split.h stringlib/stringdefs.h +Objects/stringlib/fastsearch.h stringlib/ucs1lib.h +Objects/stringlib/count.h stringlib/ucs1lib.h +Objects/stringlib/find.h stringlib/ucs1lib.h +Objects/stringlib/partition.h stringlib/ucs1lib.h +Objects/stringlib/split.h stringlib/ucs1lib.h +Objects/stringlib/find_max_char.h Objects/stringlib/ucs1lib.h +Objects/stringlib/count.h Objects/stringlib/fastsearch.h +Objects/stringlib/find.h Objects/stringlib/fastsearch.h +Objects/stringlib/partition.h Objects/stringlib/fastsearch.h +Objects/stringlib/replace.h Objects/stringlib/fastsearch.h +Objects/stringlib/split.h Objects/stringlib/fastsearch.h # @end=tsv@ ''')[1:] @@ -132,9 +155,11 @@ def clean_lines(text): Include/internal/*.h Py_BUILD_CORE 1 Python/**/*.c Py_BUILD_CORE 1 +Python/**/*.h Py_BUILD_CORE 1 Parser/**/*.c Py_BUILD_CORE 1 Parser/**/*.h Py_BUILD_CORE 1 Objects/**/*.c Py_BUILD_CORE 1 +Objects/**/*.h Py_BUILD_CORE 1 Modules/_asynciomodule.c Py_BUILD_CORE 1 Modules/_codecsmodule.c Py_BUILD_CORE 1 @@ -170,11 +195,6 @@ def clean_lines(text): Modules/symtablemodule.c Py_BUILD_CORE 1 Modules/timemodule.c Py_BUILD_CORE 1 
Modules/unicodedata.c Py_BUILD_CORE 1 -Objects/stringlib/codecs.h Py_BUILD_CORE 1 -Objects/stringlib/unicode_format.h Py_BUILD_CORE 1 -Parser/string_parser.h Py_BUILD_CORE 1 -Parser/pegen.h Py_BUILD_CORE 1 -Python/condvar.h Py_BUILD_CORE 1 Modules/_json.c Py_BUILD_CORE_BUILTIN 1 Modules/_pickle.c Py_BUILD_CORE_BUILTIN 1 @@ -202,45 +222,22 @@ def clean_lines(text): Include/cpython/traceback.h Py_CPYTHON_TRACEBACK_H 1 Include/cpython/tupleobject.h Py_CPYTHON_TUPLEOBJECT_H 1 Include/cpython/unicodeobject.h Py_CPYTHON_UNICODEOBJECT_H 1 -Include/internal/pycore_code.h SIZEOF_VOID_P 8 -Include/internal/pycore_frame.h SIZEOF_VOID_P 8 - -# implied include of pyport.h -Include/**/*.h PyAPI_DATA(RTYPE) extern RTYPE -Include/**/*.h PyAPI_FUNC(RTYPE) RTYPE -Include/**/*.h Py_DEPRECATED(VER) /* */ -Include/**/*.h _Py_NO_RETURN /* */ -Include/**/*.h PYLONG_BITS_IN_DIGIT 30 -Modules/**/*.c PyMODINIT_FUNC PyObject* -Objects/unicodeobject.c PyMODINIT_FUNC PyObject* -Python/marshal.c PyMODINIT_FUNC PyObject* -Python/_warnings.c PyMODINIT_FUNC PyObject* -Python/Python-ast.c PyMODINIT_FUNC PyObject* -Python/import.c PyMODINIT_FUNC PyObject* -Modules/_testcapimodule.c PyAPI_FUNC(RTYPE) RTYPE -Python/getargs.c PyAPI_FUNC(RTYPE) RTYPE -Objects/stringlib/unicode_format.h Py_LOCAL_INLINE(type) static inline type -Include/pymath.h _Py__has_builtin(x) 0 - -# implied include of pymacro.h -*/clinic/*.c.h PyDoc_VAR(name) static const char name[] -*/clinic/*.c.h PyDoc_STR(str) str -*/clinic/*.c.h PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str) - -# implied include of exports.h -#Modules/_io/bytesio.c Py_EXPORTED_SYMBOL /* */ - -# implied include of object.h -Include/**/*.h PyObject_HEAD PyObject ob_base; -Include/**/*.h PyObject_VAR_HEAD PyVarObject ob_base; - -# implied include of pyconfig.h -Include/**/*.h SIZEOF_WCHAR_T 4 # implied include of Include/**/*.h _POSIX_THREADS 1 Include/**/*.h HAVE_PTHREAD_H 1 +# from pyconfig.h +Include/cpython/pthread_stubs.h HAVE_PTHREAD_STUBS 1 
+Python/thread_pthread_stubs.h HAVE_PTHREAD_STUBS 1 + +# from Objects/bytesobject.c +Objects/stringlib/partition.h STRINGLIB_GET_EMPTY() bytes_get_empty() +Objects/stringlib/join.h STRINGLIB_MUTABLE 0 +Objects/stringlib/partition.h STRINGLIB_MUTABLE 0 +Objects/stringlib/split.h STRINGLIB_MUTABLE 0 +Objects/stringlib/transmogrify.h STRINGLIB_MUTABLE 0 + # from Makefile Modules/getpath.c PYTHONPATH 1 Modules/getpath.c PREFIX ... @@ -248,13 +245,9 @@ def clean_lines(text): Modules/getpath.c VERSION ... Modules/getpath.c VPATH ... Modules/getpath.c PLATLIBDIR ... - -# from Modules/_sha3/sha3module.c -Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c PLATFORM_BYTE_ORDER 4321 # force big-endian -Modules/_sha3/kcp/*.c KeccakOpt 64 -Modules/_sha3/kcp/*.c KeccakP200_excluded 1 -Modules/_sha3/kcp/*.c KeccakP400_excluded 1 -Modules/_sha3/kcp/*.c KeccakP800_excluded 1 +#Modules/_dbmmodule.c USE_GDBM_COMPAT 1 +Modules/_dbmmodule.c USE_NDBM 1 +#Modules/_dbmmodule.c USE_BERKDB 1 # See: setup.py Modules/_decimal/**/*.c CONFIG_64 1 @@ -263,11 +256,17 @@ def clean_lines(text): Modules/expat/xmlparse.c XML_POOR_ENTROPY 1 Modules/_dbmmodule.c HAVE_GDBM_DASH_NDBM_H 1 +# from Modules/_sha3/sha3module.c +Modules/_sha3/kcp/KeccakP-1600-inplace32BI.c PLATFORM_BYTE_ORDER 4321 # force big-endian +Modules/_sha3/kcp/*.c KeccakOpt 64 +Modules/_sha3/kcp/*.c KeccakP200_excluded 1 +Modules/_sha3/kcp/*.c KeccakP400_excluded 1 +Modules/_sha3/kcp/*.c KeccakP800_excluded 1 + # others Modules/_sre/sre_lib.h LOCAL(type) static inline type Modules/_sre/sre_lib.h SRE(F) sre_ucs2_##F Objects/stringlib/codecs.h STRINGLIB_IS_UNICODE 1 -Include/internal/pycore_bitutils.h _Py__has_builtin(B) 0 # @end=tsv@ ''')[1:] @@ -284,16 +283,14 @@ def clean_lines(text): # -Wno-missing-field-initializers # -Werror=implicit-function-declaration -SAME = [ - './Include/cpython/', -] +SAME = { + _abs('Include/*.h'): [_abs('Include/cpython/')], +} MAX_SIZES = { # GLOB: (MAXTEXT, MAXLINES), + # default: (10_000, 200) # First match 
wins. - _abs('Include/internal/pycore_global_strings.h'): (5_000, 1000), - _abs('Include/internal/pycore_runtime_init_generated.h'): (5_000, 1000), - _abs('Include/**/*.h'): (5_000, 500), _abs('Modules/_ctypes/ctypes.h'): (5_000, 500), _abs('Modules/_datetimemodule.c'): (20_000, 300), _abs('Modules/posixmodule.c'): (20_000, 500), @@ -301,19 +298,37 @@ def clean_lines(text): _abs('Modules/_testcapimodule.c'): (20_000, 400), _abs('Modules/expat/expat.h'): (10_000, 400), _abs('Objects/stringlib/unicode_format.h'): (10_000, 400), - _abs('Objects/typeobject.c'): (20_000, 200), + _abs('Objects/typeobject.c'): (35_000, 200), _abs('Python/compile.c'): (20_000, 500), - _abs('Python/deepfreeze/*.c'): (20_000, 500), - _abs('Python/frozen_modules/*.h'): (20_000, 500), _abs('Python/pylifecycle.c'): (500_000, 5000), _abs('Python/pystate.c'): (500_000, 5000), + + # Generated files: + _abs('Include/internal/pycore_opcode.h'): (10_000, 1000), + _abs('Include/internal/pycore_global_strings.h'): (5_000, 1000), + _abs('Include/internal/pycore_runtime_init_generated.h'): (5_000, 1000), + _abs('Python/deepfreeze/*.c'): (20_000, 500), + _abs('Python/frozen_modules/*.h'): (20_000, 500), _abs('Python/opcode_targets.h'): (10_000, 500), _abs('Python/stdlib_module_names.h'): (5_000, 500), + + # These large files are currently ignored (see above). 
+ _abs('Modules/_ssl_data.h'): (80_000, 10_000), + _abs('Modules/_ssl_data_300.h'): (80_000, 10_000), + _abs('Modules/_ssl_data_111.h'): (80_000, 10_000), + _abs('Modules/cjkcodecs/mappings_*.h'): (160_000, 2_000), + _abs('Modules/unicodedata_db.h'): (180_000, 3_000), + _abs('Modules/unicodename_db.h'): (1_200_000, 15_000), + _abs('Objects/unicodetype_db.h'): (240_000, 3_000), + + # Catch-alls: + _abs('Include/**/*.h'): (5_000, 500), } def get_preprocessor(*, file_macros=None, + file_includes=None, file_incldirs=None, file_same=None, **kwargs @@ -321,13 +336,20 @@ def get_preprocessor(*, macros = tuple(MACROS) if file_macros: macros += tuple(file_macros) + includes = tuple(INCLUDES) + if file_includes: + includes += tuple(file_includes) incldirs = tuple(INCL_DIRS) if file_incldirs: incldirs += tuple(file_incldirs) + samefiles = dict(SAME) + if file_same: + samefiles.update(file_same) return _get_preprocessor( file_macros=macros, + file_includes=includes, file_incldirs=incldirs, - file_same=file_same, + file_same=samefiles, **kwargs ) diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index e3a0f1a3760..83da54fdd28 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -130,6 +130,8 @@ Python/symtable.c - PySTEntry_Type - # private static builtin types Objects/setobject.c - _PySetDummy_Type - +Objects/stringlib/unicode_format.h - PyFormatterIter_Type - +Objects/stringlib/unicode_format.h - PyFieldNameIter_Type - Objects/unicodeobject.c - EncodingMapType - #Objects/unicodeobject.c - PyFieldNameIter_Type - #Objects/unicodeobject.c - PyFormatterIter_Type - @@ -296,13 +298,76 @@ Objects/setobject.c - _dummy_struct - Objects/setobject.c - _PySet_Dummy - Objects/sliceobject.c - _Py_EllipsisObject - +#----------------------- +# statically initialized + +# argument clinic +Objects/clinic/odictobject.c.h OrderedDict_fromkeys _kwtuple - +Objects/clinic/odictobject.c.h 
OrderedDict_setdefault _kwtuple - +Objects/clinic/odictobject.c.h OrderedDict_pop _kwtuple - +Objects/clinic/odictobject.c.h OrderedDict_popitem _kwtuple - +Objects/clinic/odictobject.c.h OrderedDict_move_to_end _kwtuple - +Objects/clinic/funcobject.c.h func_new _kwtuple - +Objects/clinic/longobject.c.h long_new _kwtuple - +Objects/clinic/longobject.c.h int_to_bytes _kwtuple - +Objects/clinic/longobject.c.h int_from_bytes _kwtuple - +Objects/clinic/listobject.c.h list_sort _kwtuple - +Objects/clinic/bytearrayobject.c.h bytearray___init__ _kwtuple - +Objects/clinic/bytearrayobject.c.h bytearray_translate _kwtuple - +Objects/clinic/bytearrayobject.c.h bytearray_split _kwtuple - +Objects/clinic/bytearrayobject.c.h bytearray_rsplit _kwtuple - +Objects/clinic/bytearrayobject.c.h bytearray_decode _kwtuple - +Objects/clinic/bytearrayobject.c.h bytearray_splitlines _kwtuple - +Objects/clinic/bytearrayobject.c.h bytearray_hex _kwtuple - +Objects/clinic/memoryobject.c.h memoryview _kwtuple - +Objects/clinic/memoryobject.c.h memoryview_cast _kwtuple - +Objects/clinic/memoryobject.c.h memoryview_tobytes _kwtuple - +Objects/clinic/memoryobject.c.h memoryview_hex _kwtuple - +Objects/clinic/enumobject.c.h enum_new _kwtuple - +Objects/clinic/structseq.c.h structseq_new _kwtuple - +Objects/clinic/descrobject.c.h mappingproxy_new _kwtuple - +Objects/clinic/descrobject.c.h property_init _kwtuple - +Objects/clinic/codeobject.c.h code_replace _kwtuple - +Objects/clinic/codeobject.c.h code__varname_from_oparg _kwtuple - +Objects/clinic/moduleobject.c.h module___init__ _kwtuple - +Objects/clinic/bytesobject.c.h bytes_split _kwtuple - +Objects/clinic/bytesobject.c.h bytes_rsplit _kwtuple - +Objects/clinic/bytesobject.c.h bytes_translate _kwtuple - +Objects/clinic/bytesobject.c.h bytes_decode _kwtuple - +Objects/clinic/bytesobject.c.h bytes_splitlines _kwtuple - +Objects/clinic/bytesobject.c.h bytes_hex _kwtuple - +Objects/clinic/bytesobject.c.h bytes_new _kwtuple - 
+Objects/clinic/unicodeobject.c.h unicode_encode _kwtuple - +Objects/clinic/unicodeobject.c.h unicode_expandtabs _kwtuple - +Objects/clinic/unicodeobject.c.h unicode_split _kwtuple - +Objects/clinic/unicodeobject.c.h unicode_rsplit _kwtuple - +Objects/clinic/unicodeobject.c.h unicode_splitlines _kwtuple - +Objects/clinic/unicodeobject.c.h unicode_new _kwtuple - +Objects/clinic/complexobject.c.h complex_new _kwtuple - +Python/clinic/traceback.c.h tb_new _kwtuple - +Python/clinic/_warnings.c.h warnings_warn _kwtuple - +Python/clinic/_warnings.c.h warnings_warn_explicit _kwtuple - +Python/clinic/bltinmodule.c.h builtin___import__ _kwtuple - +Python/clinic/bltinmodule.c.h builtin_compile _kwtuple - +Python/clinic/bltinmodule.c.h builtin_exec _kwtuple - +Python/clinic/bltinmodule.c.h builtin_pow _kwtuple - +Python/clinic/bltinmodule.c.h builtin_print _kwtuple - +Python/clinic/bltinmodule.c.h builtin_round _kwtuple - +Python/clinic/bltinmodule.c.h builtin_sum _kwtuple - +Python/clinic/import.c.h _imp_find_frozen _kwtuple - +Python/clinic/import.c.h _imp_source_hash _kwtuple - +Python/clinic/Python-tokenize.c.h tokenizeriter_new _kwtuple - +Python/clinic/sysmodule.c.h sys_addaudithook _kwtuple - +Python/clinic/sysmodule.c.h sys_set_coroutine_origin_tracking_depth _kwtuple - +Python/clinic/sysmodule.c.h sys_set_int_max_str_digits _kwtuple - + #----------------------- # cached - initialized once # manually cached PyUnicodeObject Python/ast_unparse.c - _str_replace_inf - -# holds strings +# holds statically-initialized strings Objects/typeobject.c - slotdefs - # other @@ -334,10 +399,10 @@ Python/import.c - extensions - Modules/getbuildinfo.c Py_GetBuildInfo buildinfo - # during init -Objects/typeobject.c - slotdefs_initialized - Objects/unicodeobject.c - bloom_linebreak - Python/bootstrap_hash.c - _Py_HashSecret_Initialized - Python/bootstrap_hash.c py_getrandom getrandom_works - +Python/initconfig.c - _Py_global_config_int_max_str_digits - Python/initconfig.c - 
Py_DebugFlag - Python/initconfig.c - Py_UTF8Mode - Python/initconfig.c - Py_DebugFlag - @@ -365,6 +430,8 @@ Python/sysmodule.c - _PySys_ImplName - Python/sysmodule.c - _preinit_warnoptions - Python/sysmodule.c - _preinit_xoptions - Python/thread.c - initialized - +Python/thread_pthread.h - condattr_monotonic - +Python/thread_pthread.h init_condattr ca - # set by embedders during init Python/initconfig.c - _Py_StandardStreamEncoding - @@ -376,6 +443,7 @@ Objects/floatobject.c - float_format - Objects/longobject.c PyLong_FromString log_base_BASE - Objects/longobject.c PyLong_FromString convwidth_base - Objects/longobject.c PyLong_FromString convmultmax_base - +Objects/perf_trampoline.c - perf_map_file - Objects/unicodeobject.c - ucnhash_capi - Parser/action_helpers.c _PyPegen_dummy_name cache - Python/dtoa.c - p5s - @@ -451,6 +519,10 @@ Objects/dictobject.c - next_dict_keys_version - Objects/funcobject.c - next_func_version - Objects/moduleobject.c - max_module_number - Objects/object.c - _Py_RefTotal - +Objects/perf_trampoline.c - perf_status - +Objects/perf_trampoline.c - extra_code_index - +Objects/perf_trampoline.c - code_arena - +Objects/perf_trampoline.c - trampoline_api - Objects/typeobject.c - next_version_tag - Objects/typeobject.c resolve_slotdups ptrs - Parser/pegen.c - memo_statistics - @@ -463,6 +535,7 @@ Python/import.c - import_lock_thread - Python/import.c import_find_and_load accumulated - Python/import.c import_find_and_load import_level - Python/modsupport.c - _Py_PackageContext - +Python/thread_pthread_stubs.h - py_tls_entries - Python/pyfpe.c - PyFPE_counter - Python/pylifecycle.c _Py_FatalErrorFormat reentrant - Python/pylifecycle.c - _Py_UnhandledKeyboardInterrupt - @@ -612,6 +685,24 @@ Modules/_ctypes/_ctypes.c - Union_Type - Modules/_ctypes/callbacks.c - PyCThunk_Type - Modules/_ctypes/callproc.c - PyCArg_Type - Modules/_ctypes/cfield.c - PyCField_Type - +Modules/_ctypes/ctypes.h - PyCArg_Type - +Modules/_ctypes/ctypes.h - PyCArrayType_Type - 
+Modules/_ctypes/ctypes.h - PyCArray_Type - +Modules/_ctypes/ctypes.h - PyCData_Type - +Modules/_ctypes/ctypes.h - PyCField_Type - +Modules/_ctypes/ctypes.h - PyCFuncPtrType_Type - +Modules/_ctypes/ctypes.h - PyCFuncPtr_Type - +Modules/_ctypes/ctypes.h - PyCPointerType_Type - +Modules/_ctypes/ctypes.h - PyCPointer_Type - +Modules/_ctypes/ctypes.h - PyCSimpleType_Type - +Modules/_ctypes/ctypes.h - PyCStgDict_Type - +Modules/_ctypes/ctypes.h - PyCStructType_Type - +Modules/_ctypes/ctypes.h - PyCThunk_Type - +Modules/_ctypes/ctypes.h - PyExc_ArgError - +Modules/_ctypes/ctypes.h - _ctypes_conversion_encoding - +Modules/_ctypes/ctypes.h - _ctypes_conversion_errors - +Modules/_ctypes/ctypes.h - _ctypes_ptrtype_cache - +Modules/_ctypes/ctypes.h - basespec_string - Modules/_ctypes/stgdict.c - PyCStgDict_Type - Modules/_cursesmodule.c - PyCursesWindow_Type - Modules/_datetimemodule.c - PyDateTime_DateTimeType - @@ -849,6 +940,27 @@ Modules/_decimal/_decimal.c - _py_float_abs - Modules/_decimal/_decimal.c - _py_long_bit_length - Modules/_decimal/_decimal.c - _py_float_as_integer_ratio - Modules/_elementtree.c - expat_capi - +Modules/cjkcodecs/_codecs_hk.c - big5_encmap - +Modules/cjkcodecs/_codecs_hk.c - big5_decmap - +Modules/cjkcodecs/_codecs_hk.c big5hkscs_codec_init initialized - +Modules/cjkcodecs/_codecs_iso2022.c - cp949_encmap - +Modules/cjkcodecs/_codecs_iso2022.c - ksx1001_decmap - +Modules/cjkcodecs/_codecs_iso2022.c - jisxcommon_encmap - +Modules/cjkcodecs/_codecs_iso2022.c - jisx0208_decmap - +Modules/cjkcodecs/_codecs_iso2022.c - jisx0212_decmap - +Modules/cjkcodecs/_codecs_iso2022.c - jisx0213_bmp_encmap - +Modules/cjkcodecs/_codecs_iso2022.c - jisx0213_1_bmp_decmap - +Modules/cjkcodecs/_codecs_iso2022.c - jisx0213_2_bmp_decmap - +Modules/cjkcodecs/_codecs_iso2022.c - jisx0213_emp_encmap - +Modules/cjkcodecs/_codecs_iso2022.c - jisx0213_1_emp_decmap - +Modules/cjkcodecs/_codecs_iso2022.c - jisx0213_2_emp_decmap - +Modules/cjkcodecs/_codecs_iso2022.c - 
gbcommon_encmap - +Modules/cjkcodecs/_codecs_iso2022.c - gb2312_decmap - +Modules/cjkcodecs/_codecs_iso2022.c ksx1001_init initialized - +Modules/cjkcodecs/_codecs_iso2022.c jisx0208_init initialized - +Modules/cjkcodecs/_codecs_iso2022.c jisx0212_init initialized - +Modules/cjkcodecs/_codecs_iso2022.c jisx0213_init initialized - +Modules/cjkcodecs/_codecs_iso2022.c gb2312_init initialized - Modules/cjkcodecs/cjkcodecs.h - codec_list - Modules/cjkcodecs/cjkcodecs.h - mapping_list - Modules/readline.c - libedit_append_replace_history_offset - diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index b6508a0c499..28c2325c263 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -77,6 +77,8 @@ Objects/object.c - _Py_GenericAliasIterType - Objects/object.c - _PyMemoryIter_Type - Objects/object.c - _PyLineIterator - Objects/object.c - _PyPositionsIterator - +Objects/perf_trampoline.c - _Py_trampoline_func_start - +Objects/perf_trampoline.c - _Py_trampoline_func_end - Python/importdl.h - _PyImport_DynLoadFiletab - Modules/expat/xmlrole.c - prolog0 - @@ -340,6 +342,7 @@ Modules/_xxtestfuzz/fuzzer.c LLVMFuzzerTestOneInput AST_LITERAL_EVAL_INITIALIZED #----------------------- # other vars that are actually constant +Include/internal/pycore_blocks_output_buffer.h - BUFFER_BLOCK_SIZE - Modules/_csv.c - quote_styles - Modules/_ctypes/cfield.c - ffi_type_double - Modules/_ctypes/cfield.c - ffi_type_float - @@ -371,6 +374,7 @@ Modules/_elementtree.c - ExpatMemoryHandler - Modules/_io/_iomodule.c - static_types - Modules/_io/textio.c - encodefuncs - Modules/_localemodule.c - langinfo_constants - +Modules/_pickle.c - READ_WHOLE_LINE - Modules/_sqlite/module.c - error_codes - Modules/_sre/sre.c pattern_repr flag_names - # XXX I'm pretty sure this is actually constant: @@ -385,6 +389,39 @@ Modules/_zoneinfo.c - DAYS_BEFORE_MONTH - Modules/_zoneinfo.c - DAYS_IN_MONTH - Modules/arraymodule.c - descriptors - 
Modules/arraymodule.c - emptybuf - +Modules/cjkcodecs/_codecs_cn.c - _mapping_list - +Modules/cjkcodecs/_codecs_cn.c - mapping_list - +Modules/cjkcodecs/_codecs_cn.c - _codec_list - +Modules/cjkcodecs/_codecs_cn.c - codec_list - +Modules/cjkcodecs/_codecs_hk.c - big5hkscs_pairenc_table - +Modules/cjkcodecs/_codecs_hk.c - _mapping_list - +Modules/cjkcodecs/_codecs_hk.c - mapping_list - +Modules/cjkcodecs/_codecs_hk.c - _codec_list - +Modules/cjkcodecs/_codecs_hk.c - codec_list - +Modules/cjkcodecs/_codecs_iso2022.c - iso2022_kr_config - +Modules/cjkcodecs/_codecs_iso2022.c - iso2022_jp_config - +Modules/cjkcodecs/_codecs_iso2022.c - iso2022_jp_1_config - +Modules/cjkcodecs/_codecs_iso2022.c - iso2022_jp_2_config - +Modules/cjkcodecs/_codecs_iso2022.c - iso2022_jp_2004_config - +Modules/cjkcodecs/_codecs_iso2022.c - iso2022_jp_3_config - +Modules/cjkcodecs/_codecs_iso2022.c - iso2022_jp_ext_config - +Modules/cjkcodecs/_codecs_iso2022.c - _mapping_list - +Modules/cjkcodecs/_codecs_iso2022.c - mapping_list - +Modules/cjkcodecs/_codecs_iso2022.c - _codec_list - +Modules/cjkcodecs/_codecs_iso2022.c - codec_list - +Modules/cjkcodecs/_codecs_jp.c - _mapping_list - +Modules/cjkcodecs/_codecs_jp.c - mapping_list - +Modules/cjkcodecs/_codecs_jp.c - _codec_list - +Modules/cjkcodecs/_codecs_jp.c - codec_list - +Modules/cjkcodecs/_codecs_kr.c - u2johabjamo - +Modules/cjkcodecs/_codecs_kr.c - _mapping_list - +Modules/cjkcodecs/_codecs_kr.c - mapping_list - +Modules/cjkcodecs/_codecs_kr.c - _codec_list - +Modules/cjkcodecs/_codecs_kr.c - codec_list - +Modules/cjkcodecs/_codecs_tw.c - _mapping_list - +Modules/cjkcodecs/_codecs_tw.c - mapping_list - +Modules/cjkcodecs/_codecs_tw.c - _codec_list - +Modules/cjkcodecs/_codecs_tw.c - codec_list - Modules/cjkcodecs/cjkcodecs.h - __methods - Modules/cmathmodule.c - acos_special_values - Modules/cmathmodule.c - acosh_special_values - @@ -404,6 +441,8 @@ Modules/nismodule.c - TIMEOUT - Modules/nismodule.c - aliases - Modules/ossaudiodev.c - 
control_labels - Modules/ossaudiodev.c - control_names - +Modules/posixmodule.c os_getxattr_impl buffer_sizes - +Modules/posixmodule.c os_listxattr_impl buffer_sizes - Modules/posixmodule.c - posix_constants_confstr - Modules/posixmodule.c - posix_constants_pathconf - Modules/posixmodule.c - posix_constants_sysconf - @@ -426,6 +465,7 @@ Objects/obmalloc.c - _PyMem_Debug - Objects/obmalloc.c - _PyMem_Raw - Objects/obmalloc.c - _PyObject - Objects/obmalloc.c - usedpools - +Objects/perf_trampoline.c - _Py_perfmap_callbacks - Objects/typeobject.c - name_op - Objects/unicodeobject.c - stripfuncnames - Objects/unicodeobject.c - utf7_category - @@ -437,6 +477,7 @@ Parser/parser.c - reserved_keywords - Parser/parser.c - soft_keywords - Parser/tokenizer.c - type_comment_prefix - Python/ast_opt.c fold_unaryop ops - +Python/ceval.c - binary_ops - Python/codecs.c - Py_hexdigits - Python/codecs.c - ucnhash_capi - Python/codecs.c _PyCodecRegistry_Init methods - @@ -456,6 +497,7 @@ Python/pyhash.c - PyHash_Func - Python/pylifecycle.c - _C_LOCALE_WARNING - Python/pylifecycle.c - _PyOS_mystrnicmp_hack - Python/pylifecycle.c - _TARGET_LOCALES - +Python/pystate.c - initial - Python/specialize.c - adaptive_opcodes - Python/specialize.c - cache_requirements - Python/specialize.c - compare_masks -