From 17c16aea66b606d66f71ae9af381bc34d0ef3f5f Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sun, 24 Nov 2024 14:42:50 -0800 Subject: [PATCH] GH-115869: Make jit_stencils.h reproducible (GH-127166) --- ...-11-22-08-46-46.gh-issue-115869.UVLSKd.rst | 1 + Tools/jit/_stencils.py | 3 ++- Tools/jit/_targets.py | 19 +++++++++++++------ Tools/jit/_writer.py | 2 +- Tools/jit/build.py | 2 +- 5 files changed, 18 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2024-11-22-08-46-46.gh-issue-115869.UVLSKd.rst diff --git a/Misc/NEWS.d/next/Build/2024-11-22-08-46-46.gh-issue-115869.UVLSKd.rst b/Misc/NEWS.d/next/Build/2024-11-22-08-46-46.gh-issue-115869.UVLSKd.rst new file mode 100644 index 00000000000..9e8a078983f --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-11-22-08-46-46.gh-issue-115869.UVLSKd.rst @@ -0,0 +1 @@ +Make ``jit_stencils.h`` (which is produced during JIT builds) reproducible. diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index 61be8fd3bbd..ee761a73fa8 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -202,7 +202,8 @@ def pad(self, alignment: int) -> None: """Pad the stencil to the given alignment.""" offset = len(self.body) padding = -offset % alignment - self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}") + if padding: + self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}") self.body.extend([0] * padding) def remove_jump(self, *, alignment: int = 1) -> None: diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index d8dce0a905c..d23ced19842 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -61,10 +61,11 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: args = ["--disassemble", "--reloc", f"{path}"] output = await _llvm.maybe_run("llvm-objdump", args, echo=self.verbose) if output is not None: + # Make sure that full paths don't leak out (for reproducibility): + long, short = str(path), str(path.name) group.code.disassembly.extend( - line.expandtabs().strip() + line.expandtabs().strip().replace(long, short) for line in output.splitlines() - if not line.isspace() ) args = [ "--elf-output-style=JSON", @@ -90,9 +91,6 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: if group.data.body: line = f"0: {str(bytes(group.data.body)).removeprefix('b')}" group.data.disassembly.append(line) - group.process_relocations( - known_symbols=self.known_symbols, alignment=self.alignment - ) return group def _handle_section(self, section: _S, group: _stencils.StencilGroup) -> None: @@ -122,6 +120,10 @@ async def _compile( f"-I{CPYTHON / 'Tools' / 'jit'}", "-O3", "-c", + # Shorten full absolute file paths in the generated code (like the + # __FILE__ macro and assert failure messages) for reproducibility: + f"-ffile-prefix-map={CPYTHON}=.", + f"-ffile-prefix-map={tempdir}=.", # This debug info isn't necessary, and bloats out the JIT'ed code. # We *may* be able to re-enable this, process it, and JIT it for a # nicer debugging experience... but that needs a lot more research: @@ -167,7 +169,12 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: c.write_text(template.replace("CASE", case)) coro = self._compile(opname, c, work) tasks.append(group.create_task(coro, name=opname)) - return {task.get_name(): task.result() for task in tasks} + stencil_groups = {task.get_name(): task.result() for task in tasks} + for stencil_group in stencil_groups.values(): + stencil_group.process_relocations( + known_symbols=self.known_symbols, alignment=self.alignment + ) + return stencil_groups def build( self, out: pathlib.Path, *, comment: str = "", force: bool = False diff --git a/Tools/jit/_writer.py b/Tools/jit/_writer.py index 81a9f08db31..5588784544e 100644 --- a/Tools/jit/_writer.py +++ b/Tools/jit/_writer.py @@ -77,6 +77,6 @@ def dump( groups: dict[str, _stencils.StencilGroup], symbols: dict[str, int] ) -> typing.Iterator[str]: """Yield a JIT compiler line-by-line as a C header file.""" - for opname, group in sorted(groups.items()): + for opname, group in groups.items(): yield from _dump_stencil(opname, group) yield from _dump_footer(groups, symbols) diff --git a/Tools/jit/build.py b/Tools/jit/build.py index 4a23c6f0afa..a8cb0f67c36 100644 --- a/Tools/jit/build.py +++ b/Tools/jit/build.py @@ -8,7 +8,7 @@ import _targets if __name__ == "__main__": - comment = f"$ {shlex.join([sys.executable] + sys.argv)}" + comment = f"$ {shlex.join([pathlib.Path(sys.executable).name] + sys.argv)}" parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( "target", type=_targets.get_target, help="a PEP 11 target triple to compile for"