GH-115869: Make jit_stencils.h reproducible (GH-127166)

This commit is contained in:
Brandt Bucher 2024-11-24 14:42:50 -08:00 committed by GitHub
parent 307c633586
commit 17c16aea66
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 18 additions and 9 deletions

View File

@@ -0,0 +1 @@
Make ``jit_stencils.h`` (which is produced during JIT builds) reproducible.

View File

@@ -202,7 +202,8 @@ def pad(self, alignment: int) -> None:
"""Pad the stencil to the given alignment."""
offset = len(self.body)
padding = -offset % alignment
self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
if padding:
self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}")
self.body.extend([0] * padding)
def remove_jump(self, *, alignment: int = 1) -> None:

View File

@@ -61,10 +61,11 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup:
args = ["--disassemble", "--reloc", f"{path}"]
output = await _llvm.maybe_run("llvm-objdump", args, echo=self.verbose)
if output is not None:
# Make sure that full paths don't leak out (for reproducibility):
long, short = str(path), str(path.name)
group.code.disassembly.extend(
line.expandtabs().strip()
line.expandtabs().strip().replace(long, short)
for line in output.splitlines()
if not line.isspace()
)
args = [
"--elf-output-style=JSON",
@@ -90,9 +91,6 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup:
if group.data.body:
line = f"0: {str(bytes(group.data.body)).removeprefix('b')}"
group.data.disassembly.append(line)
group.process_relocations(
known_symbols=self.known_symbols, alignment=self.alignment
)
return group
def _handle_section(self, section: _S, group: _stencils.StencilGroup) -> None:
@@ -122,6 +120,10 @@ async def _compile(
f"-I{CPYTHON / 'Tools' / 'jit'}",
"-O3",
"-c",
# Shorten full absolute file paths in the generated code (like the
# __FILE__ macro and assert failure messages) for reproducibility:
f"-ffile-prefix-map={CPYTHON}=.",
f"-ffile-prefix-map={tempdir}=.",
# This debug info isn't necessary, and bloats out the JIT'ed code.
# We *may* be able to re-enable this, process it, and JIT it for a
# nicer debugging experience... but that needs a lot more research:
@@ -167,7 +169,12 @@ async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
c.write_text(template.replace("CASE", case))
coro = self._compile(opname, c, work)
tasks.append(group.create_task(coro, name=opname))
return {task.get_name(): task.result() for task in tasks}
stencil_groups = {task.get_name(): task.result() for task in tasks}
for stencil_group in stencil_groups.values():
stencil_group.process_relocations(
known_symbols=self.known_symbols, alignment=self.alignment
)
return stencil_groups
def build(
self, out: pathlib.Path, *, comment: str = "", force: bool = False

View File

@@ -77,6 +77,6 @@ def dump(
groups: dict[str, _stencils.StencilGroup], symbols: dict[str, int]
) -> typing.Iterator[str]:
"""Yield a JIT compiler line-by-line as a C header file."""
for opname, group in sorted(groups.items()):
for opname, group in groups.items():
yield from _dump_stencil(opname, group)
yield from _dump_footer(groups, symbols)

View File

@@ -8,7 +8,7 @@
import _targets
if __name__ == "__main__":
comment = f"$ {shlex.join([sys.executable] + sys.argv)}"
comment = f"$ {shlex.join([pathlib.Path(sys.executable).name] + sys.argv)}"
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"target", type=_targets.get_target, help="a PEP 11 target triple to compile for"