"""Target-specific code generation, parsing, and processing.""" import asyncio import dataclasses import hashlib import json import os import pathlib import re import sys import tempfile import typing import _llvm import _schema import _stencils import _writer if sys.version_info < (3, 11): raise RuntimeError("Building the JIT compiler requires Python 3.11 or newer!") TOOLS_JIT_BUILD = pathlib.Path(__file__).resolve() TOOLS_JIT = TOOLS_JIT_BUILD.parent TOOLS = TOOLS_JIT.parent CPYTHON = TOOLS.parent PYTHON_EXECUTOR_CASES_C_H = CPYTHON / "Python" / "executor_cases.c.h" TOOLS_JIT_TEMPLATE_C = TOOLS_JIT / "template.c" _S = typing.TypeVar("_S", _schema.COFFSection, _schema.ELFSection, _schema.MachOSection) _R = typing.TypeVar( "_R", _schema.COFFRelocation, _schema.ELFRelocation, _schema.MachORelocation ) @dataclasses.dataclass class _Target(typing.Generic[_S, _R]): triple: str _: dataclasses.KW_ONLY alignment: int = 1 args: typing.Sequence[str] = () ghccc: bool = False prefix: str = "" stable: bool = False debug: bool = False verbose: bool = False known_symbols: dict[str, int] = dataclasses.field(default_factory=dict) def _compute_digest(self, out: pathlib.Path) -> str: hasher = hashlib.sha256() hasher.update(self.triple.encode()) hasher.update(self.debug.to_bytes()) # These dependencies are also reflected in _JITSources in regen.targets: hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes()) hasher.update((out / "pyconfig.h").read_bytes()) for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)): for filename in filenames: hasher.update(pathlib.Path(dirpath, filename).read_bytes()) return hasher.hexdigest() async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: group = _stencils.StencilGroup() args = ["--disassemble", "--reloc", f"{path}"] output = await _llvm.maybe_run("llvm-objdump", args, echo=self.verbose) if output is not None: group.code.disassembly.extend( line.expandtabs().strip() for line in output.splitlines() if not line.isspace() ) args = [ "--elf-output-style=JSON", "--expand-relocs", # "--pretty-print", "--section-data", "--section-relocations", "--section-symbols", "--sections", f"{path}", ] output = await _llvm.run("llvm-readobj", args, echo=self.verbose) # --elf-output-style=JSON is only *slightly* broken on Mach-O... output = output.replace("PrivateExtern\n", "\n") output = output.replace("Extern\n", "\n") # ...and also COFF: output = output[output.index("[", 1, None) :] output = output[: output.rindex("]", None, -1) + 1] sections: list[dict[typing.Literal["Section"], _S]] = json.loads(output) for wrapped_section in sections: self._handle_section(wrapped_section["Section"], group) # The trampoline's entry point is just named "_ENTRY", since on some # platforms we later assume that any function starting with "_JIT_" uses # the GHC calling convention: entry_symbol = "_JIT_ENTRY" if "_JIT_ENTRY" in group.symbols else "_ENTRY" assert group.symbols[entry_symbol] == (_stencils.HoleValue.CODE, 0) if group.data.body: line = f"0: {str(bytes(group.data.body)).removeprefix('b')}" group.data.disassembly.append(line) group.process_relocations( known_symbols=self.known_symbols, alignment=self.alignment ) return group def _handle_section(self, section: _S, group: _stencils.StencilGroup) -> None: raise NotImplementedError(type(self)) def _handle_relocation( self, base: int, relocation: _R, raw: bytes ) -> _stencils.Hole: raise NotImplementedError(type(self)) async def _compile( self, opname: str, c: pathlib.Path, tempdir: pathlib.Path ) -> _stencils.StencilGroup: # "Compile" the trampoline to an empty stencil group if it's not needed: if opname == "trampoline" and not self.ghccc: return _stencils.StencilGroup() o = tempdir / f"{opname}.o" args = [ f"--target={self.triple}", "-DPy_BUILD_CORE_MODULE", "-D_DEBUG" if self.debug else "-DNDEBUG", f"-D_JIT_OPCODE={opname}", "-D_PyJIT_ACTIVE", "-D_Py_JIT", "-I.", f"-I{CPYTHON / 'Include'}", f"-I{CPYTHON / 'Include' / 'internal'}", f"-I{CPYTHON / 'Include' / 'internal' / 'mimalloc'}", f"-I{CPYTHON / 'Python'}", "-O3", "-c", # This debug info isn't necessary, and bloats out the JIT'ed code. # We *may* be able to re-enable this, process it, and JIT it for a # nicer debugging experience... but that needs a lot more research: "-fno-asynchronous-unwind-tables", # Don't call built-in functions that we can't find or patch: "-fno-builtin", # Emit relaxable 64-bit calls/jumps, so we don't have to worry about # about emitting in-range trampolines for out-of-range targets. # We can probably remove this and emit trampolines in the future: "-fno-plt", # Don't call stack-smashing canaries that we can't find or patch: "-fno-stack-protector", "-std=c11", *self.args, ] if self.ghccc: # This is a bit of an ugly workaround, but it makes the code much # smaller and faster, so it's worth it. We want to use the GHC # calling convention, but Clang doesn't support it. So, we *first* # compile the code to LLVM IR, perform some text replacements on the # IR to change the calling convention(!), and then compile *that*. # Once we have access to Clang 19, we can get rid of this and use # __attribute__((preserve_none)) directly in the C code instead: ll = tempdir / f"{opname}.ll" args_ll = args + [ # -fomit-frame-pointer is necessary because the GHC calling # convention uses RBP to pass arguments: "-S", "-emit-llvm", "-fomit-frame-pointer", "-o", f"{ll}", f"{c}", ] await _llvm.run("clang", args_ll, echo=self.verbose) ir = ll.read_text() # This handles declarations, definitions, and calls to named symbols # starting with "_JIT_": ir = re.sub( r"(((noalias|nonnull|noundef) )*ptr @_JIT_\w+\()", r"ghccc \1", ir ) # This handles calls to anonymous callees, since anything with # "musttail" needs to use the same calling convention: ir = ir.replace("musttail call", "musttail call ghccc") # Sometimes *both* replacements happen at the same site, so fix it: ir = ir.replace("ghccc ghccc", "ghccc") ll.write_text(ir) args_o = args + ["-Wno-unused-command-line-argument", "-o", f"{o}", f"{ll}"] else: args_o = args + ["-o", f"{o}", f"{c}"] await _llvm.run("clang", args_o, echo=self.verbose) return await self._parse(o) async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text() cases_and_opnames = sorted( re.findall( r"\n {8}(case (\w+): \{\n.*?\n {8}\})", generated_cases, flags=re.DOTALL ) ) tasks = [] with tempfile.TemporaryDirectory() as tempdir: work = pathlib.Path(tempdir).resolve() async with asyncio.TaskGroup() as group: coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work) tasks.append(group.create_task(coro, name="trampoline")) template = TOOLS_JIT_TEMPLATE_C.read_text() for case, opname in cases_and_opnames: # Write out a copy of the template with *only* this case # inserted. This is about twice as fast as #include'ing all # of executor_cases.c.h each time we compile (since the C # compiler wastes a bunch of time parsing the dead code for # all of the other cases): c = work / f"{opname}.c" c.write_text(template.replace("CASE", case)) coro = self._compile(opname, c, work) tasks.append(group.create_task(coro, name=opname)) return {task.get_name(): task.result() for task in tasks} def build( self, out: pathlib.Path, *, comment: str = "", force: bool = False ) -> None: """Build jit_stencils.h in the given directory.""" if not self.stable: warning = f"JIT support for {self.triple} is still experimental!" request = "Please report any issues you encounter.".center(len(warning)) outline = "=" * len(warning) print("\n".join(["", outline, warning, request, outline, ""])) digest = f"// {self._compute_digest(out)}\n" jit_stencils = out / "jit_stencils.h" if ( not force and jit_stencils.exists() and jit_stencils.read_text().startswith(digest) ): return stencil_groups = asyncio.run(self._build_stencils()) jit_stencils_new = out / "jit_stencils.h.new" try: with jit_stencils_new.open("w") as file: file.write(digest) if comment: file.write(f"// {comment}\n") file.write("\n") for line in _writer.dump(stencil_groups, self.known_symbols): file.write(f"{line}\n") try: jit_stencils_new.replace(jit_stencils) except FileNotFoundError: # another process probably already moved the file if not jit_stencils.is_file(): raise finally: jit_stencils_new.unlink(missing_ok=True) class _COFF( _Target[_schema.COFFSection, _schema.COFFRelocation] ): # pylint: disable = too-few-public-methods def _handle_section( self, section: _schema.COFFSection, group: _stencils.StencilGroup ) -> None: flags = {flag["Name"] for flag in section["Characteristics"]["Flags"]} if "SectionData" in section: section_data_bytes = section["SectionData"]["Bytes"] else: # Zeroed BSS data, seen with printf debugging calls: section_data_bytes = [0] * section["RawDataSize"] if "IMAGE_SCN_MEM_EXECUTE" in flags: value = _stencils.HoleValue.CODE stencil = group.code elif "IMAGE_SCN_MEM_READ" in flags: value = _stencils.HoleValue.DATA stencil = group.data else: return base = len(stencil.body) group.symbols[section["Number"]] = value, base stencil.body.extend(section_data_bytes) for wrapped_symbol in section["Symbols"]: symbol = wrapped_symbol["Symbol"] offset = base + symbol["Value"] name = symbol["Name"] name = name.removeprefix(self.prefix) if name not in group.symbols: group.symbols[name] = value, offset for wrapped_relocation in section["Relocations"]: relocation = wrapped_relocation["Relocation"] hole = self._handle_relocation(base, relocation, stencil.body) stencil.holes.append(hole) def _unwrap_dllimport(self, name: str) -> tuple[_stencils.HoleValue, str | None]: if name.startswith("__imp_"): name = name.removeprefix("__imp_") name = name.removeprefix(self.prefix) return _stencils.HoleValue.GOT, name name = name.removeprefix(self.prefix) return _stencils.symbol_to_value(name) def _handle_relocation( self, base: int, relocation: _schema.COFFRelocation, raw: bytes ) -> _stencils.Hole: match relocation: case { "Offset": offset, "Symbol": s, "Type": {"Name": "IMAGE_REL_I386_DIR32" as kind}, }: offset += base value, symbol = self._unwrap_dllimport(s) addend = int.from_bytes(raw[offset : offset + 4], "little") case { "Offset": offset, "Symbol": s, "Type": { "Name": "IMAGE_REL_AMD64_REL32" | "IMAGE_REL_I386_REL32" as kind }, }: offset += base value, symbol = self._unwrap_dllimport(s) addend = ( int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 ) case { "Offset": offset, "Symbol": s, "Type": { "Name": "IMAGE_REL_ARM64_BRANCH26" | "IMAGE_REL_ARM64_PAGEBASE_REL21" | "IMAGE_REL_ARM64_PAGEOFFSET_12A" | "IMAGE_REL_ARM64_PAGEOFFSET_12L" as kind }, }: offset += base value, symbol = self._unwrap_dllimport(s) addend = 0 case _: raise NotImplementedError(relocation) return _stencils.Hole(offset, kind, value, symbol, addend) class _ELF( _Target[_schema.ELFSection, _schema.ELFRelocation] ): # pylint: disable = too-few-public-methods def _handle_section( self, section: _schema.ELFSection, group: _stencils.StencilGroup ) -> None: section_type = section["Type"]["Name"] flags = {flag["Name"] for flag in section["Flags"]["Flags"]} if section_type == "SHT_RELA": assert "SHF_INFO_LINK" in flags, flags assert not section["Symbols"] value, base = group.symbols[section["Info"]] if value is _stencils.HoleValue.CODE: stencil = group.code else: assert value is _stencils.HoleValue.DATA stencil = group.data for wrapped_relocation in section["Relocations"]: relocation = wrapped_relocation["Relocation"] hole = self._handle_relocation(base, relocation, stencil.body) stencil.holes.append(hole) elif section_type == "SHT_PROGBITS": if "SHF_ALLOC" not in flags: return if "SHF_EXECINSTR" in flags: value = _stencils.HoleValue.CODE stencil = group.code else: value = _stencils.HoleValue.DATA stencil = group.data group.symbols[section["Index"]] = value, len(stencil.body) for wrapped_symbol in section["Symbols"]: symbol = wrapped_symbol["Symbol"] offset = len(stencil.body) + symbol["Value"] name = symbol["Name"]["Name"] name = name.removeprefix(self.prefix) group.symbols[name] = value, offset stencil.body.extend(section["SectionData"]["Bytes"]) assert not section["Relocations"] else: assert section_type in { "SHT_GROUP", "SHT_LLVM_ADDRSIG", "SHT_NOTE", "SHT_NULL", "SHT_STRTAB", "SHT_SYMTAB", }, section_type def _handle_relocation( self, base: int, relocation: _schema.ELFRelocation, raw: bytes ) -> _stencils.Hole: symbol: str | None match relocation: case { "Addend": addend, "Offset": offset, "Symbol": {"Name": s}, "Type": { "Name": "R_AARCH64_ADR_GOT_PAGE" | "R_AARCH64_LD64_GOT_LO12_NC" | "R_X86_64_GOTPCREL" | "R_X86_64_GOTPCRELX" | "R_X86_64_REX_GOTPCRELX" as kind }, }: offset += base s = s.removeprefix(self.prefix) value, symbol = _stencils.HoleValue.GOT, s case { "Addend": addend, "Offset": offset, "Symbol": {"Name": s}, "Type": {"Name": kind}, }: offset += base s = s.removeprefix(self.prefix) value, symbol = _stencils.symbol_to_value(s) case _: raise NotImplementedError(relocation) return _stencils.Hole(offset, kind, value, symbol, addend) class _MachO( _Target[_schema.MachOSection, _schema.MachORelocation] ): # pylint: disable = too-few-public-methods def _handle_section( self, section: _schema.MachOSection, group: _stencils.StencilGroup ) -> None: assert section["Address"] >= len(group.code.body) assert "SectionData" in section flags = {flag["Name"] for flag in section["Attributes"]["Flags"]} name = section["Name"]["Value"] name = name.removeprefix(self.prefix) if "Debug" in flags: return if "SomeInstructions" in flags: value = _stencils.HoleValue.CODE stencil = group.code start_address = 0 group.symbols[name] = value, section["Address"] - start_address else: value = _stencils.HoleValue.DATA stencil = group.data start_address = len(group.code.body) group.symbols[name] = value, len(group.code.body) base = section["Address"] - start_address group.symbols[section["Index"]] = value, base stencil.body.extend( [0] * (section["Address"] - len(group.code.body) - len(group.data.body)) ) stencil.body.extend(section["SectionData"]["Bytes"]) assert "Symbols" in section for wrapped_symbol in section["Symbols"]: symbol = wrapped_symbol["Symbol"] offset = symbol["Value"] - start_address name = symbol["Name"]["Name"] name = name.removeprefix(self.prefix) group.symbols[name] = value, offset assert "Relocations" in section for wrapped_relocation in section["Relocations"]: relocation = wrapped_relocation["Relocation"] hole = self._handle_relocation(base, relocation, stencil.body) stencil.holes.append(hole) def _handle_relocation( self, base: int, relocation: _schema.MachORelocation, raw: bytes ) -> _stencils.Hole: symbol: str | None match relocation: case { "Offset": offset, "Symbol": {"Name": s}, "Type": { "Name": "ARM64_RELOC_GOT_LOAD_PAGE21" | "ARM64_RELOC_GOT_LOAD_PAGEOFF12" as kind }, }: offset += base s = s.removeprefix(self.prefix) value, symbol = _stencils.HoleValue.GOT, s addend = 0 case { "Offset": offset, "Symbol": {"Name": s}, "Type": {"Name": "X86_64_RELOC_GOT" | "X86_64_RELOC_GOT_LOAD" as kind}, }: offset += base s = s.removeprefix(self.prefix) value, symbol = _stencils.HoleValue.GOT, s addend = ( int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 ) case { "Offset": offset, "Section": {"Name": s}, "Type": {"Name": "X86_64_RELOC_SIGNED" as kind}, } | { "Offset": offset, "Symbol": {"Name": s}, "Type": {"Name": "X86_64_RELOC_BRANCH" | "X86_64_RELOC_SIGNED" as kind}, }: offset += base s = s.removeprefix(self.prefix) value, symbol = _stencils.symbol_to_value(s) addend = ( int.from_bytes(raw[offset : offset + 4], "little", signed=True) - 4 ) case { "Offset": offset, "Section": {"Name": s}, "Type": {"Name": kind}, } | { "Offset": offset, "Symbol": {"Name": s}, "Type": {"Name": kind}, }: offset += base s = s.removeprefix(self.prefix) value, symbol = _stencils.symbol_to_value(s) addend = 0 case _: raise NotImplementedError(relocation) return _stencils.Hole(offset, kind, value, symbol, addend) def get_target(host: str) -> _COFF | _ELF | _MachO: """Build a _Target for the given host "triple" and options.""" # ghccc currently crashes Clang when combined with musttail on aarch64. :( target: _COFF | _ELF | _MachO if re.fullmatch(r"aarch64-apple-darwin.*", host): target = _MachO(host, alignment=8, prefix="_") elif re.fullmatch(r"aarch64-pc-windows-msvc", host): args = ["-fms-runtime-lib=dll"] target = _COFF(host, alignment=8, args=args) elif re.fullmatch(r"aarch64-.*-linux-gnu", host): args = [ "-fpic", # On aarch64 Linux, intrinsics were being emitted and this flag # was required to disable them. "-mno-outline-atomics", ] target = _ELF(host, alignment=8, args=args) elif re.fullmatch(r"i686-pc-windows-msvc", host): args = ["-DPy_NO_ENABLE_SHARED"] target = _COFF(host, args=args, ghccc=True, prefix="_") elif re.fullmatch(r"x86_64-apple-darwin.*", host): target = _MachO(host, ghccc=True, prefix="_") elif re.fullmatch(r"x86_64-pc-windows-msvc", host): args = ["-fms-runtime-lib=dll"] target = _COFF(host, args=args, ghccc=True) elif re.fullmatch(r"x86_64-.*-linux-gnu", host): args = ["-fpic"] target = _ELF(host, args=args, ghccc=True) else: raise ValueError(host) return target