From c29bbe21018dc1602ea70f34621de67cce782ed2 Mon Sep 17 00:00:00 2001 From: Savannah Ostrowski Date: Wed, 30 Oct 2024 12:03:31 -0700 Subject: [PATCH] GH-125498: Update JIT builds to use LLVM 19 and preserve_none (GH-125499) --- .github/workflows/jit.yml | 15 ++++- ...-09-14-20-09-39.gh-issue-123714.o1mbe4.rst | 1 + ...-10-22-04-18-53.gh-issue-125498.cFjPIn.rst | 4 ++ Tools/jit/README.md | 21 +++--- Tools/jit/_llvm.py | 2 +- Tools/jit/_stencils.py | 14 +++- Tools/jit/_targets.py | 67 +++++-------------- Tools/jit/_writer.py | 2 +- Tools/jit/jit.h | 4 ++ Tools/jit/template.c | 8 ++- Tools/jit/trampoline.c | 9 ++- 11 files changed, 69 insertions(+), 78 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-09-14-20-09-39.gh-issue-123714.o1mbe4.rst create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-10-22-04-18-53.gh-issue-125498.cFjPIn.rst create mode 100644 Tools/jit/jit.h diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index 754f179f105..5fb599b232d 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -61,7 +61,7 @@ jobs: - true - false llvm: - - 18 + - 19 include: - target: i686-pc-windows-msvc/msvc architecture: Win32 @@ -121,10 +121,15 @@ jobs: choco install llvm --allow-downgrade --no-progress --version ${{ matrix.llvm }}.1.0 ./PCbuild/build.bat --experimental-jit ${{ matrix.debug && '-d' || '' }} -p ${{ matrix.architecture }} + # The `find` line is required as a result of https://github.com/actions/runner-images/issues/9966. + # This is a bug in the macOS runner image where the pre-installed Python is installed in the same + # directory as the Homebrew Python, which causes the build to fail for macos-13. This line removes + # the symlink to the pre-installed Python so that the Homebrew Python is used instead. - name: Native macOS if: runner.os == 'macOS' run: | brew update + find /usr/local/bin -lname '*/Library/Frameworks/Python.framework/*' -delete brew install llvm@${{ matrix.llvm }} SDKROOT="$(xcrun --show-sdk-path)" \ ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }} @@ -165,6 +170,10 @@ jobs: name: Free-Threaded (Debug) needs: interpreter runs-on: ubuntu-latest + strategy: + matrix: + llvm: + - 19 steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -172,8 +181,8 @@ jobs: python-version: '3.11' - name: Build with JIT enabled and GIL disabled run: | - sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh 18 - export PATH="$(llvm-config-18 --bindir):$PATH" + sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} + export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" ./configure --enable-experimental-jit --with-pydebug --disable-gil make all --jobs 4 - name: Run tests diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-09-14-20-09-39.gh-issue-123714.o1mbe4.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-14-20-09-39.gh-issue-123714.o1mbe4.rst new file mode 100644 index 00000000000..03a2ef63238 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-09-14-20-09-39.gh-issue-123714.o1mbe4.rst @@ -0,0 +1 @@ +Update JIT compilation to use LLVM 19 diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-10-22-04-18-53.gh-issue-125498.cFjPIn.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-22-04-18-53.gh-issue-125498.cFjPIn.rst new file mode 100644 index 00000000000..807c2e07210 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-10-22-04-18-53.gh-issue-125498.cFjPIn.rst @@ -0,0 +1,4 @@ +The JIT has been updated to leverage Clang 19’s new ``preserve_none`` attribute, +which supports more platforms and is more useful than LLVM's existing ``ghccc`` +calling convention. This also removes the need to manually patch the calling +convention in LLVM IR, simplifying the JIT compilation process. diff --git a/Tools/jit/README.md b/Tools/jit/README.md index bc6f793b296..801c64e4059 100644 --- a/Tools/jit/README.md +++ b/Tools/jit/README.md @@ -7,49 +7,46 @@ ## Installing LLVM The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon). -LLVM version 18 is required. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-18`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code. +LLVM version 19 is required. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code. It's easy to install all of the required tools: ### Linux -Install LLVM 18 on Ubuntu/Debian: +Install LLVM 19 on Ubuntu/Debian: ```sh wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh -sudo ./llvm.sh 18 +sudo ./llvm.sh 19 ``` -Install LLVM 18 on Fedora Linux 40 or newer: +Install LLVM 19 on Fedora Linux 40 or newer: ```sh -sudo dnf install 'clang(major) = 18' 'llvm(major) = 18' +sudo dnf install 'clang(major) = 19' 'llvm(major) = 19' ``` ### macOS -Install LLVM 18 with [Homebrew](https://brew.sh): +Install LLVM 19 with [Homebrew](https://brew.sh): ```sh -brew install llvm@18 +brew install llvm@19 ``` Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them. ### Windows -Install LLVM 18 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=18), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".** +Install LLVM 19 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=19), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".** Alternatively, you can use [chocolatey](https://chocolatey.org): ```sh -choco install llvm --version=18.1.6 +choco install llvm --version=19.1.0 ``` -### Dev Containers - -If you are working CPython in a [Codespaces instance](https://devguide.python.org/getting-started/setup-building/#using-codespaces), there's no need to install LLVM as the Fedora 40 base image includes LLVM 18 out of the box. ## Building diff --git a/Tools/jit/_llvm.py b/Tools/jit/_llvm.py index 606f280a14d..925b56ac669 100644 --- a/Tools/jit/_llvm.py +++ b/Tools/jit/_llvm.py @@ -8,7 +8,7 @@ import subprocess import typing -_LLVM_VERSION = 18 +_LLVM_VERSION = 19 _LLVM_VERSION_PATTERN = re.compile(rf"version\s+{_LLVM_VERSION}\.\d+\.\d+\S*\s+") _P = typing.ParamSpec("_P") diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py index e4b2bf6e470..2cd051b0a77 100644 --- a/Tools/jit/_stencils.py +++ b/Tools/jit/_stencils.py @@ -2,6 +2,7 @@ import dataclasses import enum +import sys import typing import _schema @@ -132,8 +133,18 @@ class Hole: def __post_init__(self) -> None: self.func = _PATCH_FUNCS[self.kind] - def fold(self, other: typing.Self) -> typing.Self | None: + def fold(self, other: typing.Self, body: bytes) -> typing.Self | None: """Combine two holes into a single hole, if possible.""" + instruction_a = int.from_bytes( + body[self.offset : self.offset + 4], byteorder=sys.byteorder + ) + instruction_b = int.from_bytes( + body[other.offset : other.offset + 4], byteorder=sys.byteorder + ) + reg_a = instruction_a & 0b11111 + reg_b1 = instruction_b & 0b11111 + reg_b2 = (instruction_b >> 5) & 0b11111 + if ( self.offset + 4 == other.offset and self.value == other.value @@ -141,6 +152,7 @@ def fold(self, other: typing.Self) -> typing.Self | None: and self.addend == other.addend and self.func == "patch_aarch64_21rx" and other.func == "patch_aarch64_12x" + and reg_a == reg_b1 == reg_b2 ): # These can *only* be properly relaxed when they appear together and # patch the same value: diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 5eb316e782f..634208da3c8 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -26,7 +26,6 @@ PYTHON_EXECUTOR_CASES_C_H = CPYTHON / "Python" / "executor_cases.c.h" TOOLS_JIT_TEMPLATE_C = TOOLS_JIT / "template.c" - _S = typing.TypeVar("_S", _schema.COFFSection, _schema.ELFSection, _schema.MachOSection) _R = typing.TypeVar( "_R", _schema.COFFRelocation, _schema.ELFRelocation, _schema.MachORelocation @@ -39,7 +38,6 @@ class _Target(typing.Generic[_S, _R]): _: dataclasses.KW_ONLY alignment: int = 1 args: typing.Sequence[str] = () - ghccc: bool = False prefix: str = "" stable: bool = False debug: bool = False @@ -88,11 +86,7 @@ async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: sections: list[dict[typing.Literal["Section"], _S]] = json.loads(output) for wrapped_section in sections: self._handle_section(wrapped_section["Section"], group) - # The trampoline's entry point is just named "_ENTRY", since on some - # platforms we later assume that any function starting with "_JIT_" uses - # the GHC calling convention: - entry_symbol = "_JIT_ENTRY" if "_JIT_ENTRY" in group.symbols else "_ENTRY" - assert group.symbols[entry_symbol] == (_stencils.HoleValue.CODE, 0) + assert group.symbols["_JIT_ENTRY"] == (_stencils.HoleValue.CODE, 0) if group.data.body: line = f"0: {str(bytes(group.data.body)).removeprefix('b')}" group.data.disassembly.append(line) @@ -112,9 +106,6 @@ def _handle_relocation( async def _compile( self, opname: str, c: pathlib.Path, tempdir: pathlib.Path ) -> _stencils.StencilGroup: - # "Compile" the trampoline to an empty stencil group if it's not needed: - if opname == "trampoline" and not self.ghccc: - return _stencils.StencilGroup() o = tempdir / f"{opname}.o" args = [ f"--target={self.triple}", @@ -128,6 +119,7 @@ async def _compile( f"-I{CPYTHON / 'Include' / 'internal'}", f"-I{CPYTHON / 'Include' / 'internal' / 'mimalloc'}", f"-I{CPYTHON / 'Python'}", + f"-I{CPYTHON / 'Tools' / 'jit'}", "-O3", "-c", # This debug info isn't necessary, and bloats out the JIT'ed code. @@ -143,44 +135,12 @@ async def _compile( # Don't call stack-smashing canaries that we can't find or patch: "-fno-stack-protector", "-std=c11", + "-o", + f"{o}", + f"{c}", *self.args, ] - if self.ghccc: - # This is a bit of an ugly workaround, but it makes the code much - # smaller and faster, so it's worth it. We want to use the GHC - # calling convention, but Clang doesn't support it. So, we *first* - # compile the code to LLVM IR, perform some text replacements on the - # IR to change the calling convention(!), and then compile *that*. - # Once we have access to Clang 19, we can get rid of this and use - # __attribute__((preserve_none)) directly in the C code instead: - ll = tempdir / f"{opname}.ll" - args_ll = args + [ - # -fomit-frame-pointer is necessary because the GHC calling - # convention uses RBP to pass arguments: - "-S", - "-emit-llvm", - "-fomit-frame-pointer", - "-o", - f"{ll}", - f"{c}", - ] - await _llvm.run("clang", args_ll, echo=self.verbose) - ir = ll.read_text() - # This handles declarations, definitions, and calls to named symbols - # starting with "_JIT_": - ir = re.sub( - r"(((noalias|nonnull|noundef) )*ptr @_JIT_\w+\()", r"ghccc \1", ir - ) - # This handles calls to anonymous callees, since anything with - # "musttail" needs to use the same calling convention: - ir = ir.replace("musttail call", "musttail call ghccc") - # Sometimes *both* replacements happen at the same site, so fix it: - ir = ir.replace("ghccc ghccc", "ghccc") - ll.write_text(ir) - args_o = args + ["-Wno-unused-command-line-argument", "-o", f"{o}", f"{ll}"] - else: - args_o = args + ["-o", f"{o}", f"{c}"] - await _llvm.run("clang", args_o, echo=self.verbose) + await _llvm.run("clang", args, echo=self.verbose) return await self._parse(o) async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: @@ -519,7 +479,6 @@ def _handle_relocation( def get_target(host: str) -> _COFF | _ELF | _MachO: """Build a _Target for the given host "triple" and options.""" - # ghccc currently crashes Clang when combined with musttail on aarch64. :( target: _COFF | _ELF | _MachO if re.fullmatch(r"aarch64-apple-darwin.*", host): target = _MachO(host, alignment=8, prefix="_") @@ -535,16 +494,20 @@ def get_target(host: str) -> _COFF | _ELF | _MachO: ] target = _ELF(host, alignment=8, args=args) elif re.fullmatch(r"i686-pc-windows-msvc", host): - args = ["-DPy_NO_ENABLE_SHARED"] - target = _COFF(host, args=args, ghccc=True, prefix="_") + args = [ + "-DPy_NO_ENABLE_SHARED", + # __attribute__((preserve_none)) is not supported + "-Wno-ignored-attributes", + ] + target = _COFF(host, args=args, prefix="_") elif re.fullmatch(r"x86_64-apple-darwin.*", host): - target = _MachO(host, ghccc=True, prefix="_") + target = _MachO(host, prefix="_") elif re.fullmatch(r"x86_64-pc-windows-msvc", host): args = ["-fms-runtime-lib=dll"] - target = _COFF(host, args=args, ghccc=True) + target = _COFF(host, args=args) elif re.fullmatch(r"x86_64-.*-linux-gnu", host): args = ["-fpic"] - target = _ELF(host, args=args, ghccc=True) + target = _ELF(host, args=args) else: raise ValueError(host) return target diff --git a/Tools/jit/_writer.py b/Tools/jit/_writer.py index 4e7f614b0e9..f33d8ef322f 100644 --- a/Tools/jit/_writer.py +++ b/Tools/jit/_writer.py @@ -65,7 +65,7 @@ def _dump_stencil(opname: str, group: _stencils.StencilGroup) -> typing.Iterator if skip: skip = False continue - if pair and (folded := hole.fold(pair)): + if pair and (folded := hole.fold(pair, stencil.body)): skip = True hole = folded yield f" {hole.as_c(part)}" diff --git a/Tools/jit/jit.h b/Tools/jit/jit.h new file mode 100644 index 00000000000..47da64cb12b --- /dev/null +++ b/Tools/jit/jit.h @@ -0,0 +1,4 @@ +// To use preserve_none in JIT builds, we need to declare a separate function +// pointer with __attribute__((preserve_none)), since this attribute may not be +// supported by the compiler used to build the rest of the interpreter. +typedef jit_func __attribute__((preserve_none)) jit_func_preserve_none; diff --git a/Tools/jit/template.c b/Tools/jit/template.c index 6cf15085f79..57c1006ab42 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -21,6 +21,8 @@ #include "ceval_macros.h" +#include "jit.h" + #undef CURRENT_OPARG #define CURRENT_OPARG() (_oparg) @@ -49,7 +51,7 @@ do { \ OPT_STAT_INC(traces_executed); \ __attribute__((musttail)) \ - return ((jit_func)((EXECUTOR)->jit_side_entry))(frame, stack_pointer, tstate); \ + return ((jit_func_preserve_none)((EXECUTOR)->jit_side_entry))(frame, stack_pointer, tstate); \ } while (0) #undef GOTO_TIER_ONE @@ -72,7 +74,7 @@ do { \ do { \ PyAPI_DATA(void) ALIAS; \ __attribute__((musttail)) \ - return ((jit_func)&ALIAS)(frame, stack_pointer, tstate); \ + return ((jit_func_preserve_none)&ALIAS)(frame, stack_pointer, tstate); \ } while (0) #undef JUMP_TO_JUMP_TARGET @@ -86,7 +88,7 @@ do { \ #define TIER_TWO 2 -_Py_CODEUNIT * +__attribute__((preserve_none)) _Py_CODEUNIT * _JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate) { // Locals that the instruction implementations expect to exist: diff --git a/Tools/jit/trampoline.c b/Tools/jit/trampoline.c index a0a963f2a49..f0cffa2f049 100644 --- a/Tools/jit/trampoline.c +++ b/Tools/jit/trampoline.c @@ -4,11 +4,10 @@ #include "pycore_frame.h" #include "pycore_jit.h" -// This is where the calling convention changes, on platforms that require it. -// The actual change is patched in while the JIT compiler is being built, in -// Tools/jit/_targets.py. On other platforms, this function compiles to nothing. +#include "jit.h" + _Py_CODEUNIT * -_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate) +_JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate) { // This is subtle. The actual trace will return to us once it exits, so we // need to make sure that we stay alive until then. If our trace side-exits @@ -19,7 +18,7 @@ _ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *ts Py_INCREF(executor); // Note that this is *not* a tail call: PyAPI_DATA(void) _JIT_CONTINUE; - _Py_CODEUNIT *target = ((jit_func)&_JIT_CONTINUE)(frame, stack_pointer, tstate); + _Py_CODEUNIT *target = ((jit_func_preserve_none)&_JIT_CONTINUE)(frame, stack_pointer, tstate); Py_SETREF(tstate->previous_executor, executor); return target; }