From 51185923a8dfdb59fc04f235fd19881d10d65acf Mon Sep 17 00:00:00 2001
From: Brandt Bucher
Date: Wed, 14 Aug 2024 07:53:46 -0700
Subject: [PATCH] GH-113464: Speed up JIT builds (GH-122839)

---
 Tools/jit/_targets.py | 18 +++++++++++++++---
 Tools/jit/template.c  |  6 ++++--
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py
index 73d10a12875..e37ee943999 100644
--- a/Tools/jit/_targets.py
+++ b/Tools/jit/_targets.py
@@ -182,15 +182,27 @@ async def _compile(
 
     async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]:
         generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text()
-        opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases))
+        cases_and_opnames = sorted(
+            re.findall(
+                r"\n {8}(case (\w+): \{\n.*?\n {8}\})", generated_cases, flags=re.DOTALL
+            )
+        )
         tasks = []
         with tempfile.TemporaryDirectory() as tempdir:
             work = pathlib.Path(tempdir).resolve()
             async with asyncio.TaskGroup() as group:
                 coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work)
                 tasks.append(group.create_task(coro, name="trampoline"))
-                for opname in opnames:
-                    coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work)
+                template = TOOLS_JIT_TEMPLATE_C.read_text()
+                for case, opname in cases_and_opnames:
+                    # Write out a copy of the template with *only* this case
+                    # inserted. This is about twice as fast as #include'ing all
+                    # of executor_cases.c.h each time we compile (since the C
+                    # compiler wastes a bunch of time parsing the dead code for
+                    # all of the other cases):
+                    c = work / f"{opname}.c"
+                    c.write_text(template.replace("CASE", case))
+                    coro = self._compile(opname, c, work)
                     tasks.append(group.create_task(coro, name=opname))
         return {task.get_name(): task.result() for task in tasks}
 
diff --git a/Tools/jit/template.c b/Tools/jit/template.c
index ec7d033e89d..6cf15085f79 100644
--- a/Tools/jit/template.c
+++ b/Tools/jit/template.c
@@ -84,6 +84,8 @@ do { \
 #undef WITHIN_STACK_BOUNDS
 #define WITHIN_STACK_BOUNDS() 1
 
+#define TIER_TWO 2
+
 _Py_CODEUNIT *
 _JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate)
 {
@@ -107,9 +109,9 @@ _JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState
     OPT_STAT_INC(uops_executed);
     UOP_STAT_INC(uopcode, execution_count);
 
-    // The actual instruction definitions (only one will be used):
     switch (uopcode) {
-#include "executor_cases.c.h"
+        // The actual instruction definition gets inserted here:
+        CASE
         default:
             Py_UNREACHABLE();
     }