From 193890c1ccab4b398a218c6c8e91831477aa2ebb Mon Sep 17 00:00:00 2001 From: mpage <mpage@meta.com> Date: Mon, 25 Nov 2024 16:53:49 -0800 Subject: [PATCH] gh-126612: Include stack effects of uops when computing maximum stack depth (#126894) --- Include/internal/pycore_opcode_metadata.h | 925 ++++++++++++++++++ Lib/test/test_generated_cases.py | 179 +++- Python/flowgraph.c | 66 +- .../opcode_metadata_generator.py | 98 +- Tools/cases_generator/stack.py | 60 +- 5 files changed, 1281 insertions(+), 47 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 53280875f10..5ce172856e1 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -949,6 +949,931 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { #endif +extern int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect); +#ifdef NEED_OPCODE_METADATA +int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) { + switch(opcode) { + case BINARY_OP: { + *effect = 0; + return 0; + } + case BINARY_OP_ADD_FLOAT: { + *effect = 0; + return 0; + } + case BINARY_OP_ADD_INT: { + *effect = 0; + return 0; + } + case BINARY_OP_ADD_UNICODE: { + *effect = 0; + return 0; + } + case BINARY_OP_INPLACE_ADD_UNICODE: { + *effect = 0; + return 0; + } + case BINARY_OP_MULTIPLY_FLOAT: { + *effect = 0; + return 0; + } + case BINARY_OP_MULTIPLY_INT: { + *effect = 0; + return 0; + } + case BINARY_OP_SUBTRACT_FLOAT: { + *effect = 0; + return 0; + } + case BINARY_OP_SUBTRACT_INT: { + *effect = 0; + return 0; + } + case BINARY_SLICE: { + *effect = 0; + return 0; + } + case BINARY_SUBSCR: { + *effect = 0; + return 0; + } + case BINARY_SUBSCR_DICT: { + *effect = -1; + return 0; + } + case BINARY_SUBSCR_GETITEM: { + *effect = 0; + return 0; + } + case BINARY_SUBSCR_LIST_INT: { + *effect = -1; + return 0; + } + case BINARY_SUBSCR_STR_INT: { + *effect = -1; + return 0; + } + case BINARY_SUBSCR_TUPLE_INT: { + *effect = -1; + return 0; + } + case BUILD_LIST: { + *effect = 1 - oparg; + return 0; + } + case BUILD_MAP: { + *effect = 1 - oparg*2; + return 0; + } + case BUILD_SET: { + *effect = 1 - oparg; + return 0; + } + case BUILD_SLICE: { + *effect = -1 - ((oparg == 3) ? 1 : 0); + return 0; + } + case BUILD_STRING: { + *effect = 1 - oparg; + return 0; + } + case BUILD_TUPLE: { + *effect = 1 - oparg; + return 0; + } + case CACHE: { + *effect = 0; + return 0; + } + case CALL: { + int max_eff = Py_MAX(0, -1 - oparg); + max_eff = Py_MAX(max_eff, -2 - oparg); + *effect = max_eff; + return 0; + } + case CALL_ALLOC_AND_ENTER_INIT: { + int max_eff = Py_MAX(0, -1 - oparg); + max_eff = Py_MAX(max_eff, -2 - oparg); + *effect = max_eff; + return 0; + } + case CALL_BOUND_METHOD_EXACT_ARGS: { + int max_eff = Py_MAX(0, -1 - oparg); + max_eff = Py_MAX(max_eff, -2 - oparg); + *effect = max_eff; + return 0; + } + case CALL_BOUND_METHOD_GENERAL: { + int max_eff = Py_MAX(0, -1 - oparg); + max_eff = Py_MAX(max_eff, -2 - oparg); + *effect = max_eff; + return 0; + } + case CALL_BUILTIN_CLASS: { + *effect = -1 - oparg; + return 0; + } + case CALL_BUILTIN_FAST: { + *effect = -1 - oparg; + return 0; + } + case CALL_BUILTIN_FAST_WITH_KEYWORDS: { + *effect = -1 - oparg; + return 0; + } + case CALL_BUILTIN_O: { + *effect = -1 - oparg; + return 0; + } + case CALL_FUNCTION_EX: { + *effect = Py_MAX(0, -2 - (oparg & 1)); + return 0; + } + case CALL_INTRINSIC_1: { + *effect = 0; + return 0; + } + case CALL_INTRINSIC_2: { + *effect = -1; + return 0; + } + case CALL_ISINSTANCE: { + *effect = -1 - oparg; + return 0; + } + case CALL_KW: { + int max_eff = Py_MAX(0, -2 - oparg); + max_eff = Py_MAX(max_eff, -3 - oparg); + *effect = max_eff; + return 0; + } + case CALL_KW_BOUND_METHOD: { + int max_eff = Py_MAX(0, -2 - oparg); + max_eff = Py_MAX(max_eff, -3 - oparg); + *effect = max_eff; + return 0; + } + case CALL_KW_NON_PY: { + *effect = Py_MAX(0, -2 - oparg); + return 0; + } + case CALL_KW_PY: { + int max_eff = Py_MAX(0, -2 - oparg); + max_eff = Py_MAX(max_eff, -3 - oparg); + *effect = max_eff; + return 0; + } + case CALL_LEN: { + *effect = -1 - oparg; + return 0; + } + case CALL_LIST_APPEND: { + *effect = -3; + return 0; + } + case CALL_METHOD_DESCRIPTOR_FAST: { + *effect = -1 - oparg; + return 0; + } + case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: { + *effect = -1 - oparg; + return 0; + } + case CALL_METHOD_DESCRIPTOR_NOARGS: { + *effect = -1 - oparg; + return 0; + } + case CALL_METHOD_DESCRIPTOR_O: { + *effect = -1 - oparg; + return 0; + } + case CALL_NON_PY_GENERAL: { + *effect = Py_MAX(0, -1 - oparg); + return 0; + } + case CALL_PY_EXACT_ARGS: { + int max_eff = Py_MAX(0, -1 - oparg); + max_eff = Py_MAX(max_eff, -2 - oparg); + *effect = max_eff; + return 0; + } + case CALL_PY_GENERAL: { + int max_eff = Py_MAX(0, -1 - oparg); + max_eff = Py_MAX(max_eff, -2 - oparg); + *effect = max_eff; + return 0; + } + case CALL_STR_1: { + *effect = -2; + return 0; + } + case CALL_TUPLE_1: { + *effect = -2; + return 0; + } + case CALL_TYPE_1: { + *effect = -2; + return 0; + } + case CHECK_EG_MATCH: { + *effect = 0; + return 0; + } + case CHECK_EXC_MATCH: { + *effect = 0; + return 0; + } + case CLEANUP_THROW: { + *effect = -1; + return 0; + } + case COMPARE_OP: { + *effect = 0; + return 0; + } + case COMPARE_OP_FLOAT: { + *effect = 0; + return 0; + } + case COMPARE_OP_INT: { + *effect = 0; + return 0; + } + case COMPARE_OP_STR: { + *effect = 0; + return 0; + } + case CONTAINS_OP: { + *effect = 0; + return 0; + } + case CONTAINS_OP_DICT: { + *effect = -1; + return 0; + } + case CONTAINS_OP_SET: { + *effect = -1; + return 0; + } + case CONVERT_VALUE: { + *effect = 0; + return 0; + } + case COPY: { + *effect = 1; + return 0; + } + case COPY_FREE_VARS: { + *effect = 0; + return 0; + } + case DELETE_ATTR: { + *effect = -1; + return 0; + } + case DELETE_DEREF: { + *effect = 0; + return 0; + } + case DELETE_FAST: { + *effect = 0; + return 0; + } + case DELETE_GLOBAL: { + *effect = 0; + return 0; + } + case DELETE_NAME: { + *effect = 0; + return 0; + } + case DELETE_SUBSCR: { + *effect = -2; + return 0; + } + case DICT_MERGE: { + *effect = -1; + return 0; + } + case DICT_UPDATE: { + *effect = -1; + return 0; + } + case END_ASYNC_FOR: { + *effect = -2; + return 0; + } + case END_FOR: { + *effect = -1; + return 0; + } + case END_SEND: { + *effect = -1; + return 0; + } + case ENTER_EXECUTOR: { + *effect = 0; + return 0; + } + case EXIT_INIT_CHECK: { + *effect = -1; + return 0; + } + case EXTENDED_ARG: { + *effect = 0; + return 0; + } + case FORMAT_SIMPLE: { + *effect = 0; + return 0; + } + case FORMAT_WITH_SPEC: { + *effect = -1; + return 0; + } + case FOR_ITER: { + *effect = 1; + return 0; + } + case FOR_ITER_GEN: { + *effect = 1; + return 0; + } + case FOR_ITER_LIST: { + *effect = 1; + return 0; + } + case FOR_ITER_RANGE: { + *effect = 1; + return 0; + } + case FOR_ITER_TUPLE: { + *effect = 1; + return 0; + } + case GET_AITER: { + *effect = 0; + return 0; + } + case GET_ANEXT: { + *effect = 1; + return 0; + } + case GET_AWAITABLE: { + *effect = 0; + return 0; + } + case GET_ITER: { + *effect = 0; + return 0; + } + case GET_LEN: { + *effect = 1; + return 0; + } + case GET_YIELD_FROM_ITER: { + *effect = 0; + return 0; + } + case IMPORT_FROM: { + *effect = 1; + return 0; + } + case IMPORT_NAME: { + *effect = -1; + return 0; + } + case INSTRUMENTED_CALL: { + *effect = Py_MAX(0, -1 - oparg); + return 0; + } + case INSTRUMENTED_CALL_FUNCTION_EX: { + *effect = 0; + return 0; + } + case INSTRUMENTED_CALL_KW: { + *effect = 0; + return 0; + } + case INSTRUMENTED_END_FOR: { + *effect = -1; + return 0; + } + case INSTRUMENTED_END_SEND: { + *effect = -1; + return 0; + } + case INSTRUMENTED_FOR_ITER: { + *effect = 0; + return 0; + } + case INSTRUMENTED_INSTRUCTION: { + *effect = 0; + return 0; + } + case INSTRUMENTED_JUMP_BACKWARD: { + *effect = 0; + return 0; + } + case INSTRUMENTED_JUMP_FORWARD: { + *effect = 0; + return 0; + } + case INSTRUMENTED_LINE: { + *effect = 0; + return 0; + } + case INSTRUMENTED_LOAD_SUPER_ATTR: { + *effect = 0; + return 0; + } + case INSTRUMENTED_POP_JUMP_IF_FALSE: { + *effect = 0; + return 0; + } + case INSTRUMENTED_POP_JUMP_IF_NONE: { + *effect = 0; + return 0; + } + case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: { + *effect = 0; + return 0; + } + case INSTRUMENTED_POP_JUMP_IF_TRUE: { + *effect = 0; + return 0; + } + case INSTRUMENTED_RESUME: { + *effect = 0; + return 0; + } + case INSTRUMENTED_RETURN_VALUE: { + *effect = 0; + return 0; + } + case INSTRUMENTED_YIELD_VALUE: { + *effect = 0; + return 0; + } + case INTERPRETER_EXIT: { + *effect = -1; + return 0; + } + case IS_OP: { + *effect = -1; + return 0; + } + case JUMP: { + *effect = 0; + return 0; + } + case JUMP_BACKWARD: { + *effect = 0; + return 0; + } + case JUMP_BACKWARD_NO_INTERRUPT: { + *effect = 0; + return 0; + } + case JUMP_FORWARD: { + *effect = 0; + return 0; + } + case JUMP_IF_FALSE: { + *effect = 0; + return 0; + } + case JUMP_IF_TRUE: { + *effect = 0; + return 0; + } + case JUMP_NO_INTERRUPT: { + *effect = 0; + return 0; + } + case LIST_APPEND: { + *effect = -1; + return 0; + } + case LIST_EXTEND: { + *effect = -1; + return 0; + } + case LOAD_ATTR: { + *effect = Py_MAX(1, (oparg & 1)); + return 0; + } + case LOAD_ATTR_CLASS: { + *effect = Py_MAX(0, (oparg & 1)); + return 0; + } + case LOAD_ATTR_CLASS_WITH_METACLASS_CHECK: { + *effect = Py_MAX(0, (oparg & 1)); + return 0; + } + case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: { + *effect = 0; + return 0; + } + case LOAD_ATTR_INSTANCE_VALUE: { + *effect = Py_MAX(0, (oparg & 1)); + return 0; + } + case LOAD_ATTR_METHOD_LAZY_DICT: { + *effect = 1; + return 0; + } + case LOAD_ATTR_METHOD_NO_DICT: { + *effect = 1; + return 0; + } + case LOAD_ATTR_METHOD_WITH_VALUES: { + *effect = 1; + return 0; + } + case LOAD_ATTR_MODULE: { + *effect = Py_MAX(0, (oparg & 1)); + return 0; + } + case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: { + *effect = 0; + return 0; + } + case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: { + *effect = 0; + return 0; + } + case LOAD_ATTR_PROPERTY: { + *effect = 0; + return 0; + } + case LOAD_ATTR_SLOT: { + *effect = Py_MAX(0, (oparg & 1)); + return 0; + } + case LOAD_ATTR_WITH_HINT: { + *effect = Py_MAX(0, (oparg & 1)); + return 0; + } + case LOAD_BUILD_CLASS: { + *effect = 1; + return 0; + } + case LOAD_CLOSURE: { + *effect = 1; + return 0; + } + case LOAD_COMMON_CONSTANT: { + *effect = 1; + return 0; + } + case LOAD_CONST: { + *effect = 1; + return 0; + } + case LOAD_CONST_IMMORTAL: { + *effect = 1; + return 0; + } + case LOAD_DEREF: { + *effect = 1; + return 0; + } + case LOAD_FAST: { + *effect = 1; + return 0; + } + case LOAD_FAST_AND_CLEAR: { + *effect = 1; + return 0; + } + case LOAD_FAST_CHECK: { + *effect = 1; + return 0; + } + case LOAD_FAST_LOAD_FAST: { + *effect = 2; + return 0; + } + case LOAD_FROM_DICT_OR_DEREF: { + *effect = 0; + return 0; + } + case LOAD_FROM_DICT_OR_GLOBALS: { + *effect = 0; + return 0; + } + case LOAD_GLOBAL: { + *effect = Py_MAX(1, 1 + (oparg & 1)); + return 0; + } + case LOAD_GLOBAL_BUILTIN: { + *effect = Py_MAX(1, 1 + (oparg & 1)); + return 0; + } + case LOAD_GLOBAL_MODULE: { + *effect = Py_MAX(1, 1 + (oparg & 1)); + return 0; + } + case LOAD_LOCALS: { + *effect = 1; + return 0; + } + case LOAD_NAME: { + *effect = 1; + return 0; + } + case LOAD_SMALL_INT: { + *effect = 1; + return 0; + } + case LOAD_SPECIAL: { + *effect = 1; + return 0; + } + case LOAD_SUPER_ATTR: { + *effect = Py_MAX(0, -2 + (oparg & 1)); + return 0; + } + case LOAD_SUPER_ATTR_ATTR: { + *effect = -2; + return 0; + } + case LOAD_SUPER_ATTR_METHOD: { + *effect = -1; + return 0; + } + case MAKE_CELL: { + *effect = 0; + return 0; + } + case MAKE_FUNCTION: { + *effect = 0; + return 0; + } + case MAP_ADD: { + *effect = -2; + return 0; + } + case MATCH_CLASS: { + *effect = -2; + return 0; + } + case MATCH_KEYS: { + *effect = 1; + return 0; + } + case MATCH_MAPPING: { + *effect = 1; + return 0; + } + case MATCH_SEQUENCE: { + *effect = 1; + return 0; + } + case NOP: { + *effect = 0; + return 0; + } + case POP_BLOCK: { + *effect = 0; + return 0; + } + case POP_EXCEPT: { + *effect = -1; + return 0; + } + case POP_JUMP_IF_FALSE: { + *effect = -1; + return 0; + } + case POP_JUMP_IF_NONE: { + *effect = 0; + return 0; + } + case POP_JUMP_IF_NOT_NONE: { + *effect = 0; + return 0; + } + case POP_JUMP_IF_TRUE: { + *effect = -1; + return 0; + } + case POP_TOP: { + *effect = -1; + return 0; + } + case PUSH_EXC_INFO: { + *effect = 1; + return 0; + } + case PUSH_NULL: { + *effect = 1; + return 0; + } + case RAISE_VARARGS: { + *effect = -oparg; + return 0; + } + case RERAISE: { + *effect = -1; + return 0; + } + case RESERVED: { + *effect = 0; + return 0; + } + case RESUME: { + *effect = 0; + return 0; + } + case RESUME_CHECK: { + *effect = 0; + return 0; + } + case RETURN_GENERATOR: { + *effect = 1; + return 0; + } + case RETURN_VALUE: { + *effect = 0; + return 0; + } + case SEND: { + *effect = 0; + return 0; + } + case SEND_GEN: { + *effect = 0; + return 0; + } + case SETUP_ANNOTATIONS: { + *effect = 0; + return 0; + } + case SETUP_CLEANUP: { + *effect = 2; + return 0; + } + case SETUP_FINALLY: { + *effect = 1; + return 0; + } + case SETUP_WITH: { + *effect = 1; + return 0; + } + case SET_ADD: { + *effect = -1; + return 0; + } + case SET_FUNCTION_ATTRIBUTE: { + *effect = -1; + return 0; + } + case SET_UPDATE: { + *effect = -1; + return 0; + } + case STORE_ATTR: { + *effect = 0; + return 0; + } + case STORE_ATTR_INSTANCE_VALUE: { + *effect = 0; + return 0; + } + case STORE_ATTR_SLOT: { + *effect = 0; + return 0; + } + case STORE_ATTR_WITH_HINT: { + *effect = 0; + return 0; + } + case STORE_DEREF: { + *effect = -1; + return 0; + } + case STORE_FAST: { + *effect = -1; + return 0; + } + case STORE_FAST_LOAD_FAST: { + *effect = 0; + return 0; + } + case STORE_FAST_MAYBE_NULL: { + *effect = -1; + return 0; + } + case STORE_FAST_STORE_FAST: { + *effect = -2; + return 0; + } + case STORE_GLOBAL: { + *effect = -1; + return 0; + } + case STORE_NAME: { + *effect = -1; + return 0; + } + case STORE_SLICE: { + *effect = 0; + return 0; + } + case STORE_SUBSCR: { + *effect = 0; + return 0; + } + case STORE_SUBSCR_DICT: { + *effect = -3; + return 0; + } + case STORE_SUBSCR_LIST_INT: { + *effect = -3; + return 0; + } + case SWAP: { + *effect = 0; + return 0; + } + case TO_BOOL: { + *effect = 0; + return 0; + } + case TO_BOOL_ALWAYS_TRUE: { + *effect = 0; + return 0; + } + case TO_BOOL_BOOL: { + *effect = 0; + return 0; + } + case TO_BOOL_INT: { + *effect = 0; + return 0; + } + case TO_BOOL_LIST: { + *effect = 0; + return 0; + } + case TO_BOOL_NONE: { + *effect = 0; + return 0; + } + case TO_BOOL_STR: { + *effect = 0; + return 0; + } + case UNARY_INVERT: { + *effect = 0; + return 0; + } + case UNARY_NEGATIVE: { + *effect = 0; + return 0; + } + case UNARY_NOT: { + *effect = 0; + return 0; + } + case UNPACK_EX: { + *effect = (oparg & 0xFF) + (oparg >> 8); + return 0; + } + case UNPACK_SEQUENCE: { + *effect = Py_MAX(1, -1 + oparg); + return 0; + } + case UNPACK_SEQUENCE_LIST: { + *effect = -1 + oparg; + return 0; + } + case UNPACK_SEQUENCE_TUPLE: { + *effect = -1 + oparg; + return 0; + } + case UNPACK_SEQUENCE_TWO_TUPLE: { + *effect = 1; + return 0; + } + case WITH_EXCEPT_START: { + *effect = 1; + return 0; + } + case YIELD_VALUE: { + *effect = 0; + return 0; + } + default: + return -1; + } +} + +#endif + enum InstructionFormat { INSTR_FMT_IB = 1, INSTR_FMT_IBC = 2, diff --git a/Lib/test/test_generated_cases.py b/Lib/test/test_generated_cases.py index ff9a52b7ada..66862ec17cc 100644 --- a/Lib/test/test_generated_cases.py +++ b/Lib/test/test_generated_cases.py @@ -1,9 +1,11 @@ import contextlib import os +import re import sys import tempfile import unittest +from io import StringIO from test import support from test import test_tools @@ -29,10 +31,12 @@ def skip_if_different_mount_drives(): test_tools.skip_if_missing("cases_generator") with test_tools.imports_under_tool("cases_generator"): - from analyzer import StackItem + from analyzer import analyze_forest, StackItem + from cwriter import CWriter import parser from stack import Local, Stack import tier1_generator + import opcode_metadata_generator import optimizer_generator @@ -43,6 +47,14 @@ def handle_stderr(): return support.captured_stderr() +def parse_src(src): + p = parser.Parser(src, "test.c") + nodes = [] + while node := p.definition(): + nodes.append(node) + return nodes + + class TestEffects(unittest.TestCase): def test_effect_sizes(self): stack = Stack() @@ -65,6 +77,171 @@ def test_effect_sizes(self): self.assertEqual(stack.top_offset.to_c(), "1 - oparg - oparg*2 + oparg*4") +class TestGenerateMaxStackEffect(unittest.TestCase): + def check(self, input, output): + analysis = analyze_forest(parse_src(input)) + buf = StringIO() + writer = CWriter(buf, 0, False) + opcode_metadata_generator.generate_max_stack_effect_function(analysis, writer) + buf.seek(0) + generated = buf.read() + matches = re.search(r"(case OP: {[^}]+})", generated) + if matches is None: + self.fail(f"Couldn't find case statement for OP in:\n {generated}") + self.assertEqual(output.strip(), matches.group(1)) + + def test_push_one(self): + input = """ + inst(OP, (a -- b, c)) { + SPAM(); + } + """ + output = """ + case OP: { + *effect = 1; + return 0; + } + """ + self.check(input, output) + + def test_cond_push(self): + input = """ + inst(OP, (a -- b, c if (oparg))) { + SPAM(); + } + """ + output = """ + case OP: { + *effect = ((oparg) ? 1 : 0); + return 0; + } + """ + self.check(input, output) + + def test_ops_pass_two(self): + input = """ + op(A, (-- val1)) { + val1 = SPAM(); + } + op(B, (-- val2)) { + val2 = SPAM(); + } + op(C, (val1, val2 --)) { + } + macro(OP) = A + B + C; + """ + output = """ + case OP: { + *effect = 2; + return 0; + } + """ + self.check(input, output) + + def test_ops_pass_two_cond_push(self): + input = """ + op(A, (-- val1, val2)) { + val1 = 0; + val2 = 1; + } + op(B, (val1, val2 -- val1, val2, val3 if (oparg))) { + val3 = SPAM(); + } + macro(OP) = A + B; + """ + output = """ + case OP: { + *effect = Py_MAX(2, 2 + ((oparg) ? 1 : 0)); + return 0; + } + """ + self.check(input, output) + + def test_pop_push_array(self): + input = """ + inst(OP, (values[oparg] -- values[oparg], above)) { + SPAM(values, oparg); + above = 0; + } + """ + output = """ + case OP: { + *effect = 1; + return 0; + } + """ + self.check(input, output) + + def test_family(self): + input = """ + op(A, (-- val1, val2)) { + val1 = 0; + val2 = 1; + } + op(B, (val1, val2 -- val3)) { + val3 = 2; + } + macro(OP1) = A + B; + + inst(OP, (-- val)) { + val = 0; + } + + family(OP, 0) = { OP1 }; + """ + output = """ + case OP: { + *effect = 2; + return 0; + } + """ + self.check(input, output) + + def test_family_intermediate_array(self): + input = """ + op(A, (-- values[oparg])) { + val1 = 0; + val2 = 1; + } + op(B, (values[oparg] -- val3)) { + val3 = 2; + } + macro(OP1) = A + B; + + inst(OP, (-- val)) { + val = 0; + } + + family(OP, 0) = { OP1 }; + """ + output = """ + case OP: { + *effect = Py_MAX(1, oparg); + return 0; + } + """ + self.check(input, output) + + def test_negative_effect(self): + input = """ + op(A, (val1 -- )) { + } + op(B, (val2 --)) { + } + op(C, (val3 --)) { + } + + macro(OP) = A + B + C; + """ + output = """ + case OP: { + *effect = -1; + return 0; + } + """ + self.check(input, output) + + class TestGeneratedCases(unittest.TestCase): def setUp(self) -> None: super().setUp() diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 54181319500..b1097b64469 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -733,7 +733,7 @@ make_cfg_traversal_stack(basicblock *entryblock) { return stack; } -/* Return the stack effect of opcode with argument oparg. +/* Compute the stack effects of opcode with argument oparg. Some opcodes have different stack effect when jump to the target and when not jump. The 'jump' parameter specifies the case: @@ -742,25 +742,42 @@ make_cfg_traversal_stack(basicblock *entryblock) { * 1 -- when jump * -1 -- maximal */ +typedef struct { + /* The stack effect of the instruction. */ + int net; + + /* The maximum stack usage of the instruction. Some instructions may + * temporarily push extra values to the stack while they are executing. + */ + int max; +} stack_effects; + Py_LOCAL(int) -stack_effect(int opcode, int oparg, int jump) +get_stack_effects(int opcode, int oparg, int jump, stack_effects *effects) { if (opcode < 0) { - return PY_INVALID_STACK_EFFECT; + return -1; } if ((opcode <= MAX_REAL_OPCODE) && (_PyOpcode_Deopt[opcode] != opcode)) { // Specialized instructions are not supported. - return PY_INVALID_STACK_EFFECT; + return -1; } int popped = _PyOpcode_num_popped(opcode, oparg); int pushed = _PyOpcode_num_pushed(opcode, oparg); if (popped < 0 || pushed < 0) { - return PY_INVALID_STACK_EFFECT; + return -1; } if (IS_BLOCK_PUSH_OPCODE(opcode) && !jump) { + effects->net = 0; + effects->max = 0; return 0; } - return pushed - popped; + if (_PyOpcode_max_stack_effect(opcode, oparg, &effects->max) < 0) { + return -1; + } + effects->net = pushed - popped; + assert(effects->max >= effects->net); + return 0; } Py_LOCAL_INLINE(int) @@ -807,35 +824,30 @@ calculate_stackdepth(cfg_builder *g) basicblock *next = b->b_next; for (int i = 0; i < b->b_iused; i++) { cfg_instr *instr = &b->b_instr[i]; - int effect = stack_effect(instr->i_opcode, instr->i_oparg, 0); - if (effect == PY_INVALID_STACK_EFFECT) { + stack_effects effects; + if (get_stack_effects(instr->i_opcode, instr->i_oparg, 0, &effects) < 0) { PyErr_Format(PyExc_SystemError, "Invalid stack effect for opcode=%d, arg=%i", instr->i_opcode, instr->i_oparg); goto error; } - int new_depth = depth + effect; + int new_depth = depth + effects.net; if (new_depth < 0) { - PyErr_Format(PyExc_ValueError, - "Invalid CFG, stack underflow"); - goto error; - } - if (new_depth > maxdepth) { - maxdepth = new_depth; + PyErr_Format(PyExc_ValueError, + "Invalid CFG, stack underflow"); + goto error; } + maxdepth = Py_MAX(maxdepth, depth + effects.max); if (HAS_TARGET(instr->i_opcode)) { - effect = stack_effect(instr->i_opcode, instr->i_oparg, 1); - if (effect == PY_INVALID_STACK_EFFECT) { + if (get_stack_effects(instr->i_opcode, instr->i_oparg, 1, &effects) < 0) { PyErr_Format(PyExc_SystemError, "Invalid stack effect for opcode=%d, arg=%i", instr->i_opcode, instr->i_oparg); goto error; } - int target_depth = depth + effect; + int target_depth = depth + effects.net; assert(target_depth >= 0); /* invalid code or bug in stackdepth() */ - if (target_depth > maxdepth) { - maxdepth = target_depth; - } + maxdepth = Py_MAX(maxdepth, depth + effects.max); if (stackdepth_push(&sp, instr->i_target, target_depth) < 0) { goto error; } @@ -2936,13 +2948,21 @@ _PyCfg_JumpLabelsToTargets(cfg_builder *g) int PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump) { - return stack_effect(opcode, oparg, jump); + stack_effects effs; + if (get_stack_effects(opcode, oparg, jump, &effs) < 0) { + return PY_INVALID_STACK_EFFECT; + } + return effs.net; } int PyCompile_OpcodeStackEffect(int opcode, int oparg) { - return stack_effect(opcode, oparg, -1); + stack_effects effs; + if (get_stack_effects(opcode, oparg, -1, &effs) < 0) { + return PY_INVALID_STACK_EFFECT; + } + return effs.net; } /* Access to compiler optimizations for unit tests. diff --git a/Tools/cases_generator/opcode_metadata_generator.py b/Tools/cases_generator/opcode_metadata_generator.py index 2ad7604af9c..1a9849c0cbb 100644 --- a/Tools/cases_generator/opcode_metadata_generator.py +++ b/Tools/cases_generator/opcode_metadata_generator.py @@ -19,8 +19,9 @@ cflags, ) from cwriter import CWriter +from dataclasses import dataclass from typing import TextIO -from stack import get_stack_effect +from stack import Stack, get_stack_effect, get_stack_effects # Constants used instead of size for macro expansions. # Note: 1, 2, 4 must match actual cache entry sizes. @@ -107,6 +108,101 @@ def add(inst: Instruction | PseudoInstruction) -> None: emit_stack_effect_function(out, "popped", sorted(popped_data)) emit_stack_effect_function(out, "pushed", sorted(pushed_data)) + generate_max_stack_effect_function(analysis, out) + + +def emit_max_stack_effect_function( + out: CWriter, effects: list[tuple[str, list[str]]] +) -> None: + out.emit("extern int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect);\n") + out.emit("#ifdef NEED_OPCODE_METADATA\n") + out.emit(f"int _PyOpcode_max_stack_effect(int opcode, int oparg, int *effect) {{\n") + out.emit("switch(opcode) {\n") + for name, exprs in effects: + out.emit(f"case {name}: {{\n") + if len(exprs) == 1: + out.emit(f"*effect = {exprs[0]};\n") + elif len(exprs) == 2: + out.emit(f"*effect = Py_MAX({exprs[0]}, {exprs[1]});\n") + else: + assert len(exprs) > 2 + out.emit(f"int max_eff = Py_MAX({exprs[0]}, {exprs[1]});\n") + for expr in exprs[2:]: + out.emit(f"max_eff = Py_MAX(max_eff, {expr});\n") + out.emit(f"*effect = max_eff;\n") + out.emit(f"return 0;\n") + out.emit("}\n") + out.emit("default:\n") + out.emit(" return -1;\n") + out.emit("}\n") + out.emit("}\n\n") + out.emit("#endif\n\n") + + +@dataclass +class MaxStackEffectSet: + int_effect: int | None + cond_effects: set[str] + + def __init__(self) -> None: + self.int_effect = None + self.cond_effects = set() + + def add(self, stack: Stack) -> None: + top_off = stack.top_offset + top_off_int = top_off.as_int() + if top_off_int is not None: + if self.int_effect is None or top_off_int > self.int_effect: + self.int_effect = top_off_int + else: + self.cond_effects.add(top_off.to_c()) + + def update(self, other: "MaxStackEffectSet") -> None: + if self.int_effect is None: + if other.int_effect is not None: + self.int_effect = other.int_effect + elif other.int_effect is not None: + self.int_effect = max(self.int_effect, other.int_effect) + self.cond_effects.update(other.cond_effects) + + +def generate_max_stack_effect_function(analysis: Analysis, out: CWriter) -> None: + """Generate a function that returns the maximum stack effect of an + instruction while it is executing. + + Specialized instructions that are composed of uops may have a greater stack + effect during instruction execution than the net stack effect of the + instruction if the uops pass values on the stack. + """ + effects: dict[str, MaxStackEffectSet] = {} + + def add(inst: Instruction | PseudoInstruction) -> None: + inst_effect = MaxStackEffectSet() + for stack in get_stack_effects(inst): + inst_effect.add(stack) + effects[inst.name] = inst_effect + + # Collect unique stack effects for each instruction + for inst in analysis.instructions.values(): + add(inst) + for pseudo in analysis.pseudos.values(): + add(pseudo) + + # Merge the effects of all specializations in a family into the generic + # instruction + for family in analysis.families.values(): + for inst in family.members: + effects[family.name].update(effects[inst.name]) + + data: list[tuple[str, list[str]]] = [] + for name, effs in sorted(effects.items(), key=lambda kv: kv[0]): + exprs = [] + if effs.int_effect is not None: + exprs.append(str(effs.int_effect)) + exprs.extend(sorted(effs.cond_effects)) + data.append((name, exprs)) + emit_max_stack_effect_function(out, data) + def generate_is_pseudo(analysis: Analysis, out: CWriter) -> None: """Write the IS_PSEUDO_INSTR macro""" diff --git a/Tools/cases_generator/stack.py b/Tools/cases_generator/stack.py index a954bed4df0..286f47d0cfb 100644 --- a/Tools/cases_generator/stack.py +++ b/Tools/cases_generator/stack.py @@ -1,8 +1,9 @@ import re from analyzer import StackItem, StackEffect, Instruction, Uop, PseudoInstruction +from collections import defaultdict from dataclasses import dataclass from cwriter import CWriter -from typing import Iterator +from typing import Iterator, Tuple UNUSED = {"unused"} @@ -385,32 +386,47 @@ def merge(self, other: "Stack", out: CWriter) -> None: self.align(other, out) +def stacks(inst: Instruction | PseudoInstruction) -> Iterator[StackEffect]: + if isinstance(inst, Instruction): + for uop in inst.parts: + if isinstance(uop, Uop): + yield uop.stack + else: + assert isinstance(inst, PseudoInstruction) + yield inst.stack + + +def apply_stack_effect(stack: Stack, effect: StackEffect) -> None: + locals: dict[str, Local] = {} + for var in reversed(effect.inputs): + _, local = stack.pop(var) + if var.name != "unused": + locals[local.name] = local + for var in effect.outputs: + if var.name in locals: + local = locals[var.name] + else: + local = Local.unused(var) + stack.push(local) + + def get_stack_effect(inst: Instruction | PseudoInstruction) -> Stack: stack = Stack() - - def stacks(inst: Instruction | PseudoInstruction) -> Iterator[StackEffect]: - if isinstance(inst, Instruction): - for uop in inst.parts: - if isinstance(uop, Uop): - yield uop.stack - else: - assert isinstance(inst, PseudoInstruction) - yield inst.stack - for s in stacks(inst): - locals: dict[str, Local] = {} - for var in reversed(s.inputs): - _, local = stack.pop(var) - if var.name != "unused": - locals[local.name] = local - for var in s.outputs: - if var.name in locals: - local = locals[var.name] - else: - local = Local.unused(var) - stack.push(local) + apply_stack_effect(stack, s) return stack + +def get_stack_effects(inst: Instruction | PseudoInstruction) -> list[Stack]: + """Returns a list of stack effects after each uop""" + result = [] + stack = Stack() + for s in stacks(inst): + apply_stack_effect(stack, s) + result.append(stack.copy()) + return result + + @dataclass class Storage: