From 3e06c7f719b99cc7f5e8889319cff4980e41d3e8 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 26 Apr 2024 18:08:50 +0100 Subject: [PATCH] GH-118095: Add dynamic exit support and FOR_ITER_GEN support to tier 2 (GH-118279) --- Include/internal/pycore_opcode_metadata.h | 3 +- Include/internal/pycore_uop_ids.h | 207 +++++++++++----------- Include/internal/pycore_uop_metadata.h | 8 + Lib/test/test_capi/test_opt.py | 17 +- Python/bytecodes.c | 60 +++++-- Python/ceval.c | 4 + Python/ceval_macros.h | 1 + Python/executor_cases.c.h | 61 ++++++- Python/generated_cases.c.h | 57 ++++-- Python/optimizer.c | 19 +- Python/optimizer_cases.c.h | 13 +- Tools/jit/template.c | 4 + 12 files changed, 315 insertions(+), 139 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 400d7c334db..4b1f43cf2af 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -625,7 +625,7 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { case FOR_ITER: return 2; case FOR_ITER_GEN: - return 2; + return 1; case FOR_ITER_LIST: return 2; case FOR_ITER_RANGE: @@ -1253,6 +1253,7 @@ _PyOpcode_macro_expansion[256] = { [FORMAT_SIMPLE] = { .nuops = 1, .uops = { { _FORMAT_SIMPLE, 0, 0 } } }, [FORMAT_WITH_SPEC] = { .nuops = 1, .uops = { { _FORMAT_WITH_SPEC, 0, 0 } } }, [FOR_ITER] = { .nuops = 1, .uops = { { _FOR_ITER, 9, 0 } } }, + [FOR_ITER_GEN] = { .nuops = 3, .uops = { { _CHECK_PEP_523, 0, 0 }, { _FOR_ITER_GEN_FRAME, 0, 0 }, { _PUSH_FRAME, 0, 0 } } }, [FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, 0, 0 }, { _ITER_JUMP_LIST, 9, 1 }, { _ITER_NEXT_LIST, 0, 0 } } }, [FOR_ITER_RANGE] = { .nuops = 3, .uops = { { _ITER_CHECK_RANGE, 0, 0 }, { _ITER_JUMP_RANGE, 9, 1 }, { _ITER_NEXT_RANGE, 0, 0 } } }, [FOR_ITER_TUPLE] = { .nuops = 3, .uops = { { _ITER_CHECK_TUPLE, 0, 0 }, { _ITER_JUMP_TUPLE, 9, 1 }, { _ITER_NEXT_TUPLE, 0, 0 } } }, diff --git a/Include/internal/pycore_uop_ids.h 
b/Include/internal/pycore_uop_ids.h index bb49d6e77d2..beb182c436d 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -91,48 +91,49 @@ extern "C" { #define _DEOPT 342 #define _DICT_MERGE DICT_MERGE #define _DICT_UPDATE DICT_UPDATE +#define _DYNAMIC_EXIT 343 #define _END_SEND END_SEND -#define _ERROR_POP_N 343 +#define _ERROR_POP_N 344 #define _EXIT_INIT_CHECK EXIT_INIT_CHECK -#define _FATAL_ERROR 344 +#define _FATAL_ERROR 345 #define _FORMAT_SIMPLE FORMAT_SIMPLE #define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC -#define _FOR_ITER 345 -#define _FOR_ITER_GEN FOR_ITER_GEN -#define _FOR_ITER_TIER_TWO 346 +#define _FOR_ITER 346 +#define _FOR_ITER_GEN_FRAME 347 +#define _FOR_ITER_TIER_TWO 348 #define _GET_AITER GET_AITER #define _GET_ANEXT GET_ANEXT #define _GET_AWAITABLE GET_AWAITABLE #define _GET_ITER GET_ITER #define _GET_LEN GET_LEN #define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER -#define _GUARD_BOTH_FLOAT 347 -#define _GUARD_BOTH_INT 348 -#define _GUARD_BOTH_UNICODE 349 -#define _GUARD_BUILTINS_VERSION 350 -#define _GUARD_DORV_NO_DICT 351 -#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 352 -#define _GUARD_GLOBALS_VERSION 353 -#define _GUARD_IS_FALSE_POP 354 -#define _GUARD_IS_NONE_POP 355 -#define _GUARD_IS_NOT_NONE_POP 356 -#define _GUARD_IS_TRUE_POP 357 -#define _GUARD_KEYS_VERSION 358 -#define _GUARD_NOS_FLOAT 359 -#define _GUARD_NOS_INT 360 -#define _GUARD_NOT_EXHAUSTED_LIST 361 -#define _GUARD_NOT_EXHAUSTED_RANGE 362 -#define _GUARD_NOT_EXHAUSTED_TUPLE 363 -#define _GUARD_TOS_FLOAT 364 -#define _GUARD_TOS_INT 365 -#define _GUARD_TYPE_VERSION 366 -#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 367 -#define _INIT_CALL_PY_EXACT_ARGS 368 -#define _INIT_CALL_PY_EXACT_ARGS_0 369 -#define _INIT_CALL_PY_EXACT_ARGS_1 370 -#define _INIT_CALL_PY_EXACT_ARGS_2 371 -#define _INIT_CALL_PY_EXACT_ARGS_3 372 -#define _INIT_CALL_PY_EXACT_ARGS_4 373 +#define _GUARD_BOTH_FLOAT 349 +#define _GUARD_BOTH_INT 350 +#define _GUARD_BOTH_UNICODE 351 +#define 
_GUARD_BUILTINS_VERSION 352 +#define _GUARD_DORV_NO_DICT 353 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 354 +#define _GUARD_GLOBALS_VERSION 355 +#define _GUARD_IS_FALSE_POP 356 +#define _GUARD_IS_NONE_POP 357 +#define _GUARD_IS_NOT_NONE_POP 358 +#define _GUARD_IS_TRUE_POP 359 +#define _GUARD_KEYS_VERSION 360 +#define _GUARD_NOS_FLOAT 361 +#define _GUARD_NOS_INT 362 +#define _GUARD_NOT_EXHAUSTED_LIST 363 +#define _GUARD_NOT_EXHAUSTED_RANGE 364 +#define _GUARD_NOT_EXHAUSTED_TUPLE 365 +#define _GUARD_TOS_FLOAT 366 +#define _GUARD_TOS_INT 367 +#define _GUARD_TYPE_VERSION 368 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 369 +#define _INIT_CALL_PY_EXACT_ARGS 370 +#define _INIT_CALL_PY_EXACT_ARGS_0 371 +#define _INIT_CALL_PY_EXACT_ARGS_1 372 +#define _INIT_CALL_PY_EXACT_ARGS_2 373 +#define _INIT_CALL_PY_EXACT_ARGS_3 374 +#define _INIT_CALL_PY_EXACT_ARGS_4 375 #define _INSTRUMENTED_CALL INSTRUMENTED_CALL #define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX #define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW @@ -149,65 +150,65 @@ extern "C" { #define _INSTRUMENTED_RETURN_CONST INSTRUMENTED_RETURN_CONST #define _INSTRUMENTED_RETURN_VALUE INSTRUMENTED_RETURN_VALUE #define _INSTRUMENTED_YIELD_VALUE INSTRUMENTED_YIELD_VALUE -#define _INTERNAL_INCREMENT_OPT_COUNTER 374 -#define _IS_NONE 375 +#define _INTERNAL_INCREMENT_OPT_COUNTER 376 +#define _IS_NONE 377 #define _IS_OP IS_OP -#define _ITER_CHECK_LIST 376 -#define _ITER_CHECK_RANGE 377 -#define _ITER_CHECK_TUPLE 378 -#define _ITER_JUMP_LIST 379 -#define _ITER_JUMP_RANGE 380 -#define _ITER_JUMP_TUPLE 381 -#define _ITER_NEXT_LIST 382 -#define _ITER_NEXT_RANGE 383 -#define _ITER_NEXT_TUPLE 384 -#define _JUMP_TO_TOP 385 +#define _ITER_CHECK_LIST 378 +#define _ITER_CHECK_RANGE 379 +#define _ITER_CHECK_TUPLE 380 +#define _ITER_JUMP_LIST 381 +#define _ITER_JUMP_RANGE 382 +#define _ITER_JUMP_TUPLE 383 +#define _ITER_NEXT_LIST 384 +#define _ITER_NEXT_RANGE 385 +#define _ITER_NEXT_TUPLE 386 +#define _JUMP_TO_TOP 387 
#define _LIST_APPEND LIST_APPEND #define _LIST_EXTEND LIST_EXTEND #define _LOAD_ASSERTION_ERROR LOAD_ASSERTION_ERROR -#define _LOAD_ATTR 386 -#define _LOAD_ATTR_CLASS 387 -#define _LOAD_ATTR_CLASS_0 388 -#define _LOAD_ATTR_CLASS_1 389 +#define _LOAD_ATTR 388 +#define _LOAD_ATTR_CLASS 389 +#define _LOAD_ATTR_CLASS_0 390 +#define _LOAD_ATTR_CLASS_1 391 #define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN -#define _LOAD_ATTR_INSTANCE_VALUE 390 -#define _LOAD_ATTR_INSTANCE_VALUE_0 391 -#define _LOAD_ATTR_INSTANCE_VALUE_1 392 -#define _LOAD_ATTR_METHOD_LAZY_DICT 393 -#define _LOAD_ATTR_METHOD_NO_DICT 394 -#define _LOAD_ATTR_METHOD_WITH_VALUES 395 -#define _LOAD_ATTR_MODULE 396 -#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 397 -#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 398 +#define _LOAD_ATTR_INSTANCE_VALUE 392 +#define _LOAD_ATTR_INSTANCE_VALUE_0 393 +#define _LOAD_ATTR_INSTANCE_VALUE_1 394 +#define _LOAD_ATTR_METHOD_LAZY_DICT 395 +#define _LOAD_ATTR_METHOD_NO_DICT 396 +#define _LOAD_ATTR_METHOD_WITH_VALUES 397 +#define _LOAD_ATTR_MODULE 398 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 399 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 400 #define _LOAD_ATTR_PROPERTY LOAD_ATTR_PROPERTY -#define _LOAD_ATTR_SLOT 399 -#define _LOAD_ATTR_SLOT_0 400 -#define _LOAD_ATTR_SLOT_1 401 -#define _LOAD_ATTR_WITH_HINT 402 +#define _LOAD_ATTR_SLOT 401 +#define _LOAD_ATTR_SLOT_0 402 +#define _LOAD_ATTR_SLOT_1 403 +#define _LOAD_ATTR_WITH_HINT 404 #define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS #define _LOAD_CONST LOAD_CONST -#define _LOAD_CONST_INLINE 403 -#define _LOAD_CONST_INLINE_BORROW 404 -#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 405 -#define _LOAD_CONST_INLINE_WITH_NULL 406 +#define _LOAD_CONST_INLINE 405 +#define _LOAD_CONST_INLINE_BORROW 406 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 407 +#define _LOAD_CONST_INLINE_WITH_NULL 408 #define _LOAD_DEREF LOAD_DEREF -#define _LOAD_FAST 407 -#define _LOAD_FAST_0 408 -#define _LOAD_FAST_1 409 -#define _LOAD_FAST_2 410 
-#define _LOAD_FAST_3 411 -#define _LOAD_FAST_4 412 -#define _LOAD_FAST_5 413 -#define _LOAD_FAST_6 414 -#define _LOAD_FAST_7 415 +#define _LOAD_FAST 409 +#define _LOAD_FAST_0 410 +#define _LOAD_FAST_1 411 +#define _LOAD_FAST_2 412 +#define _LOAD_FAST_3 413 +#define _LOAD_FAST_4 414 +#define _LOAD_FAST_5 415 +#define _LOAD_FAST_6 416 +#define _LOAD_FAST_7 417 #define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR #define _LOAD_FAST_CHECK LOAD_FAST_CHECK #define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST #define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF #define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS -#define _LOAD_GLOBAL 416 -#define _LOAD_GLOBAL_BUILTINS 417 -#define _LOAD_GLOBAL_MODULE 418 +#define _LOAD_GLOBAL 418 +#define _LOAD_GLOBAL_BUILTINS 419 +#define _LOAD_GLOBAL_MODULE 420 #define _LOAD_LOCALS LOAD_LOCALS #define _LOAD_NAME LOAD_NAME #define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR @@ -221,50 +222,50 @@ extern "C" { #define _MATCH_SEQUENCE MATCH_SEQUENCE #define _NOP NOP #define _POP_EXCEPT POP_EXCEPT -#define _POP_FRAME 419 -#define _POP_JUMP_IF_FALSE 420 -#define _POP_JUMP_IF_TRUE 421 +#define _POP_FRAME 421 +#define _POP_JUMP_IF_FALSE 422 +#define _POP_JUMP_IF_TRUE 423 #define _POP_TOP POP_TOP -#define _POP_TOP_LOAD_CONST_INLINE_BORROW 422 +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 424 #define _PUSH_EXC_INFO PUSH_EXC_INFO -#define _PUSH_FRAME 423 +#define _PUSH_FRAME 425 #define _PUSH_NULL PUSH_NULL -#define _REPLACE_WITH_TRUE 424 +#define _REPLACE_WITH_TRUE 426 #define _RESUME_CHECK RESUME_CHECK #define _RETURN_GENERATOR RETURN_GENERATOR -#define _SAVE_RETURN_OFFSET 425 -#define _SEND 426 +#define _SAVE_RETURN_OFFSET 427 +#define _SEND 428 #define _SEND_GEN SEND_GEN #define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS #define _SET_ADD SET_ADD #define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE #define _SET_UPDATE SET_UPDATE -#define _SIDE_EXIT 427 -#define _START_EXECUTOR 428 -#define _STORE_ATTR 429 -#define _STORE_ATTR_INSTANCE_VALUE 430 
-#define _STORE_ATTR_SLOT 431 +#define _SIDE_EXIT 429 +#define _START_EXECUTOR 430 +#define _STORE_ATTR 431 +#define _STORE_ATTR_INSTANCE_VALUE 432 +#define _STORE_ATTR_SLOT 433 #define _STORE_ATTR_WITH_HINT STORE_ATTR_WITH_HINT #define _STORE_DEREF STORE_DEREF -#define _STORE_FAST 432 -#define _STORE_FAST_0 433 -#define _STORE_FAST_1 434 -#define _STORE_FAST_2 435 -#define _STORE_FAST_3 436 -#define _STORE_FAST_4 437 -#define _STORE_FAST_5 438 -#define _STORE_FAST_6 439 -#define _STORE_FAST_7 440 +#define _STORE_FAST 434 +#define _STORE_FAST_0 435 +#define _STORE_FAST_1 436 +#define _STORE_FAST_2 437 +#define _STORE_FAST_3 438 +#define _STORE_FAST_4 439 +#define _STORE_FAST_5 440 +#define _STORE_FAST_6 441 +#define _STORE_FAST_7 442 #define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST #define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST #define _STORE_GLOBAL STORE_GLOBAL #define _STORE_NAME STORE_NAME #define _STORE_SLICE STORE_SLICE -#define _STORE_SUBSCR 441 +#define _STORE_SUBSCR 443 #define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT #define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT #define _SWAP SWAP -#define _TO_BOOL 442 +#define _TO_BOOL 444 #define _TO_BOOL_BOOL TO_BOOL_BOOL #define _TO_BOOL_INT TO_BOOL_INT #define _TO_BOOL_LIST TO_BOOL_LIST @@ -274,12 +275,12 @@ extern "C" { #define _UNARY_NEGATIVE UNARY_NEGATIVE #define _UNARY_NOT UNARY_NOT #define _UNPACK_EX UNPACK_EX -#define _UNPACK_SEQUENCE 443 +#define _UNPACK_SEQUENCE 445 #define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST #define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE #define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE #define _WITH_EXCEPT_START WITH_EXCEPT_START -#define MAX_UOP_ID 443 +#define MAX_UOP_ID 445 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index b8cdfae8391..776728d04bc 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -180,6 +180,7 @@ const uint16_t 
_PyUop_Flags[MAX_UOP_ID+1] = { [_ITER_CHECK_RANGE] = HAS_EXIT_FLAG, [_GUARD_NOT_EXHAUSTED_RANGE] = HAS_EXIT_FLAG, [_ITER_NEXT_RANGE] = HAS_ERROR_FLAG, + [_FOR_ITER_GEN_FRAME] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_WITH_EXCEPT_START] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_PUSH_EXC_INFO] = 0, [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = HAS_DEOPT_FLAG, @@ -245,6 +246,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, [_COLD_EXIT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, + [_DYNAMIC_EXIT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, [_START_EXECUTOR] = HAS_DEOPT_FLAG, [_FATAL_ERROR] = HAS_ESCAPES_FLAG, [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, @@ -331,6 +333,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_DEOPT] = "_DEOPT", [_DICT_MERGE] = "_DICT_MERGE", [_DICT_UPDATE] = "_DICT_UPDATE", + [_DYNAMIC_EXIT] = "_DYNAMIC_EXIT", [_END_SEND] = "_END_SEND", [_ERROR_POP_N] = "_ERROR_POP_N", [_EXIT_INIT_CHECK] = "_EXIT_INIT_CHECK", @@ -338,6 +341,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_FATAL_ERROR] = "_FATAL_ERROR", [_FORMAT_SIMPLE] = "_FORMAT_SIMPLE", [_FORMAT_WITH_SPEC] = "_FORMAT_WITH_SPEC", + [_FOR_ITER_GEN_FRAME] = "_FOR_ITER_GEN_FRAME", [_FOR_ITER_TIER_TWO] = "_FOR_ITER_TIER_TWO", [_GET_AITER] = "_GET_AITER", [_GET_ANEXT] = "_GET_ANEXT", @@ -818,6 +822,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _ITER_NEXT_RANGE: return 1; + case _FOR_ITER_GEN_FRAME: + return 1; case _WITH_EXCEPT_START: return 4; case _PUSH_EXC_INFO: @@ -948,6 +954,8 @@ int _PyUop_num_popped(int opcode, int oparg) return 1; case _COLD_EXIT: return 0; + case _DYNAMIC_EXIT: + return 0; case _START_EXECUTOR: return 0; case _FATAL_ERROR: diff --git a/Lib/test/test_capi/test_opt.py b/Lib/test/test_capi/test_opt.py index e2e772a52d7..c798b343626 100644 --- a/Lib/test/test_capi/test_opt.py +++ b/Lib/test/test_capi/test_opt.py @@ -132,7 +132,7 @@ def iter_opnames(ex): def get_opnames(ex): - return 
set(iter_opnames(ex)) + return list(iter_opnames(ex)) @requires_specialization @@ -1298,5 +1298,20 @@ def testfunc(n): self.assertIsNotNone(ex) self.assertIn("_RETURN_GENERATOR", get_opnames(ex)) + def test_for_iter_gen(self): + def gen(n): + for i in range(n): + yield i + def testfunc(n): + g = gen(n) + s = 0 + for i in g: + s += i + return s + res, ex = self._run_with_optimizer(testfunc, 20) + self.assertEqual(res, 190) + self.assertIsNotNone(ex) + self.assertIn("_FOR_ITER_GEN_FRAME", get_opnames(ex)) + if __name__ == "__main__": unittest.main() diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 48550491491..fe3d61362e6 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1109,6 +1109,10 @@ dummy_func( _PyFrame_StackPush(frame, retval); /* We don't know which of these is relevant here, so keep them equal */ assert(INLINE_CACHE_ENTRIES_SEND == INLINE_CACHE_ENTRIES_FOR_ITER); + assert(_PyOpcode_Deopt[frame->instr_ptr->op.code] == SEND || + _PyOpcode_Deopt[frame->instr_ptr->op.code] == FOR_ITER || + _PyOpcode_Deopt[frame->instr_ptr->op.code] == INTERPRETER_EXIT || + _PyOpcode_Deopt[frame->instr_ptr->op.code] == ENTER_EXECUTOR); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); goto resume_frame; } @@ -2759,24 +2763,26 @@ dummy_func( _ITER_JUMP_RANGE + _ITER_NEXT_RANGE; - inst(FOR_ITER_GEN, (unused/1, iter -- iter, unused)) { - DEOPT_IF(tstate->interp->eval_frame); + op(_FOR_ITER_GEN_FRAME, (iter -- iter, gen_frame: _PyInterpreterFrame*)) { PyGenObject *gen = (PyGenObject *)iter; DEOPT_IF(Py_TYPE(gen) != &PyGen_Type); DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING); STAT_INC(FOR_ITER, hit); - _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; + gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; _PyFrame_StackPush(gen_frame, Py_None); gen->gi_frame_state = FRAME_EXECUTING; gen->gi_exc_state.previous_item = tstate->exc_info; tstate->exc_info = &gen->gi_exc_state; - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == 
INSTRUMENTED_END_FOR); - assert(next_instr - this_instr + oparg <= UINT16_MAX); - frame->return_offset = (uint16_t)(next_instr - this_instr + oparg); - DISPATCH_INLINED(gen_frame); + // oparg is the return offset from the next instruction. + frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg); } + macro(FOR_ITER_GEN) = + unused/1 + + _CHECK_PEP_523 + + _FOR_ITER_GEN_FRAME + + _PUSH_FRAME; + inst(BEFORE_ASYNC_WITH, (mgr -- exit, res)) { PyObject *enter = _PyObject_LookupSpecial(mgr, &_Py_ID(__aenter__)); if (enter == NULL) { @@ -3166,10 +3172,7 @@ dummy_func( } } - // The 'unused' output effect represents the return value - // (which will be pushed when the frame returns). - // It is needed so CALL_PY_EXACT_ARGS matches its family. - op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- unused if (0))) { + op(_PUSH_FRAME, (new_frame: _PyInterpreterFrame* -- )) { // Write it out explicitly because it's subtly different. // Eventually this should be the only occurrence of this code. 
assert(tstate->interp->eval_frame == NULL); @@ -4189,6 +4192,38 @@ dummy_func( GOTO_TIER_TWO(executor); } + tier2 op(_DYNAMIC_EXIT, (--)) { + tstate->previous_executor = (PyObject *)current_executor; + _PyExitData *exit = (_PyExitData *)&current_executor->exits[oparg]; + _Py_CODEUNIT *target = frame->instr_ptr; + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + PyCodeObject *code = (PyCodeObject *)frame->f_executable; + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + if (!backoff_counter_triggers(exit->temperature)) { + exit->temperature = advance_backoff_counter(exit->temperature); + GOTO_TIER_ONE(target); + } + int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); + if (optimized <= 0) { + exit->temperature = restart_backoff_counter(exit->temperature); + if (optimized < 0) { + Py_DECREF(current_executor); + tstate->previous_executor = Py_None; + GOTO_UNWIND(); + } + GOTO_TIER_ONE(target); + } + else { + exit->temperature = initial_temperature_backoff_counter(); + } + } + GOTO_TIER_TWO(executor); + } + tier2 op(_START_EXECUTOR, (executor/4 --)) { Py_DECREF(tstate->previous_executor); tstate->previous_executor = NULL; @@ -4222,6 +4257,7 @@ dummy_func( GOTO_UNWIND(); } + // END BYTECODES // } diff --git a/Python/ceval.c b/Python/ceval.c index 2f217c5f33c..d130c734a67 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1072,9 +1072,13 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int next_uop = current_executor->trace + target; goto tier2_dispatch; +exit_to_tier1_dynamic: + next_instr = frame->instr_ptr; + goto goto_to_tier1; exit_to_tier1: assert(next_uop[-1].format == UOP_FORMAT_TARGET); next_instr = next_uop[-1].target + _PyCode_CODE(_PyFrame_GetCode(frame)); +goto_to_tier1: #ifdef Py_DEBUG if (lltrace >= 2) { printf("DEOPT: [UOp "); diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index 871d1747e2b..1a8554ab722 100644 ---
a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -442,3 +442,4 @@ do { \ #define GOTO_UNWIND() goto error_tier_two #define EXIT_TO_TRACE() goto exit_to_trace #define EXIT_TO_TIER1() goto exit_to_tier1 +#define EXIT_TO_TIER1_DYNAMIC() goto exit_to_tier1_dynamic; diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 1eb3da9b700..280cca1592a 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2769,7 +2769,32 @@ break; } - /* _FOR_ITER_GEN is not a viable micro-op for tier 2 because it uses the 'this_instr' variable */ + case _FOR_ITER_GEN_FRAME: { + PyObject *iter; + _PyInterpreterFrame *gen_frame; + oparg = CURRENT_OPARG(); + iter = stack_pointer[-1]; + PyGenObject *gen = (PyGenObject *)iter; + if (Py_TYPE(gen) != &PyGen_Type) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + if (gen->gi_frame_state >= FRAME_EXECUTING) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + STAT_INC(FOR_ITER, hit); + gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; + _PyFrame_StackPush(gen_frame, Py_None); + gen->gi_frame_state = FRAME_EXECUTING; + gen->gi_exc_state.previous_item = tstate->exc_info; + tstate->exc_info = &gen->gi_exc_state; + // oparg is the return offset from the next instruction. 
+ frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg); + stack_pointer[0] = (PyObject *)gen_frame; + stack_pointer += 1; + break; + } /* _BEFORE_ASYNC_WITH is not a viable micro-op for tier 2 because it has both popping and not-popping errors */ @@ -4187,6 +4212,40 @@ break; } + case _DYNAMIC_EXIT: { + oparg = CURRENT_OPARG(); + tstate->previous_executor = (PyObject *)current_executor; + _PyExitData *exit = (_PyExitData *)&current_executor->exits[oparg]; + _Py_CODEUNIT *target = frame->instr_ptr; + _PyExecutorObject *executor; + if (target->op.code == ENTER_EXECUTOR) { + PyCodeObject *code = (PyCodeObject *)frame->f_executable; + executor = code->co_executors->executors[target->op.arg]; + Py_INCREF(executor); + } + else { + if (!backoff_counter_triggers(exit->temperature)) { + exit->temperature = advance_backoff_counter(exit->temperature); + GOTO_TIER_ONE(target); + } + int optimized = _PyOptimizer_Optimize(frame, target, stack_pointer, &executor); + if (optimized <= 0) { + exit->temperature = restart_backoff_counter(exit->temperature); + if (optimized < 0) { + Py_DECREF(current_executor); + tstate->previous_executor = Py_None; + GOTO_UNWIND(); + } + GOTO_TIER_ONE(target); + } + else { + exit->temperature = initial_temperature_backoff_counter(); + } + } + GOTO_TIER_TWO(executor); + break; + } + case _START_EXECUTOR: { PyObject *executor = (PyObject *)CURRENT_OPERAND(); Py_DECREF(tstate->previous_executor); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 0c58f3f87d4..c27505fde3d 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2631,28 +2631,49 @@ } TARGET(FOR_ITER_GEN) { - _Py_CODEUNIT *this_instr = frame->instr_ptr = next_instr; + frame->instr_ptr = next_instr; next_instr += 2; INSTRUCTION_STATS(FOR_ITER_GEN); static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size"); PyObject *iter; + _PyInterpreterFrame *gen_frame; + _PyInterpreterFrame *new_frame; /* Skip 1 cache entry */ +
// _CHECK_PEP_523 + { + DEOPT_IF(tstate->interp->eval_frame, FOR_ITER); + } + // _FOR_ITER_GEN_FRAME iter = stack_pointer[-1]; - DEOPT_IF(tstate->interp->eval_frame, FOR_ITER); - PyGenObject *gen = (PyGenObject *)iter; - DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER); - DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER); - STAT_INC(FOR_ITER, hit); - _PyInterpreterFrame *gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; - _PyFrame_StackPush(gen_frame, Py_None); - gen->gi_frame_state = FRAME_EXECUTING; - gen->gi_exc_state.previous_item = tstate->exc_info; - tstate->exc_info = &gen->gi_exc_state; - assert(next_instr[oparg].op.code == END_FOR || - next_instr[oparg].op.code == INSTRUMENTED_END_FOR); - assert(next_instr - this_instr + oparg <= UINT16_MAX); - frame->return_offset = (uint16_t)(next_instr - this_instr + oparg); - DISPATCH_INLINED(gen_frame); + { + PyGenObject *gen = (PyGenObject *)iter; + DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER); + DEOPT_IF(gen->gi_frame_state >= FRAME_EXECUTING, FOR_ITER); + STAT_INC(FOR_ITER, hit); + gen_frame = (_PyInterpreterFrame *)gen->gi_iframe; + _PyFrame_StackPush(gen_frame, Py_None); + gen->gi_frame_state = FRAME_EXECUTING; + gen->gi_exc_state.previous_item = tstate->exc_info; + tstate->exc_info = &gen->gi_exc_state; + // oparg is the return offset from the next instruction. + frame->return_offset = (uint16_t)(1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg); + } + // _PUSH_FRAME + new_frame = gen_frame; + { + // Write it out explicitly because it's subtly different. + // Eventually this should be the only occurrence of this code. 
+ assert(tstate->interp->eval_frame == NULL); + _PyFrame_SetStackPointer(frame, stack_pointer); + new_frame->previous = frame; + CALL_STAT_INC(inlined_py_calls); + frame = tstate->current_frame = new_frame; + tstate->py_recursion_remaining--; + LOAD_SP(); + LOAD_IP(0); + LLTRACE_RESUME_FRAME(); + } + DISPATCH(); } TARGET(FOR_ITER_LIST) { @@ -6011,6 +6032,10 @@ _PyFrame_StackPush(frame, retval); /* We don't know which of these is relevant here, so keep them equal */ assert(INLINE_CACHE_ENTRIES_SEND == INLINE_CACHE_ENTRIES_FOR_ITER); + assert(_PyOpcode_Deopt[frame->instr_ptr->op.code] == SEND || + _PyOpcode_Deopt[frame->instr_ptr->op.code] == FOR_ITER || + _PyOpcode_Deopt[frame->instr_ptr->op.code] == INTERPRETER_EXIT || + _PyOpcode_Deopt[frame->instr_ptr->op.code] == ENTER_EXECUTOR); LOAD_IP(1 + INLINE_CACHE_ENTRIES_SEND); goto resume_frame; } diff --git a/Python/optimizer.c b/Python/optimizer.c index e5c70f72f9c..02c9b395027 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -567,8 +567,6 @@ translate_bytecode_to_trace( top: // Jump here after _PUSH_FRAME or likely branches for (;;) { target = INSTR_IP(instr, code); - RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP"); - ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target); // Need space for _DEOPT max_length--; @@ -597,6 +595,8 @@ translate_bytecode_to_trace( } } assert(opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG); + RESERVE_RAW(2, "_CHECK_VALIDITY_AND_SET_IP"); + ADD_TO_TRACE(_CHECK_VALIDITY_AND_SET_IP, 0, (uintptr_t)instr, target); /* Special case the first instruction, * so that we can guarantee forward progress */ @@ -814,6 +814,12 @@ translate_bytecode_to_trace( ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); goto done; } + if (opcode == FOR_ITER_GEN) { + DPRINTF(2, "Bailing due to dynamic target\n"); + ADD_TO_TRACE(uop, oparg, 0, target); + ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0); + goto done; + } // Increment IP to the return address instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1; 
TRACE_STACK_PUSH(); @@ -847,7 +853,7 @@ translate_bytecode_to_trace( } DPRINTF(2, "Bail, new_code == NULL\n"); ADD_TO_TRACE(uop, oparg, 0, target); - ADD_TO_TRACE(_EXIT_TRACE, 0, 0, 0); + ADD_TO_TRACE(_DYNAMIC_EXIT, 0, 0, 0); goto done; } @@ -917,7 +923,7 @@ count_exits(_PyUOpInstruction *buffer, int length) int exit_count = 0; for (int i = 0; i < length; i++) { int opcode = buffer[i].opcode; - if (opcode == _SIDE_EXIT) { + if (opcode == _SIDE_EXIT || opcode == _DYNAMIC_EXIT) { exit_count++; } } @@ -1114,6 +1120,11 @@ make_executor_from_uops(_PyUOpInstruction *buffer, int length, const _PyBloomFil dest->format = UOP_FORMAT_EXIT; next_exit--; } + if (opcode == _DYNAMIC_EXIT) { + executor->exits[next_exit].target = 0; + dest->oparg = next_exit; + next_exit--; + } } assert(next_exit == -1); assert(dest == executor->trace); diff --git a/Python/optimizer_cases.c.h b/Python/optimizer_cases.c.h index 4f0941a3cc3..b1965687701 100644 --- a/Python/optimizer_cases.c.h +++ b/Python/optimizer_cases.c.h @@ -1439,7 +1439,14 @@ break; } - /* _FOR_ITER_GEN is not a viable micro-op for tier 2 */ + case _FOR_ITER_GEN_FRAME: { + _PyInterpreterFrame *gen_frame; + gen_frame = sym_new_not_null(ctx); + if (gen_frame == NULL) goto out_of_space; + stack_pointer[0] = (_Py_UopsSymbol *)gen_frame; + stack_pointer += 1; + break; + } /* _BEFORE_ASYNC_WITH is not a viable micro-op for tier 2 */ @@ -2109,6 +2116,10 @@ break; } + case _DYNAMIC_EXIT: { + break; + } + case _START_EXECUTOR: { break; } diff --git a/Tools/jit/template.c b/Tools/jit/template.c index 228dc83254d..3e81fd15bb8 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -87,6 +87,7 @@ _JIT_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState * PATCH_VALUE(_PyExecutorObject *, current_executor, _JIT_EXECUTOR) int oparg; int uopcode = _JIT_OPCODE; + _Py_CODEUNIT *next_instr; // Other stuff we need handy: PATCH_VALUE(uint16_t, _oparg, _JIT_OPARG) #if SIZEOF_VOID_P == 8 @@ -122,6 +123,9 @@ 
_JIT_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState * exit_to_tier1: tstate->previous_executor = (PyObject *)current_executor; GOTO_TIER_ONE(_PyCode_CODE(_PyFrame_GetCode(frame)) + _target); +exit_to_tier1_dynamic: + tstate->previous_executor = (PyObject *)current_executor; + GOTO_TIER_ONE(frame->instr_ptr); exit_to_trace: { _PyExitData *exit = &current_executor->exits[_exit_index];