mirror of https://github.com/python/cpython.git
GH-98831: Move assorted macros from ceval.h to a new header (#101116)

parent d65f485070
commit 1f0d0a432c

Makefile.pre.in
@@ -1466,8 +1466,12 @@ regen-cases:
-o $(srcdir)/Python/opcode_metadata.h.new
$(UPDATE_FILE) $(srcdir)/Python/opcode_metadata.h $(srcdir)/Python/opcode_metadata.h.new

Python/ceval.o: $(srcdir)/Python/opcode_targets.h $(srcdir)/Python/condvar.h $(srcdir)/Python/generated_cases.c.h
Python/ceval.o: \
$(srcdir)/Python/ceval_macros.h \
$(srcdir)/Python/condvar.h \
$(srcdir)/Python/generated_cases.c.h \
$(srcdir)/Python/opcode_metadata.h \
$(srcdir)/Python/opcode_targets.h

Python/frozen.o: $(FROZEN_FILES_OUT)

Python/bytecodes.c
@@ -34,41 +34,14 @@
#include "setobject.h"
#include "structmember.h" // struct PyMemberDef, T_OFFSET_EX

void _PyFloat_ExactDealloc(PyObject *);
void _PyUnicode_ExactDealloc(PyObject *);

/* Stack effect macros
* These will be mostly replaced by stack effect descriptions,
* but the tooling need to recognize them.
*/
#define SET_TOP(v) (stack_pointer[-1] = (v))
#define SET_SECOND(v) (stack_pointer[-2] = (v))
#define PEEK(n) (stack_pointer[-(n)])
#define POKE(n, v) (stack_pointer[-(n)] = (v))
#define PUSH(val) (*(stack_pointer++) = (val))
#define POP() (*(--stack_pointer))
#define TOP() PEEK(1)
#define SECOND() PEEK(2)
#define STACK_GROW(n) (stack_pointer += (n))
#define STACK_SHRINK(n) (stack_pointer -= (n))
#define EMPTY() 1
#define STACK_LEVEL() 2

/* Local variable macros */
#define GETLOCAL(i) (frame->localsplus[i])
#define SETLOCAL(i, val) \
do { \
PyObject *_tmp = frame->localsplus[i]; \
frame->localsplus[i] = (val); \
Py_XDECREF(_tmp); \
} while (0)
#define USE_COMPUTED_GOTOS 0
#include "ceval_macros.h"

/* Flow control macros */
#define DEOPT_IF(cond, instname) ((void)0)
#define ERROR_IF(cond, labelname) ((void)0)
#define JUMPBY(offset) ((void)0)
#define GO_TO_INSTRUCTION(instname) ((void)0)
#define DISPATCH_SAME_OPARG() ((void)0)
#define PREDICT(opname) ((void)0)

#define inst(name, ...) case name:
#define op(name, ...) /* NAME is ignored */

@@ -76,16 +49,14 @@ do { \
#define super(name) static int SUPER_##name
#define family(name, ...) static int family_##name
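/* Editorial note (not part of this commit): in the instruction-definition DSL
   used here, inst(NAME, (inputs -- outputs)) declares NAME together with its
   stack effect: the names before "--" are the inputs popped from the stack
   (rightmost on top), the names after it are the outputs pushed, and
   unused[oparg-1] stands for oparg-1 entries that are left untouched. op(),
   super() and family() exist so the case generator can recognize those
   declarations; the stub #defines merely keep this file compiling as plain C. */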

#define NAME_ERROR_MSG \
"name '%.200s' is not defined"

// Dummy variables for stack effects.
static PyObject *value, *value1, *value2, *left, *right, *res, *sum, *prod, *sub;
static PyObject *container, *start, *stop, *v, *lhs, *rhs;
static PyObject *list, *tuple, *dict, *owner;
static PyObject *list, *tuple, *dict, *owner, *set, *str, *tup, *map, *keys;
static PyObject *exit_func, *lasti, *val, *retval, *obj, *iter;
static PyObject *aiter, *awaitable, *iterable, *w, *exc_value, *bc;
static PyObject *orig, *excs, *update, *b, *fromlist, *level, *from;
static PyObject **pieces, **values;
static size_t jump;
// Dummy variables for cache effects
static uint16_t invert, counter, index, hint;

@@ -456,7 +427,7 @@ dummy_func(
PREDICT(JUMP_BACKWARD);
}

inst(SET_ADD, (set, unused[oparg-1], v -- set, unused[oparg-1])) {
inst(SET_ADD, (set, unused[oparg-1], v -- set, unused[oparg-1])) {
int err = PySet_Add(set, v);
Py_DECREF(v);
ERROR_IF(err, error);

@@ -3336,8 +3307,10 @@ dummy_func(
// END BYTECODES //

}
dispatch_opcode:
error:
exception_unwind:
exit_unwind:
handle_eval_breaker:
resume_frame:
resume_with_error:

Python/ceval.c (349 changed lines)

@@ -215,8 +215,6 @@ _PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func,
static void
_PyEvalFrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame);

#define NAME_ERROR_MSG \
"name '%.200s' is not defined"
#define UNBOUNDLOCAL_ERROR_MSG \
"cannot access local variable '%s' where it is not associated with a value"
#define UNBOUNDFREE_ERROR_MSG \

@@ -600,352 +598,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
return _PyEval_EvalFrame(tstate, f->f_frame, throwflag);
}


/* Computed GOTOs, or
the-optimization-commonly-but-improperly-known-as-"threaded code"
using gcc's labels-as-values extension
(http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).

The traditional bytecode evaluation loop uses a "switch" statement, which
decent compilers will optimize as a single indirect branch instruction
combined with a lookup table of jump addresses. However, since the
indirect jump instruction is shared by all opcodes, the CPU will have a
hard time making the right prediction for where to jump next (actually,
it will be always wrong except in the uncommon case of a sequence of
several identical opcodes).

"Threaded code" in contrast, uses an explicit jump table and an explicit
indirect jump instruction at the end of each opcode. Since the jump
instruction is at a different address for each opcode, the CPU will make a
separate prediction for each of these instructions, which is equivalent to
predicting the second opcode of each opcode pair. These predictions have
a much better chance to turn out valid, especially in small bytecode loops.

A mispredicted branch on a modern CPU flushes the whole pipeline and
can cost several CPU cycles (depending on the pipeline depth),
and potentially many more instructions (depending on the pipeline width).
A correctly predicted branch, however, is nearly free.

At the time of this writing, the "threaded code" version is up to 15-20%
faster than the normal "switch" version, depending on the compiler and the
CPU architecture.

NOTE: care must be taken that the compiler doesn't try to "optimize" the
indirect jumps by sharing them between all opcodes. Such optimizations
can be disabled on gcc by using the -fno-gcse flag (or possibly
-fno-crossjumping).
*/
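/* Editorial sketch (not part of this commit): a tiny, self-contained
   illustration of the labels-as-values dispatch the comment above describes.
   All names below are hypothetical; this is not CPython's loop, and it needs
   GCC or Clang because computed gotos are a GNU C extension. */
#include <stdio.h>

enum { OP_INCR, OP_DECR, OP_HALT };

static int run(const unsigned char *code)
{
    /* One label per opcode; &&label takes the label's address (GNU C). */
    static void *targets[] = { &&op_incr, &&op_decr, &&op_halt };
    const unsigned char *pc = code;
    int acc = 0;
    /* Each opcode ends with its own indirect jump, so the branch predictor
       sees one jump site per opcode instead of a single shared one. */
#define DISPATCH_TOY() goto *targets[*pc++]
    DISPATCH_TOY();
op_incr: acc++; DISPATCH_TOY();
op_decr: acc--; DISPATCH_TOY();
op_halt:
#undef DISPATCH_TOY
    return acc;
}

int main(void)
{
    const unsigned char program[] = { OP_INCR, OP_INCR, OP_DECR, OP_HALT };
    printf("%d\n", run(program));   /* prints 1 */
    return 0;
}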

/* Use macros rather than inline functions, to make it as clear as possible
* to the C compiler that the tracing check is a simple test then branch.
* We want to be sure that the compiler knows this before it generates
* the CFG.
*/

#ifdef WITH_DTRACE
#define OR_DTRACE_LINE | (PyDTrace_LINE_ENABLED() ? 255 : 0)
#else
#define OR_DTRACE_LINE
#endif

#ifdef HAVE_COMPUTED_GOTOS
#ifndef USE_COMPUTED_GOTOS
#define USE_COMPUTED_GOTOS 1
#endif
#else
#if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
#error "Computed gotos are not supported on this compiler."
#endif
#undef USE_COMPUTED_GOTOS
#define USE_COMPUTED_GOTOS 0
#endif

#ifdef Py_STATS
#define INSTRUCTION_START(op) \
do { \
frame->prev_instr = next_instr++; \
OPCODE_EXE_INC(op); \
if (_py_stats) _py_stats->opcode_stats[lastopcode].pair_count[op]++; \
lastopcode = op; \
} while (0)
#else
#define INSTRUCTION_START(op) (frame->prev_instr = next_instr++)
#endif

#if USE_COMPUTED_GOTOS
# define TARGET(op) TARGET_##op: INSTRUCTION_START(op);
# define DISPATCH_GOTO() goto *opcode_targets[opcode]
#else
# define TARGET(op) case op: TARGET_##op: INSTRUCTION_START(op);
# define DISPATCH_GOTO() goto dispatch_opcode
#endif
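/* Editorial note (not part of this commit): with computed gotos enabled,
   TARGET(LOAD_FAST) expands to
       TARGET_LOAD_FAST: INSTRUCTION_START(LOAD_FAST);
   and DISPATCH_GOTO() becomes "goto *opcode_targets[opcode]", jumping through
   the per-opcode table. Without them, TARGET() also emits a "case LOAD_FAST:"
   label and DISPATCH_GOTO() falls back to "goto dispatch_opcode", i.e. back
   to the switch at the top of the loop. */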

/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
#ifdef LLTRACE
#define PRE_DISPATCH_GOTO() if (lltrace) { \
lltrace_instruction(frame, stack_pointer, next_instr); }
#else
#define PRE_DISPATCH_GOTO() ((void)0)
#endif


/* Do interpreter dispatch accounting for tracing and instrumentation */
#define DISPATCH() \
{ \
NEXTOPARG(); \
PRE_DISPATCH_GOTO(); \
assert(cframe.use_tracing == 0 || cframe.use_tracing == 255); \
opcode |= cframe.use_tracing OR_DTRACE_LINE; \
DISPATCH_GOTO(); \
}

#define DISPATCH_SAME_OPARG() \
{ \
opcode = _Py_OPCODE(*next_instr); \
PRE_DISPATCH_GOTO(); \
opcode |= cframe.use_tracing OR_DTRACE_LINE; \
DISPATCH_GOTO(); \
}

#define DISPATCH_INLINED(NEW_FRAME) \
do { \
_PyFrame_SetStackPointer(frame, stack_pointer); \
frame->prev_instr = next_instr - 1; \
(NEW_FRAME)->previous = frame; \
frame = cframe.current_frame = (NEW_FRAME); \
CALL_STAT_INC(inlined_py_calls); \
goto start_frame; \
} while (0)

#define CHECK_EVAL_BREAKER() \
_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \
if (_Py_atomic_load_relaxed_int32(eval_breaker)) { \
goto handle_eval_breaker; \
}


/* Tuple access macros */

#ifndef Py_DEBUG
#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
#else
static inline PyObject *
GETITEM(PyObject *v, Py_ssize_t i) {
assert(PyTuple_Check(v));
assert(i >= 0);
assert(i < PyTuple_GET_SIZE(v));
return PyTuple_GET_ITEM(v, i);
}
#endif

/* Code access macros */

/* The integer overflow is checked by an assertion below. */
#define INSTR_OFFSET() ((int)(next_instr - _PyCode_CODE(frame->f_code)))
#define NEXTOPARG() do { \
_Py_CODEUNIT word = *next_instr; \
opcode = _Py_OPCODE(word); \
oparg = _Py_OPARG(word); \
} while (0)
#define JUMPTO(x) (next_instr = _PyCode_CODE(frame->f_code) + (x))
#define JUMPBY(x) (next_instr += (x))

/* OpCode prediction macros
Some opcodes tend to come in pairs thus making it possible to
predict the second code when the first is run. For example,
COMPARE_OP is often followed by POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE.

Verifying the prediction costs a single high-speed test of a register
variable against a constant. If the pairing was good, then the
processor's own internal branch predication has a high likelihood of
success, resulting in a nearly zero-overhead transition to the
next opcode. A successful prediction saves a trip through the eval-loop
including its unpredictable switch-case branch. Combined with the
processor's internal branch prediction, a successful PREDICT has the
effect of making the two opcodes run as if they were a single new opcode
with the bodies combined.

If collecting opcode statistics, your choices are to either keep the
predictions turned-on and interpret the results as if some opcodes
had been combined or turn-off predictions so that the opcode frequency
counter updates for both opcodes.

Opcode prediction is disabled with threaded code, since the latter allows
the CPU to record separate branch prediction information for each
opcode.

*/

#define PREDICT_ID(op) PRED_##op

#if USE_COMPUTED_GOTOS
#define PREDICT(op) if (0) goto PREDICT_ID(op)
#else
#define PREDICT(op) \
do { \
_Py_CODEUNIT word = *next_instr; \
opcode = _Py_OPCODE(word) | cframe.use_tracing OR_DTRACE_LINE; \
if (opcode == op) { \
oparg = _Py_OPARG(word); \
INSTRUCTION_START(op); \
goto PREDICT_ID(op); \
} \
} while(0)
#endif
#define PREDICTED(op) PREDICT_ID(op):
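/* Editorial note (not part of this commit): PREDICTED(op) and PREDICT(op) are
   used as a pair. The predicted instruction places PREDICTED(op) at the top of
   its body, and the instruction that usually precedes it ends with PREDICT(op).
   A rough, hypothetical shape:

       TARGET(COMPARE_OP) {
           ...
           PREDICT(POP_JUMP_IF_FALSE);
           DISPATCH();
       }

       TARGET(POP_JUMP_IF_FALSE) {
           PREDICTED(POP_JUMP_IF_FALSE);
           ...
       }

   Under computed gotos PREDICT() compiles away entirely, as the comment above
   notes. */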


/* Stack manipulation macros */

/* The stack can grow at most MAXINT deep, as co_nlocals and
co_stacksize are ints. */
#define STACK_LEVEL() ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
#define STACK_SIZE() (frame->f_code->co_stacksize)
#define EMPTY() (STACK_LEVEL() == 0)
#define TOP() (stack_pointer[-1])
#define SECOND() (stack_pointer[-2])
#define THIRD() (stack_pointer[-3])
#define FOURTH() (stack_pointer[-4])
#define PEEK(n) (stack_pointer[-(n)])
#define POKE(n, v) (stack_pointer[-(n)] = (v))
#define SET_TOP(v) (stack_pointer[-1] = (v))
#define SET_SECOND(v) (stack_pointer[-2] = (v))
#define BASIC_STACKADJ(n) (stack_pointer += n)
#define BASIC_PUSH(v) (*stack_pointer++ = (v))
#define BASIC_POP() (*--stack_pointer)

#ifdef Py_DEBUG
#define PUSH(v) do { \
BASIC_PUSH(v); \
assert(STACK_LEVEL() <= STACK_SIZE()); \
} while (0)
#define POP() (assert(STACK_LEVEL() > 0), BASIC_POP())
#define STACK_GROW(n) do { \
assert(n >= 0); \
BASIC_STACKADJ(n); \
assert(STACK_LEVEL() <= STACK_SIZE()); \
} while (0)
#define STACK_SHRINK(n) do { \
assert(n >= 0); \
assert(STACK_LEVEL() >= n); \
BASIC_STACKADJ(-(n)); \
} while (0)
#else
#define PUSH(v) BASIC_PUSH(v)
#define POP() BASIC_POP()
#define STACK_GROW(n) BASIC_STACKADJ(n)
#define STACK_SHRINK(n) BASIC_STACKADJ(-(n))
#endif
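/* Editorial note (not part of this commit): a hypothetical instruction body
   using these macros, roughly how a binary operation reads in the interpreter:

       right = POP();
       left = TOP();
       res = example_binary_op(left, right);   // hypothetical helper
       Py_DECREF(left);
       Py_DECREF(right);
       SET_TOP(res);
       if (res == NULL) goto error;

   Net effect: the stack shrinks by one and the result sits on top. In Py_DEBUG
   builds the PUSH/POP/STACK_GROW/STACK_SHRINK variants above additionally
   assert that no sequence over- or underflows co_stacksize. */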

/* Local variable macros */

#define GETLOCAL(i) (frame->localsplus[i])

/* The SETLOCAL() macro must not DECREF the local variable in-place and
then store the new value; it must copy the old value to a temporary
value, then store the new value, and then DECREF the temporary value.
This is because it is possible that during the DECREF the frame is
accessed by other code (e.g. a __del__ method or gc.collect()) and the
variable would be pointing to already-freed memory. */
#define SETLOCAL(i, value) do { PyObject *tmp = GETLOCAL(i); \
GETLOCAL(i) = value; \
Py_XDECREF(tmp); } while (0)
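/* Editorial note (not part of this commit): a sketch of the hazard described
   above. A naive in-place variant such as

       #define SETLOCAL_UNSAFE(i, value) \
           do { Py_XDECREF(GETLOCAL(i)); GETLOCAL(i) = (value); } while (0)

   could run arbitrary code during the DECREF (a __del__ method, a weakref
   callback, gc.collect()) while frame->localsplus[i] still points at the
   object being destroyed, so that code could observe freed memory. SETLOCAL()
   avoids this by saving the old pointer, storing the new value first, and only
   then releasing the old one. */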

#define GO_TO_INSTRUCTION(op) goto PREDICT_ID(op)

#ifdef Py_STATS
#define UPDATE_MISS_STATS(INSTNAME) \
do { \
STAT_INC(opcode, miss); \
STAT_INC((INSTNAME), miss); \
/* The counter is always the first cache entry: */ \
if (ADAPTIVE_COUNTER_IS_ZERO(next_instr->cache)) { \
STAT_INC((INSTNAME), deopt); \
} \
else { \
/* This is about to be (incorrectly) incremented: */ \
STAT_DEC((INSTNAME), deferred); \
} \
} while (0)
#else
#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
#endif

#define DEOPT_IF(COND, INSTNAME) \
if ((COND)) { \
/* This is only a single jump on release builds! */ \
UPDATE_MISS_STATS((INSTNAME)); \
assert(_PyOpcode_Deopt[opcode] == (INSTNAME)); \
GO_TO_INSTRUCTION(INSTNAME); \
}


#define GLOBALS() frame->f_globals
#define BUILTINS() frame->f_builtins
#define LOCALS() frame->f_locals

/* Shared opcode macros */

#define TRACE_FUNCTION_EXIT() \
if (cframe.use_tracing) { \
if (trace_function_exit(tstate, frame, retval)) { \
Py_DECREF(retval); \
goto exit_unwind; \
} \
}

#define DTRACE_FUNCTION_EXIT() \
if (PyDTrace_FUNCTION_RETURN_ENABLED()) { \
dtrace_function_return(frame); \
}

#define TRACE_FUNCTION_UNWIND() \
if (cframe.use_tracing) { \
/* Since we are already unwinding, \
* we don't care if this raises */ \
trace_function_exit(tstate, frame, NULL); \
}

#define TRACE_FUNCTION_ENTRY() \
if (cframe.use_tracing) { \
_PyFrame_SetStackPointer(frame, stack_pointer); \
int err = trace_function_entry(tstate, frame); \
stack_pointer = _PyFrame_GetStackPointer(frame); \
if (err) { \
goto error; \
} \
}

#define TRACE_FUNCTION_THROW_ENTRY() \
if (cframe.use_tracing) { \
assert(frame->stacktop >= 0); \
if (trace_function_entry(tstate, frame)) { \
goto exit_unwind; \
} \
}

#define DTRACE_FUNCTION_ENTRY() \
if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
dtrace_function_entry(frame); \
}

#define ADAPTIVE_COUNTER_IS_ZERO(COUNTER) \
(((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == 0)

#define ADAPTIVE_COUNTER_IS_MAX(COUNTER) \
(((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == ((1 << MAX_BACKOFF_VALUE) - 1))

#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \
do { \
assert(!ADAPTIVE_COUNTER_IS_ZERO((COUNTER))); \
(COUNTER) -= (1 << ADAPTIVE_BACKOFF_BITS); \
} while (0);

#define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \
do { \
assert(!ADAPTIVE_COUNTER_IS_MAX((COUNTER))); \
(COUNTER) += (1 << ADAPTIVE_BACKOFF_BITS); \
} while (0);
#include "ceval_macros.h"

static int
trace_function_entry(PyThreadState *tstate, _PyInterpreterFrame *frame)

Python/ceval_macros.h (new file)
@@ -0,0 +1,349 @@
// Macros needed by ceval.c and bytecodes.c

/* Computed GOTOs, or
the-optimization-commonly-but-improperly-known-as-"threaded code"
using gcc's labels-as-values extension
(http://gcc.gnu.org/onlinedocs/gcc/Labels-as-Values.html).

The traditional bytecode evaluation loop uses a "switch" statement, which
decent compilers will optimize as a single indirect branch instruction
combined with a lookup table of jump addresses. However, since the
indirect jump instruction is shared by all opcodes, the CPU will have a
hard time making the right prediction for where to jump next (actually,
it will be always wrong except in the uncommon case of a sequence of
several identical opcodes).

"Threaded code" in contrast, uses an explicit jump table and an explicit
indirect jump instruction at the end of each opcode. Since the jump
instruction is at a different address for each opcode, the CPU will make a
separate prediction for each of these instructions, which is equivalent to
predicting the second opcode of each opcode pair. These predictions have
a much better chance to turn out valid, especially in small bytecode loops.

A mispredicted branch on a modern CPU flushes the whole pipeline and
can cost several CPU cycles (depending on the pipeline depth),
and potentially many more instructions (depending on the pipeline width).
A correctly predicted branch, however, is nearly free.

At the time of this writing, the "threaded code" version is up to 15-20%
faster than the normal "switch" version, depending on the compiler and the
CPU architecture.

NOTE: care must be taken that the compiler doesn't try to "optimize" the
indirect jumps by sharing them between all opcodes. Such optimizations
can be disabled on gcc by using the -fno-gcse flag (or possibly
-fno-crossjumping).
*/

/* Use macros rather than inline functions, to make it as clear as possible
* to the C compiler that the tracing check is a simple test then branch.
* We want to be sure that the compiler knows this before it generates
* the CFG.
*/

#ifdef WITH_DTRACE
#define OR_DTRACE_LINE | (PyDTrace_LINE_ENABLED() ? 255 : 0)
#else
#define OR_DTRACE_LINE
#endif

#ifdef HAVE_COMPUTED_GOTOS
#ifndef USE_COMPUTED_GOTOS
#define USE_COMPUTED_GOTOS 1
#endif
#else
#if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS
#error "Computed gotos are not supported on this compiler."
#endif
#undef USE_COMPUTED_GOTOS
#define USE_COMPUTED_GOTOS 0
#endif

#ifdef Py_STATS
#define INSTRUCTION_START(op) \
do { \
frame->prev_instr = next_instr++; \
OPCODE_EXE_INC(op); \
if (_py_stats) _py_stats->opcode_stats[lastopcode].pair_count[op]++; \
lastopcode = op; \
} while (0)
#else
#define INSTRUCTION_START(op) (frame->prev_instr = next_instr++)
#endif

#if USE_COMPUTED_GOTOS
# define TARGET(op) TARGET_##op: INSTRUCTION_START(op);
# define DISPATCH_GOTO() goto *opcode_targets[opcode]
#else
# define TARGET(op) case op: TARGET_##op: INSTRUCTION_START(op);
# define DISPATCH_GOTO() goto dispatch_opcode
#endif

/* PRE_DISPATCH_GOTO() does lltrace if enabled. Normally a no-op */
#ifdef LLTRACE
#define PRE_DISPATCH_GOTO() if (lltrace) { \
lltrace_instruction(frame, stack_pointer, next_instr); }
#else
#define PRE_DISPATCH_GOTO() ((void)0)
#endif


/* Do interpreter dispatch accounting for tracing and instrumentation */
#define DISPATCH() \
{ \
NEXTOPARG(); \
PRE_DISPATCH_GOTO(); \
assert(cframe.use_tracing == 0 || cframe.use_tracing == 255); \
opcode |= cframe.use_tracing OR_DTRACE_LINE; \
DISPATCH_GOTO(); \
}

#define DISPATCH_SAME_OPARG() \
{ \
opcode = _Py_OPCODE(*next_instr); \
PRE_DISPATCH_GOTO(); \
opcode |= cframe.use_tracing OR_DTRACE_LINE; \
DISPATCH_GOTO(); \
}

#define DISPATCH_INLINED(NEW_FRAME) \
do { \
_PyFrame_SetStackPointer(frame, stack_pointer); \
frame->prev_instr = next_instr - 1; \
(NEW_FRAME)->previous = frame; \
frame = cframe.current_frame = (NEW_FRAME); \
CALL_STAT_INC(inlined_py_calls); \
goto start_frame; \
} while (0)

#define CHECK_EVAL_BREAKER() \
_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY(); \
if (_Py_atomic_load_relaxed_int32(eval_breaker)) { \
goto handle_eval_breaker; \
}


/* Tuple access macros */

#ifndef Py_DEBUG
#define GETITEM(v, i) PyTuple_GET_ITEM((v), (i))
#else
static inline PyObject *
GETITEM(PyObject *v, Py_ssize_t i) {
assert(PyTuple_Check(v));
assert(i >= 0);
assert(i < PyTuple_GET_SIZE(v));
return PyTuple_GET_ITEM(v, i);
}
#endif

/* Code access macros */

/* The integer overflow is checked by an assertion below. */
#define INSTR_OFFSET() ((int)(next_instr - _PyCode_CODE(frame->f_code)))
#define NEXTOPARG() do { \
_Py_CODEUNIT word = *next_instr; \
opcode = _Py_OPCODE(word); \
oparg = _Py_OPARG(word); \
} while (0)
#define JUMPTO(x) (next_instr = _PyCode_CODE(frame->f_code) + (x))
#define JUMPBY(x) (next_instr += (x))

/* OpCode prediction macros
Some opcodes tend to come in pairs thus making it possible to
predict the second code when the first is run. For example,
COMPARE_OP is often followed by POP_JUMP_IF_FALSE or POP_JUMP_IF_TRUE.

Verifying the prediction costs a single high-speed test of a register
variable against a constant. If the pairing was good, then the
processor's own internal branch predication has a high likelihood of
success, resulting in a nearly zero-overhead transition to the
next opcode. A successful prediction saves a trip through the eval-loop
including its unpredictable switch-case branch. Combined with the
processor's internal branch prediction, a successful PREDICT has the
effect of making the two opcodes run as if they were a single new opcode
with the bodies combined.

If collecting opcode statistics, your choices are to either keep the
predictions turned-on and interpret the results as if some opcodes
had been combined or turn-off predictions so that the opcode frequency
counter updates for both opcodes.

Opcode prediction is disabled with threaded code, since the latter allows
the CPU to record separate branch prediction information for each
opcode.

*/

#define PREDICT_ID(op) PRED_##op

#if USE_COMPUTED_GOTOS
#define PREDICT(op) if (0) goto PREDICT_ID(op)
#else
#define PREDICT(op) \
do { \
_Py_CODEUNIT word = *next_instr; \
opcode = _Py_OPCODE(word) | cframe.use_tracing OR_DTRACE_LINE; \
if (opcode == op) { \
oparg = _Py_OPARG(word); \
INSTRUCTION_START(op); \
goto PREDICT_ID(op); \
} \
} while(0)
#endif
#define PREDICTED(op) PREDICT_ID(op):


/* Stack manipulation macros */

/* The stack can grow at most MAXINT deep, as co_nlocals and
co_stacksize are ints. */
#define STACK_LEVEL() ((int)(stack_pointer - _PyFrame_Stackbase(frame)))
#define STACK_SIZE() (frame->f_code->co_stacksize)
#define EMPTY() (STACK_LEVEL() == 0)
#define TOP() (stack_pointer[-1])
#define SECOND() (stack_pointer[-2])
#define THIRD() (stack_pointer[-3])
#define FOURTH() (stack_pointer[-4])
#define PEEK(n) (stack_pointer[-(n)])
#define POKE(n, v) (stack_pointer[-(n)] = (v))
#define SET_TOP(v) (stack_pointer[-1] = (v))
#define SET_SECOND(v) (stack_pointer[-2] = (v))
#define BASIC_STACKADJ(n) (stack_pointer += n)
#define BASIC_PUSH(v) (*stack_pointer++ = (v))
#define BASIC_POP() (*--stack_pointer)

#ifdef Py_DEBUG
#define PUSH(v) do { \
BASIC_PUSH(v); \
assert(STACK_LEVEL() <= STACK_SIZE()); \
} while (0)
#define POP() (assert(STACK_LEVEL() > 0), BASIC_POP())
#define STACK_GROW(n) do { \
assert(n >= 0); \
BASIC_STACKADJ(n); \
assert(STACK_LEVEL() <= STACK_SIZE()); \
} while (0)
#define STACK_SHRINK(n) do { \
assert(n >= 0); \
assert(STACK_LEVEL() >= n); \
BASIC_STACKADJ(-(n)); \
} while (0)
#else
#define PUSH(v) BASIC_PUSH(v)
#define POP() BASIC_POP()
#define STACK_GROW(n) BASIC_STACKADJ(n)
#define STACK_SHRINK(n) BASIC_STACKADJ(-(n))
#endif

/* Local variable macros */

#define GETLOCAL(i) (frame->localsplus[i])

/* The SETLOCAL() macro must not DECREF the local variable in-place and
then store the new value; it must copy the old value to a temporary
value, then store the new value, and then DECREF the temporary value.
This is because it is possible that during the DECREF the frame is
accessed by other code (e.g. a __del__ method or gc.collect()) and the
variable would be pointing to already-freed memory. */
#define SETLOCAL(i, value) do { PyObject *tmp = GETLOCAL(i); \
GETLOCAL(i) = value; \
Py_XDECREF(tmp); } while (0)

#define GO_TO_INSTRUCTION(op) goto PREDICT_ID(op)

#ifdef Py_STATS
#define UPDATE_MISS_STATS(INSTNAME) \
do { \
STAT_INC(opcode, miss); \
STAT_INC((INSTNAME), miss); \
/* The counter is always the first cache entry: */ \
if (ADAPTIVE_COUNTER_IS_ZERO(next_instr->cache)) { \
STAT_INC((INSTNAME), deopt); \
} \
else { \
/* This is about to be (incorrectly) incremented: */ \
STAT_DEC((INSTNAME), deferred); \
} \
} while (0)
#else
#define UPDATE_MISS_STATS(INSTNAME) ((void)0)
#endif

#define DEOPT_IF(COND, INSTNAME) \
if ((COND)) { \
/* This is only a single jump on release builds! */ \
UPDATE_MISS_STATS((INSTNAME)); \
assert(_PyOpcode_Deopt[opcode] == (INSTNAME)); \
GO_TO_INSTRUCTION(INSTNAME); \
}


#define GLOBALS() frame->f_globals
#define BUILTINS() frame->f_builtins
#define LOCALS() frame->f_locals

/* Shared opcode macros */

#define TRACE_FUNCTION_EXIT() \
if (cframe.use_tracing) { \
if (trace_function_exit(tstate, frame, retval)) { \
Py_DECREF(retval); \
goto exit_unwind; \
} \
}

#define DTRACE_FUNCTION_EXIT() \
if (PyDTrace_FUNCTION_RETURN_ENABLED()) { \
dtrace_function_return(frame); \
}

#define TRACE_FUNCTION_UNWIND() \
if (cframe.use_tracing) { \
/* Since we are already unwinding, \
* we don't care if this raises */ \
trace_function_exit(tstate, frame, NULL); \
}

#define TRACE_FUNCTION_ENTRY() \
if (cframe.use_tracing) { \
_PyFrame_SetStackPointer(frame, stack_pointer); \
int err = trace_function_entry(tstate, frame); \
stack_pointer = _PyFrame_GetStackPointer(frame); \
if (err) { \
goto error; \
} \
}

#define TRACE_FUNCTION_THROW_ENTRY() \
if (cframe.use_tracing) { \
assert(frame->stacktop >= 0); \
if (trace_function_entry(tstate, frame)) { \
goto exit_unwind; \
} \
}

#define DTRACE_FUNCTION_ENTRY() \
if (PyDTrace_FUNCTION_ENTRY_ENABLED()) { \
dtrace_function_entry(frame); \
}

#define ADAPTIVE_COUNTER_IS_ZERO(COUNTER) \
(((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == 0)

#define ADAPTIVE_COUNTER_IS_MAX(COUNTER) \
(((COUNTER) >> ADAPTIVE_BACKOFF_BITS) == ((1 << MAX_BACKOFF_VALUE) - 1))

#define DECREMENT_ADAPTIVE_COUNTER(COUNTER) \
do { \
assert(!ADAPTIVE_COUNTER_IS_ZERO((COUNTER))); \
(COUNTER) -= (1 << ADAPTIVE_BACKOFF_BITS); \
} while (0);

#define INCREMENT_ADAPTIVE_COUNTER(COUNTER) \
do { \
assert(!ADAPTIVE_COUNTER_IS_MAX((COUNTER))); \
(COUNTER) += (1 << ADAPTIVE_BACKOFF_BITS); \
} while (0);
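/* Editorial note (not part of this commit): the low ADAPTIVE_BACKOFF_BITS bits
   of the counter are reserved for backoff state, so the effective count lives
   in the upper bits; that is why the tests shift right before comparing and
   the increment/decrement step by (1 << ADAPTIVE_BACKOFF_BITS). For example,
   if ADAPTIVE_BACKOFF_BITS were 4, a counter of 0x30 would mean a count of 3;
   DECREMENT_ADAPTIVE_COUNTER() subtracts 0x10 leaving 0x20 (count 2), and
   ADAPTIVE_COUNTER_IS_ZERO() becomes true once the value drops below 0x10. */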

#define NAME_ERROR_MSG "name '%.200s' is not defined"