diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 282089c0837..6289acdd968 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -30,6 +30,11 @@ typedef struct { uint32_t builtin_keys_version; } _PyLoadGlobalCache; +typedef struct { + /* Borrowed ref in LOAD_METHOD */ + PyObject *obj; +} _PyObjectCache; + /* Add specialized versions of entries to this union. * * Do not break the invariant: sizeof(SpecializedCacheEntry) == 8 @@ -45,6 +50,7 @@ typedef union { _PyAdaptiveEntry adaptive; _PyAttrCache attr; _PyLoadGlobalCache load_global; + _PyObjectCache obj; } SpecializedCacheEntry; #define INSTRUCTIONS_PER_ENTRY (sizeof(SpecializedCacheEntry)/sizeof(_Py_CODEUNIT)) @@ -299,6 +305,7 @@ cache_backoff(_PyAdaptiveEntry *entry) { int _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); +int _Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache); int _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, _Py_CODEUNIT *instr); #define PRINT_SPECIALIZATION_STATS 0 diff --git a/Include/opcode.h b/Include/opcode.h index 3334242e7e4..6b0298224ae 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -149,15 +149,19 @@ extern "C" { #define LOAD_GLOBAL_ADAPTIVE 41 #define LOAD_GLOBAL_MODULE 42 #define LOAD_GLOBAL_BUILTIN 43 -#define STORE_ATTR_ADAPTIVE 44 -#define STORE_ATTR_SPLIT_KEYS 45 -#define STORE_ATTR_SLOT 46 -#define STORE_ATTR_WITH_HINT 47 -#define LOAD_FAST__LOAD_FAST 48 -#define STORE_FAST__LOAD_FAST 58 -#define LOAD_FAST__LOAD_CONST 80 -#define LOAD_CONST__LOAD_FAST 81 -#define STORE_FAST__STORE_FAST 87 +#define LOAD_METHOD_ADAPTIVE 44 
+#define LOAD_METHOD_CACHED 45 +#define LOAD_METHOD_CLASS 46 +#define LOAD_METHOD_MODULE 47 +#define STORE_ATTR_ADAPTIVE 48 +#define STORE_ATTR_SPLIT_KEYS 58 +#define STORE_ATTR_SLOT 80 +#define STORE_ATTR_WITH_HINT 81 +#define LOAD_FAST__LOAD_FAST 87 +#define STORE_FAST__LOAD_FAST 88 +#define LOAD_FAST__LOAD_CONST 120 +#define LOAD_CONST__LOAD_FAST 122 +#define STORE_FAST__STORE_FAST 123 #ifdef NEED_OPCODE_JUMP_TABLES static uint32_t _PyOpcode_RelativeJump[8] = { 0U, diff --git a/Lib/opcode.py b/Lib/opcode.py index 53cdc4aa0d5..d3a7c8bf16f 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -233,6 +233,10 @@ def jabs_op(name, op): "LOAD_GLOBAL_ADAPTIVE", "LOAD_GLOBAL_MODULE", "LOAD_GLOBAL_BUILTIN", + "LOAD_METHOD_ADAPTIVE", + "LOAD_METHOD_CACHED", + "LOAD_METHOD_CLASS", + "LOAD_METHOD_MODULE", "STORE_ATTR_ADAPTIVE", "STORE_ATTR_SPLIT_KEYS", "STORE_ATTR_SLOT", diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-08-11-20-45-02.bpo-44889.2T3nTn.rst b/Misc/NEWS.d/next/Core and Builtins/2021-08-11-20-45-02.bpo-44889.2T3nTn.rst new file mode 100644 index 00000000000..a50b6851c14 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-08-11-20-45-02.bpo-44889.2T3nTn.rst @@ -0,0 +1,8 @@ +Initial implementation of adaptive specialization of ``LOAD_METHOD``. 
The +following specialized forms were added: + +* ``LOAD_METHOD_CACHED`` + +* ``LOAD_METHOD_MODULE`` + +* ``LOAD_METHOD_CLASS`` diff --git a/Python/ceval.c b/Python/ceval.c index 48787493fdd..333c54f50e2 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1439,6 +1439,27 @@ eval_frame_handle_pending(PyThreadState *tstate) #define BUILTINS() frame->f_builtins #define LOCALS() frame->f_locals +/* Shared opcode macros */ + +// shared by LOAD_ATTR_MODULE and LOAD_METHOD_MODULE +#define LOAD_MODULE_ATTR_OR_METHOD(attr_or_method) \ + SpecializedCacheEntry *caches = GET_CACHE(); \ + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; \ + _PyAttrCache *cache1 = &caches[-1].attr; \ + DEOPT_IF(!PyModule_CheckExact(owner), LOAD_##attr_or_method); \ + PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; \ + assert(dict != NULL); \ + DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, \ + LOAD_##attr_or_method); \ + assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); \ + assert(cache0->index < dict->ma_keys->dk_nentries); \ + PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index; \ + res = ep->me_value; \ + DEOPT_IF(res == NULL, LOAD_##attr_or_method); \ + STAT_INC(LOAD_##attr_or_method, hit); \ + record_cache_hit(cache0); \ + Py_INCREF(res); + static int trace_function_entry(PyThreadState *tstate, InterpreterFrame *frame) { @@ -3511,23 +3532,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr TARGET(LOAD_ATTR_MODULE): { assert(cframe.use_tracing == 0); + // shared with LOAD_METHOD_MODULE PyObject *owner = TOP(); PyObject *res; - SpecializedCacheEntry *caches = GET_CACHE(); - _PyAdaptiveEntry *cache0 = &caches[0].adaptive; - _PyAttrCache *cache1 = &caches[-1].attr; - DEOPT_IF(!PyModule_CheckExact(owner), LOAD_ATTR); - PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; - assert(dict != NULL); - DEOPT_IF(dict->ma_keys->dk_version != cache1->dk_version_or_hint, LOAD_ATTR); - 
assert(dict->ma_keys->dk_kind == DICT_KEYS_UNICODE); - assert(cache0->index < dict->ma_keys->dk_nentries); - PyDictKeyEntry *ep = DK_ENTRIES(dict->ma_keys) + cache0->index; - res = ep->me_value; - DEOPT_IF(res == NULL, LOAD_ATTR); - STAT_INC(LOAD_ATTR, hit); - record_cache_hit(cache0); - Py_INCREF(res); + LOAD_MODULE_ATTR_OR_METHOD(ATTR); SET_TOP(res); Py_DECREF(owner); DISPATCH(); @@ -4282,6 +4290,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr } TARGET(LOAD_METHOD): { + PREDICTED(LOAD_METHOD); + STAT_INC(LOAD_METHOD, unquickened); /* Designed to work in tandem with CALL_METHOD. */ PyObject *name = GETITEM(names, oparg); PyObject *obj = TOP(); @@ -4318,6 +4328,107 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, InterpreterFrame *frame, int thr DISPATCH(); } + TARGET(LOAD_METHOD_ADAPTIVE): { + assert(cframe.use_tracing == 0); + SpecializedCacheEntry *cache = GET_CACHE(); + if (cache->adaptive.counter == 0) { + PyObject *owner = TOP(); + PyObject *name = GETITEM(names, cache->adaptive.original_oparg); + next_instr--; + if (_Py_Specialize_LoadMethod(owner, next_instr, name, cache) < 0) { + goto error; + } + DISPATCH(); + } + else { + STAT_INC(LOAD_METHOD, deferred); + cache->adaptive.counter--; + oparg = cache->adaptive.original_oparg; + STAT_DEC(LOAD_METHOD, unquickened); + JUMP_TO_INSTRUCTION(LOAD_METHOD); + } + } + + TARGET(LOAD_METHOD_CACHED): { + /* LOAD_METHOD, with cached method object */ + assert(cframe.use_tracing == 0); + PyObject *self = TOP(); + PyTypeObject *self_cls = Py_TYPE(self); + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyAttrCache *cache1 = &caches[-1].attr; + _PyObjectCache *cache2 = &caches[-2].obj; + + DEOPT_IF(self_cls->tp_version_tag != cache1->tp_version, LOAD_METHOD); + assert(cache1->dk_version_or_hint != 0); + assert(cache1->tp_version != 0); + assert(self_cls->tp_dictoffset >= 0); + assert(Py_TYPE(self_cls)->tp_dictoffset > 0); + + // inline 
version of _PyObject_GetDictPtr for offset >= 0 + PyObject *dict = self_cls->tp_dictoffset != 0 ? + *(PyObject **) ((char *)self + self_cls->tp_dictoffset) : NULL; + + // Ensure self.__dict__ didn't modify keys. + // Don't care if self has no dict, it could be builtin or __slots__. + DEOPT_IF(dict != NULL && + ((PyDictObject *)dict)->ma_keys->dk_version != + cache1->dk_version_or_hint, LOAD_METHOD); + + STAT_INC(LOAD_METHOD, hit); + record_cache_hit(cache0); + PyObject *res = cache2->obj; + assert(res != NULL); + assert(_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR)); + Py_INCREF(res); + SET_TOP(res); + PUSH(self); + DISPATCH(); + } + + TARGET(LOAD_METHOD_MODULE): { + assert(cframe.use_tracing == 0); + PyObject *owner = TOP(); + PyObject *res; + LOAD_MODULE_ATTR_OR_METHOD(METHOD); + SET_TOP(NULL); + Py_DECREF(owner); + PUSH(res); + DISPATCH(); + } + + TARGET(LOAD_METHOD_CLASS): { + /* LOAD_METHOD, for class methods. Deopts when TOP() is not a type. */ + assert(cframe.use_tracing == 0); + SpecializedCacheEntry *caches = GET_CACHE(); + _PyAdaptiveEntry *cache0 = &caches[0].adaptive; + _PyAttrCache *cache1 = &caches[-1].attr; + _PyObjectCache *cache2 = &caches[-2].obj; + PyObject *cls = TOP(); + DEOPT_IF(!PyType_Check(cls), LOAD_METHOD); + PyTypeObject *cls_type = Py_TYPE(cls); + assert(cls_type->tp_dictoffset > 0); + PyObject *dict = *(PyObject **) ((char *)cls + cls_type->tp_dictoffset); + // Don't care if no dict -- tp_version_tag should catch anything wrong. 
+ DEOPT_IF(dict != NULL && ((PyDictObject *)dict)->ma_keys->dk_version != + cache1->dk_version_or_hint, LOAD_METHOD); + DEOPT_IF(((PyTypeObject *)cls)->tp_version_tag != cache1->tp_version, + LOAD_METHOD); + assert(cache1->dk_version_or_hint != 0); + assert(cache1->tp_version != 0); + + STAT_INC(LOAD_METHOD, hit); + record_cache_hit(cache0); + PyObject *res = cache2->obj; + assert(res != NULL); + assert(_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR)); + Py_INCREF(res); + SET_TOP(NULL); + Py_DECREF(cls); + PUSH(res); + DISPATCH(); + } + TARGET(CALL_METHOD): { /* Designed to work in tamdem with LOAD_METHOD. */ PyObject **sp, *res; @@ -4648,6 +4759,7 @@ opname ## _miss: \ MISS_WITH_CACHE(LOAD_ATTR) MISS_WITH_CACHE(STORE_ATTR) MISS_WITH_CACHE(LOAD_GLOBAL) +MISS_WITH_CACHE(LOAD_METHOD) MISS_WITH_OPARG_COUNTER(BINARY_SUBSCR) binary_subscr_dict_error: diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index c8036a63f22..f97eaf80815 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -43,11 +43,11 @@ static void *opcode_targets[256] = { &&TARGET_LOAD_GLOBAL_ADAPTIVE, &&TARGET_LOAD_GLOBAL_MODULE, &&TARGET_LOAD_GLOBAL_BUILTIN, + &&TARGET_LOAD_METHOD_ADAPTIVE, + &&TARGET_LOAD_METHOD_CACHED, + &&TARGET_LOAD_METHOD_CLASS, + &&TARGET_LOAD_METHOD_MODULE, &&TARGET_STORE_ATTR_ADAPTIVE, - &&TARGET_STORE_ATTR_SPLIT_KEYS, - &&TARGET_STORE_ATTR_SLOT, - &&TARGET_STORE_ATTR_WITH_HINT, - &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, @@ -57,7 +57,7 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_ADD, &&TARGET_INPLACE_SUBTRACT, &&TARGET_INPLACE_MULTIPLY, - &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_STORE_ATTR_SPLIT_KEYS, &&TARGET_INPLACE_MODULO, &&TARGET_STORE_SUBSCR, &&TARGET_DELETE_SUBSCR, @@ -79,15 +79,15 @@ static void *opcode_targets[256] = { &&TARGET_INPLACE_AND, &&TARGET_INPLACE_XOR, &&TARGET_INPLACE_OR, - &&TARGET_LOAD_FAST__LOAD_CONST, - &&TARGET_LOAD_CONST__LOAD_FAST, + 
&&TARGET_STORE_ATTR_SLOT, + &&TARGET_STORE_ATTR_WITH_HINT, &&TARGET_LIST_TO_TUPLE, &&TARGET_RETURN_VALUE, &&TARGET_IMPORT_STAR, &&TARGET_SETUP_ANNOTATIONS, &&TARGET_YIELD_VALUE, - &&TARGET_STORE_FAST__STORE_FAST, - &&_unknown_opcode, + &&TARGET_LOAD_FAST__LOAD_FAST, + &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -119,10 +119,10 @@ static void *opcode_targets[256] = { &&TARGET_IS_OP, &&TARGET_CONTAINS_OP, &&TARGET_RERAISE, - &&_unknown_opcode, + &&TARGET_LOAD_FAST__LOAD_CONST, &&TARGET_JUMP_IF_NOT_EXC_MATCH, - &&_unknown_opcode, - &&_unknown_opcode, + &&TARGET_LOAD_CONST__LOAD_FAST, + &&TARGET_STORE_FAST__STORE_FAST, &&TARGET_LOAD_FAST, &&TARGET_STORE_FAST, &&TARGET_DELETE_FAST, diff --git a/Python/specialize.c b/Python/specialize.c index ecab69bcae7..359bec57193 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -121,6 +121,7 @@ _Py_GetSpecializationStats(void) { int err = 0; err += add_stat_dict(stats, LOAD_ATTR, "load_attr"); err += add_stat_dict(stats, LOAD_GLOBAL, "load_global"); + err += add_stat_dict(stats, LOAD_METHOD, "load_method"); err += add_stat_dict(stats, BINARY_SUBSCR, "binary_subscr"); err += add_stat_dict(stats, STORE_ATTR, "store_attr"); if (err < 0) { @@ -175,6 +176,7 @@ _Py_PrintSpecializationStats(void) #endif print_stats(out, &_specialization_stats[LOAD_ATTR], "load_attr"); print_stats(out, &_specialization_stats[LOAD_GLOBAL], "load_global"); + print_stats(out, &_specialization_stats[LOAD_METHOD], "load_method"); print_stats(out, &_specialization_stats[BINARY_SUBSCR], "binary_subscr"); print_stats(out, &_specialization_stats[STORE_ATTR], "store_attr"); if (out != stderr) { @@ -223,6 +225,7 @@ get_cache_count(SpecializedCacheOrInstruction *quickened) { static uint8_t adaptive_opcodes[256] = { [LOAD_ATTR] = LOAD_ATTR_ADAPTIVE, [LOAD_GLOBAL] = LOAD_GLOBAL_ADAPTIVE, + [LOAD_METHOD] = LOAD_METHOD_ADAPTIVE, [BINARY_SUBSCR] = BINARY_SUBSCR_ADAPTIVE, [STORE_ATTR] = STORE_ATTR_ADAPTIVE, 
}; @@ -231,6 +234,7 @@ static uint8_t adaptive_opcodes[256] = { static uint8_t cache_requirements[256] = { [LOAD_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ [LOAD_GLOBAL] = 2, /* _PyAdaptiveEntry and _PyLoadGlobalCache */ + [LOAD_METHOD] = 3, /* _PyAdaptiveEntry, _PyAttrCache and _PyObjectCache */ [BINARY_SUBSCR] = 0, [STORE_ATTR] = 2, /* _PyAdaptiveEntry and _PyAttrCache */ }; @@ -417,6 +421,15 @@ _Py_Quicken(PyCodeObject *code) { #define SPEC_FAIL_READ_ONLY 15 #define SPEC_FAIL_AUDITED_SLOT 16 +/* Methods */ + +#define SPEC_FAIL_NEGATIVE_DICTOFFSET 14 +#define SPEC_FAIL_IS_ATTR 15 +#define SPEC_FAIL_DICT_SUBCLASS 16 +#define SPEC_FAIL_BUILTIN_CLASS_METHOD 17 +#define SPEC_FAIL_CLASS_METHOD_OBJ 18 +#define SPEC_FAIL_NOT_METHOD 19 + /* Binary subscr */ #define SPEC_FAIL_LIST_NON_INT_SUBSCRIPT 8 @@ -427,7 +440,8 @@ _Py_Quicken(PyCodeObject *code) { static int specialize_module_load_attr( PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, - _PyAdaptiveEntry *cache0, _PyAttrCache *cache1) + _PyAdaptiveEntry *cache0, _PyAttrCache *cache1, int opcode, + int opcode_module) { PyModuleObject *m = (PyModuleObject *)owner; PyObject *value = NULL; @@ -435,39 +449,39 @@ specialize_module_load_attr( _Py_IDENTIFIER(__getattr__); PyDictObject *dict = (PyDictObject *)m->md_dict; if (dict == NULL) { - SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_NO_DICT); + SPECIALIZATION_FAIL(opcode, SPEC_FAIL_NO_DICT); return -1; } if (dict->ma_keys->dk_kind != DICT_KEYS_UNICODE) { - SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_NON_STRING_OR_SPLIT); + SPECIALIZATION_FAIL(opcode, SPEC_FAIL_NON_STRING_OR_SPLIT); return -1; } getattr = _PyUnicode_FromId(&PyId___getattr__); /* borrowed */ if (getattr == NULL) { - SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OVERRIDDEN); + SPECIALIZATION_FAIL(opcode, SPEC_FAIL_OVERRIDDEN); PyErr_Clear(); return -1; } Py_ssize_t index = _PyDict_GetItemHint(dict, getattr, -1, &value); assert(index != DKIX_ERROR); if (index != DKIX_EMPTY) { - SPECIALIZATION_FAIL(LOAD_ATTR, 
SPEC_FAIL_MODULE_ATTR_NOT_FOUND); + SPECIALIZATION_FAIL(opcode, SPEC_FAIL_MODULE_ATTR_NOT_FOUND); return -1; } index = _PyDict_GetItemHint(dict, name, -1, &value); assert (index != DKIX_ERROR); if (index != (uint16_t)index) { - SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OUT_OF_RANGE); + SPECIALIZATION_FAIL(opcode, SPEC_FAIL_OUT_OF_RANGE); return -1; } uint32_t keys_version = _PyDictKeys_GetVersionForCurrentState(dict); if (keys_version == 0) { - SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OUT_OF_VERSIONS); + SPECIALIZATION_FAIL(opcode, SPEC_FAIL_OUT_OF_VERSIONS); return -1; } cache1->dk_version_or_hint = keys_version; cache0->index = (uint16_t)index; - *instr = _Py_MAKECODEUNIT(LOAD_ATTR_MODULE, _Py_OPARG(*instr)); + *instr = _Py_MAKECODEUNIT(opcode_module, _Py_OPARG(*instr)); return 0; } @@ -482,6 +496,8 @@ typedef enum { OBJECT_SLOT, /* Is an object slot descriptor */ OTHER_SLOT, /* Is a slot descriptor of another type */ NON_OVERRIDING, /* Is another non-overriding descriptor, and is an instance of an immutable class*/ + BUILTIN_CLASSMETHOD, /* Builtin methods with METH_CLASS */ + PYTHON_CLASSMETHOD, /* Python classmethod(func) object */ NON_DESCRIPTOR, /* Is not a descriptor, and is an instance of an immutable class */ MUTABLE, /* Instance of a mutable class; might, or might not, be a descriptor */ ABSENT, /* Attribute is not present on the class */ @@ -537,6 +553,12 @@ analyze_descriptor(PyTypeObject *type, PyObject *name, PyObject **descr, int sto if (desc_cls->tp_flags & Py_TPFLAGS_METHOD_DESCRIPTOR) { return METHOD; } + if (Py_IS_TYPE(descriptor, &PyClassMethodDescr_Type)) { + return BUILTIN_CLASSMETHOD; + } + if (Py_IS_TYPE(descriptor, &PyClassMethod_Type)) { + return PYTHON_CLASSMETHOD; + } return NON_OVERRIDING; } return NON_DESCRIPTOR; @@ -549,7 +571,8 @@ specialize_dict_access( _PyAdaptiveEntry *cache0, _PyAttrCache *cache1, int base_op, int split_op, int hint_op) { - assert(kind == NON_OVERRIDING || kind == NON_DESCRIPTOR || kind == ABSENT); + assert(kind 
== NON_OVERRIDING || kind == NON_DESCRIPTOR || kind == ABSENT || + kind == BUILTIN_CLASSMETHOD || kind == PYTHON_CLASSMETHOD); // No desciptor, or non overriding. if (type->tp_dictoffset < 0) { SPECIALIZATION_FAIL(base_op, SPEC_FAIL_OUT_OF_RANGE); @@ -608,6 +631,8 @@ specialize_dict_access( /* No attribute in instance dictionary */ switch(kind) { case NON_OVERRIDING: + case BUILTIN_CLASSMETHOD: + case PYTHON_CLASSMETHOD: SPECIALIZATION_FAIL(base_op, SPEC_FAIL_NON_OVERRIDING_DESCRIPTOR); return 0; case NON_DESCRIPTOR: @@ -628,7 +653,8 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp _PyAdaptiveEntry *cache0 = &cache->adaptive; _PyAttrCache *cache1 = &cache[-1].attr; if (PyModule_CheckExact(owner)) { - int err = specialize_module_load_attr(owner, instr, name, cache0, cache1); + int err = specialize_module_load_attr(owner, instr, name, cache0, cache1, + LOAD_ATTR, LOAD_ATTR_MODULE); if (err) { goto fail; } @@ -690,6 +716,8 @@ _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, Sp case GETSET_OVERRIDDEN: SPECIALIZATION_FAIL(LOAD_ATTR, SPEC_FAIL_OVERRIDDEN); goto fail; + case BUILTIN_CLASSMETHOD: + case PYTHON_CLASSMETHOD: case NON_OVERRIDING: case NON_DESCRIPTOR: case ABSENT: @@ -769,6 +797,8 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, S case GETSET_OVERRIDDEN: SPECIALIZATION_FAIL(STORE_ATTR, SPEC_FAIL_OVERRIDDEN); goto fail; + case BUILTIN_CLASSMETHOD: + case PYTHON_CLASSMETHOD: case NON_OVERRIDING: case NON_DESCRIPTOR: case ABSENT: @@ -798,6 +828,146 @@ _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, S } +// Please collect stats carefully before and after modifying. A subtle change +// can cause a significant drop in cache hits. A possible test is +// python.exe -m test_typing test_re test_dis test_zlib. 
+// Returns 0 whether or not a specialized opcode was installed in *instr, and
+// -1 only when PyType_Ready() or PyObject_Hash() fails.
+int
+_Py_Specialize_LoadMethod(PyObject *owner, _Py_CODEUNIT *instr, PyObject *name, SpecializedCacheEntry *cache)
+{
+    _PyAdaptiveEntry *cache0 = &cache->adaptive;
+    _PyAttrCache *cache1 = &cache[-1].attr;
+    _PyObjectCache *cache2 = &cache[-2].obj;
+
+    PyTypeObject *owner_cls = Py_TYPE(owner);
+    PyDictObject *owner_dict = NULL;
+    if (PyModule_CheckExact(owner)) {
+        int err = specialize_module_load_attr(owner, instr, name, cache0, cache1,
+            LOAD_METHOD, LOAD_METHOD_MODULE);
+        if (err) {
+            goto fail;
+        }
+        goto success;
+    }
+    if (owner_cls->tp_dict == NULL && PyType_Ready(owner_cls) < 0) {
+        return -1;
+    }
+    if (Py_TYPE(owner_cls)->tp_dictoffset < 0) {
+        SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_OUT_OF_RANGE);
+        goto fail;
+    }
+    // Technically this is fine for bound method calls, but it's uncommon and
+    // slightly slower at runtime to get dict.
+    if (owner_cls->tp_dictoffset < 0) {
+        SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_NEGATIVE_DICTOFFSET);
+        goto fail;
+    }
+    PyObject **owner_dictptr = _PyObject_GetDictPtr(owner);
+    int owner_has_dict = (owner_dictptr != NULL && *owner_dictptr != NULL);
+    owner_dict = owner_has_dict ? (PyDictObject *)*owner_dictptr : NULL;
+    // Make sure dict doesn't get GC-ed halfway.
+    Py_XINCREF(owner_dict);
+    // Check for classmethods.
+    int owner_is_class = PyType_Check(owner);
+    owner_cls = owner_is_class ? (PyTypeObject *)owner : owner_cls;
+
+    if ((owner_cls->tp_flags & Py_TPFLAGS_VALID_VERSION_TAG) == 0 ||
+        owner_cls->tp_version_tag == 0) {
+        SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_OUT_OF_VERSIONS);
+        goto fail;
+    }
+
+    PyObject *descr = NULL;
+    DesciptorClassification kind =
+        analyze_descriptor(owner_cls, name, &descr, 0);
+    // Store the version right away, in case it's modified halfway through.
+    cache1->tp_version = owner_cls->tp_version_tag;
+
+    assert(descr != NULL || kind == ABSENT || kind == GETSET_OVERRIDDEN);
+    switch (kind) {
+        case METHOD:
+            break;
+        case BUILTIN_CLASSMETHOD:
+            SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_BUILTIN_CLASS_METHOD);
+            goto fail;
+        case PYTHON_CLASSMETHOD:
+            SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_CLASS_METHOD_OBJ);
+            goto fail;
+        default:
+            SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_NOT_METHOD);
+            goto fail;
+    }
+
+    assert(kind == METHOD);
+    // If o.__dict__ changes, the method might be found in o.__dict__
+    // instead of old type lookup. So record o.__dict__'s keys.
+    uint32_t keys_version = UINT32_MAX;
+    if (owner_has_dict) {
+        // _PyDictKeys_GetVersionForCurrentState isn't accurate for
+        // custom dict subclasses at the moment.
+        if (!PyDict_CheckExact(owner_dict)) {
+            SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_DICT_SUBCLASS);
+            goto fail;
+        }
+        assert(PyUnicode_CheckExact(name));
+        Py_hash_t hash = PyObject_Hash(name);
+        if (hash == -1) {
+            // This return skips `fail:`, so drop the owner_dict reference here.
+            Py_DECREF(owner_dict);
+            return -1;
+        }
+        PyObject *value = NULL;
+        if (!owner_is_class) {
+            // Instance methods shouldn't be in o.__dict__. That makes
+            // it an attribute.
+            Py_ssize_t ix = _Py_dict_lookup(owner_dict, name, hash, &value);
+            assert(ix != DKIX_ERROR);
+            if (ix != DKIX_EMPTY) {
+                SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_IS_ATTR);
+                goto fail;
+            }
+        }
+        keys_version = _PyDictKeys_GetVersionForCurrentState(owner_dict);
+        if (keys_version == 0) {
+            SPECIALIZATION_FAIL(LOAD_METHOD, SPEC_FAIL_OUT_OF_VERSIONS);
+            goto fail;
+        }
+    } // Else owner is maybe a builtin with no dict, or __slots__. Doesn't matter.
+
+    /* `descr` is borrowed. Just check tp_version_tag before accessing in case
+     * it's deleted. This is safe for methods (even inherited ones from super
+     * classes!) as long as tp_version_tag is validated for two main reasons:
+     *
+     * 1. The class will always hold a reference to the method so it will
+     *    usually not be GC-ed. Should it be deleted in Python, e.g.
+     *    `del obj.meth`, tp_version_tag will be invalidated, because of reason 2.
+     *
+     * 2. The pre-existing type method cache (MCACHE) uses the same principles
+     *    of caching a borrowed descriptor. It does all the heavy lifting for us.
+     *    E.g. it invalidates on any MRO modification, on any type object
+     *    change along said MRO, etc. (see PyType_Modified usages in typeobject.c).
+     *    The type method cache has been working since Python 2.6 and it's
+     *    battle-tested.
+     */
+    cache2->obj = descr;
+    cache1->dk_version_or_hint = keys_version;
+    *instr = _Py_MAKECODEUNIT(owner_is_class ? LOAD_METHOD_CLASS :
+        LOAD_METHOD_CACHED, _Py_OPARG(*instr));
+success:
+    Py_XDECREF(owner_dict);
+    STAT_INC(LOAD_METHOD, specialization_success);
+    assert(!PyErr_Occurred());
+    cache0->counter = saturating_start();
+    return 0;
+fail:
+    Py_XDECREF(owner_dict);
+    STAT_INC(LOAD_METHOD, specialization_failure);
+    assert(!PyErr_Occurred());
+    cache_backoff(cache0);
+    return 0;
+}
+
 int
 _Py_Specialize_LoadGlobal(
     PyObject *globals, PyObject *builtins,